date:20100913

[COMMIT master] qemu-kvm-x86.c: remove extraneous line continuation

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index f2c81f0..c5d44e0 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -659,7 +659,7 @@ static int get_msr_entry(struct kvm_msr_entry *entry, 
CPUState *env)
 #endif
 default:
 #ifdef KVM_CAP_MCE
-if (entry-index = MSR_MC0_CTL   \
+if (entry-index = MSR_MC0_CTL 
 entry-index  MSR_MC0_CTL + (env-mcg_cap  0xff) * 4) {
 env-mce_banks[entry-index - MSR_MC0_CTL] = entry-data;
 break;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] qemu-kvm-x86.c: reindent

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Reindent qemu-kvm-x86.c according to CODING_STYLE.  The original used a mix
of qemu and linux indentation styles.

Signed-off-by: Avi Kivity a...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 4c32771..f2c81f0 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -24,7 +24,7 @@
 #include kvm.h
 #include hw/apic.h
 
-#define MSR_IA32_TSC   0x10
+#define MSR_IA32_TSC0x10
 
 static struct kvm_msr_list *kvm_msr_list;
 extern unsigned int kvm_shadow_memory;
@@ -35,205 +35,203 @@ static int lm_capable_kernel;
 
 int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr)
 {
-   int r;
-/*
- * Tell fw_cfg to notify the BIOS to reserve the range.
- */
-if (e820_add_entry(addr, 0x4000, E820_RESERVED)  0) {
-perror(e820_add_entry() table is full);
-exit(1);
-}
+int r;
+/*
+ * Tell fw_cfg to notify the BIOS to reserve the range.
+ */
+if (e820_add_entry(addr, 0x4000, E820_RESERVED)  0) {
+perror(e820_add_entry() table is full);
+exit(1);
+}
 
-   r = kvm_vm_ioctl(kvm_state, KVM_SET_TSS_ADDR, addr);
-   if (r  0) {
-   fprintf(stderr, kvm_set_tss_addr: %m\n);
-   return r;
-   }
-   return 0;
+r = kvm_vm_ioctl(kvm_state, KVM_SET_TSS_ADDR, addr);
+if (r  0) {
+fprintf(stderr, kvm_set_tss_addr: %m\n);
+return r;
+}
+return 0;
 }
 
 static int kvm_init_tss(kvm_context_t kvm)
 {
-   int r;
-
-   r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
-   if (r  0) {
-   /*
-* this address is 3 pages before the bios, and the bios should 
present
-* as unavaible memory
-*/
-   r = kvm_set_tss_addr(kvm, 0xfeffd000);
-   if (r  0) {
-   fprintf(stderr, kvm_init_tss: unable to set tss 
addr\n);
-   return r;
-   }
-   } else {
-   fprintf(stderr, kvm does not support KVM_CAP_SET_TSS_ADDR\n);
-   }
-   return 0;
+int r;
+
+r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
+if (r  0) {
+/*
+ * this address is 3 pages before the bios, and the bios should present
+ * as unavaible memory
+ */
+r = kvm_set_tss_addr(kvm, 0xfeffd000);
+if (r  0) {
+fprintf(stderr, kvm_init_tss: unable to set tss addr\n);
+return r;
+}
+} else {
+fprintf(stderr, kvm does not support KVM_CAP_SET_TSS_ADDR\n);
+}
+return 0;
 }
 
 static int kvm_set_identity_map_addr(kvm_context_t kvm, uint64_t addr)
 {
 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
-   int r;
-
-   r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, 
KVM_CAP_SET_IDENTITY_MAP_ADDR);
-   if (r  0) {
-   r = kvm_vm_ioctl(kvm_state, KVM_SET_IDENTITY_MAP_ADDR, addr);
-   if (r == -1) {
-   fprintf(stderr, kvm_set_identity_map_addr: %m\n);
-   return -errno;
-   }
-   return 0;
-   }
+int r;
+
+r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, 
KVM_CAP_SET_IDENTITY_MAP_ADDR);
+if (r  0) {
+r = kvm_vm_ioctl(kvm_state, KVM_SET_IDENTITY_MAP_ADDR, addr);
+if (r == -1) {
+fprintf(stderr, kvm_set_identity_map_addr: %m\n);
+return -errno;
+}
+return 0;
+}
 #endif
-   return -ENOSYS;
+return -ENOSYS;
 }
 
 static int kvm_init_identity_map_page(kvm_context_t kvm)
 {
 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
-   int r;
-
-   r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, 
KVM_CAP_SET_IDENTITY_MAP_ADDR);
-   if (r  0) {
-   /*
-* this address is 4 pages before the bios, and the bios should 
present
-* as unavaible memory
-*/
-   r = kvm_set_identity_map_addr(kvm, 0xfeffc000);
-   if (r  0) {
-   fprintf(stderr, kvm_init_identity_map_page: 
-   unable to set identity mapping addr\n);
-   return r;
-   }
-
-   }
+int r;
+
+r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, 
KVM_CAP_SET_IDENTITY_MAP_ADDR);
+if (r  0) {
+/*
+ * this address is 4 pages before the bios, and the bios should present
+ * as unavaible memory
+ */
+r = kvm_set_identity_map_addr(kvm, 0xfeffc000);
+if (r  0) {
+fprintf(stderr, kvm_init_identity_map_page: 
+unable to set identity mapping addr\n);
+return r;
+}
+}
 #endif
-   return 0;
+return 0;
 }
 
 static int kvm_create_pit(kvm_context_t kvm)
 {
 #ifdef KVM_CAP_PIT
-   int r;
-
-   kvm_state-pit_in_kernel = 0;
-   if

[COMMIT master] qemu-kvm-x86.c: add braces where appropriate

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Adjust to comply with CODING_STYLE, at least where braces are concerned.

Signed-off-by: Avi Kivity a...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index c5d44e0..46257d6 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -123,9 +123,9 @@ static int kvm_create_pit(kvm_context_t kvm)
 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_PIT);
 if (r  0) {
 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT);
-if (r = 0)
+if (r = 0) {
 kvm_state-pit_in_kernel = 1;
-else {
+} else {
 fprintf(stderr, Create kernel PIC irqchip failed\n);
 return r;
 }
@@ -141,20 +141,24 @@ int kvm_arch_create(kvm_context_t kvm, unsigned long 
phys_mem_bytes,
 int r = 0;
 
 r = kvm_init_tss(kvm);
-if (r  0)
+if (r  0) {
 return r;
+}
 
 r = kvm_init_identity_map_page(kvm);
-if (r  0)
+if (r  0) {
 return r;
+}
 
 r = kvm_create_pit(kvm);
-if (r  0)
+if (r  0) {
 return r;
+}
 
 r = kvm_init_coalesced_mmio(kvm);
-if (r  0)
+if (r  0) {
 return r;
+}
 
 return 0;
 }
@@ -211,12 +215,14 @@ int kvm_get_lapic(CPUState *env, struct kvm_lapic_state 
*s)
 {
 int r = 0;
 
-if (!kvm_irqchip_in_kernel())
+if (!kvm_irqchip_in_kernel()) {
 return r;
+}
 
 r = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, s);
-if (r  0)
+if (r  0) {
 fprintf(stderr, KVM_GET_LAPIC failed\n);
+}
 return r;
 }
 
@@ -224,13 +230,15 @@ int kvm_set_lapic(CPUState *env, struct kvm_lapic_state 
*s)
 {
 int r = 0;
 
-if (!kvm_irqchip_in_kernel())
+if (!kvm_irqchip_in_kernel()) {
 return 0;
+}
 
 r = kvm_vcpu_ioctl(env, KVM_SET_LAPIC, s);
 
-if (r  0)
+if (r  0) {
 fprintf(stderr, KVM_SET_LAPIC failed\n);
+}
 return r;
 }
 
@@ -240,30 +248,34 @@ int kvm_set_lapic(CPUState *env, struct kvm_lapic_state 
*s)
 
 int kvm_get_pit(kvm_context_t kvm, struct kvm_pit_state *s)
 {
-if (!kvm_pit_in_kernel())
+if (!kvm_pit_in_kernel()) {
 return 0;
+}
 return kvm_vm_ioctl(kvm_state, KVM_GET_PIT, s);
 }
 
 int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s)
 {
-if (!kvm_pit_in_kernel())
+if (!kvm_pit_in_kernel()) {
 return 0;
+}
 return kvm_vm_ioctl(kvm_state, KVM_SET_PIT, s);
 }
 
 #ifdef KVM_CAP_PIT_STATE2
 int kvm_get_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2)
 {
-if (!kvm_pit_in_kernel())
+if (!kvm_pit_in_kernel()) {
 return 0;
+}
 return kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, ps2);
 }
 
 int kvm_set_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2)
 {
-if (!kvm_pit_in_kernel())
+if (!kvm_pit_in_kernel()) {
 return 0;
+}
 return kvm_vm_ioctl(kvm_state, KVM_SET_PIT2, ps2);
 }
 
@@ -303,12 +315,14 @@ void kvm_show_code(CPUState *env)
 }
 rip = sregs.cs.base + regs.rip;
 back_offset = regs.rip;
-if (back_offset  20)
+if (back_offset  20) {
 back_offset = 20;
+}
 *code_str = 0;
 for (n = -back_offset; n  SHOW_CODE_LEN-back_offset; ++n) {
-if (n == 0)
+if (n == 0) {
 strcat(code_str,  --);
+}
 cpu_physical_memory_rw(rip + n, code, 1, 1);
 sprintf(code_str + strlen(code_str),  %02x, code);
 }
@@ -326,8 +340,9 @@ static struct kvm_msr_list *kvm_get_msr_list(void)
 
 sizer.nmsrs = 0;
 r = kvm_ioctl(kvm_state, KVM_GET_MSR_INDEX_LIST, sizer);
-if (r  0  r != -E2BIG)
+if (r  0  r != -E2BIG) {
 return NULL;
+}
 /* Old kernel modules had a bug and could write beyond the provided
memory. Allocate at least a safe amount of 1K. */
 msrs = qemu_malloc(MAX(1024, sizeof(*msrs) +
@@ -536,8 +551,9 @@ static int kvm_enable_tpr_access_reporting(CPUState *env)
 struct kvm_tpr_access_ctl tac = { .enabled = 1 };
 
 r = kvm_ioctl(env-kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_VAPIC);
-if (r = 0)
+if (r = 0) {
 return -ENOSYS;
+}
 return kvm_vcpu_ioctl(env, KVM_TPR_ACCESS_REPORTING, tac);
 }
 #endif
@@ -581,22 +597,27 @@ int kvm_arch_qemu_create_context(void)
 uname(utsname);
 lm_capable_kernel = strcmp(utsname.machine, x86_64) == 0;
 
-if (kvm_shadow_memory)
+if (kvm_shadow_memory) {
 kvm_set_shadow_pages(kvm_context, kvm_shadow_memory);
+}
 
 kvm_msr_list = kvm_get_msr_list();
-if (!kvm_msr_list)
+if (!kvm_msr_list) {
 return -1;
+}
 for (i = 0; i  kvm_msr_list-nmsrs; ++i) {
-if (kvm_msr_list-indices[i] == MSR_STAR)
+if (kvm_msr_list-indices[i] == MSR_STAR) {
 kvm_has_msr_star = 1;
-if (kvm_msr_list-indices[i] == MSR_VM_HSAVE_PA)
+}
+if (kvm_msr_list-indices[i] == MSR_VM_HSAVE_PA) {

[COMMIT master] qemu-kvm.c: add braces where appropriate

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Adjust to comply with CODING_STYLE, at least where braces are concerned.

Signed-off-by: Avi Kivity a...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index c9818de..36f3a2e 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -201,8 +201,9 @@ int kvm_init(int smp_cpus)
 kvm_context-max_gsi = gsi_bits;
 
 /* Mark any over-allocated bits as already in use */
-for (i = gsi_count; i  gsi_bits; i++)
+for (i = gsi_count; i  gsi_bits; i++) {
 set_gsi(kvm_context, i);
+}
 }
 
 kvm_cpu_register_phys_memory_client();
@@ -296,8 +297,9 @@ static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t 
id)
 {
 #ifdef KVM_CAP_SET_BOOT_CPU_ID
 int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
-if (r  0)
+if (r  0) {
 return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
+}
 return -ENOSYS;
 #else
 return -ENOSYS;
@@ -352,8 +354,9 @@ void kvm_create_irqchip(kvm_context_t kvm)
 #if defined(KVM_CAP_IRQ_INJECT_STATUS)  defined(KVM_IRQ_LINE_STATUS)
 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
   KVM_CAP_IRQ_INJECT_STATUS);
-if (r  0)
+if (r  0) {
 kvm-irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
+}
 #endif
 kvm-irqchip_in_kernel = 1;
 } else
@@ -369,17 +372,22 @@ int kvm_create(kvm_context_t kvm, unsigned long 
phys_mem_bytes, void **vm_mem)
 int r, i;
 
 r = kvm_create_vm(kvm);
-if (r  0)
+if (r  0) {
 return r;
+}
 r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
-if (r  0)
+if (r  0) {
 return r;
-for (i = 0; i  ARRAY_SIZE(kvm_state-slots); i++)
+}
+for (i = 0; i  ARRAY_SIZE(kvm_state-slots); i++) {
 kvm_state-slots[i].slot = i;
+}
 
 r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
-if (r  0)
+if (r  0) {
 return r;
+}
+
 kvm_create_irqchip(kvm);
 
 return 0;
@@ -392,13 +400,15 @@ int kvm_set_irq_level(kvm_context_t kvm, int irq, int 
level, int *status)
 struct kvm_irq_level event;
 int r;
 
-if (!kvm-irqchip_in_kernel)
+if (!kvm-irqchip_in_kernel) {
 return 0;
+}
 event.level = level;
 event.irq = irq;
 r = kvm_vm_ioctl(kvm_state, kvm-irqchip_inject_ioctl, event);
-if (r  0)
+if (r  0) {
 perror(kvm_set_irq_level);
+}
 
 if (status) {
 #ifdef KVM_CAP_IRQ_INJECT_STATUS
@@ -416,8 +426,9 @@ int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip 
*chip)
 {
 int r;
 
-if (!kvm-irqchip_in_kernel)
+if (!kvm-irqchip_in_kernel) {
 return 0;
+}
 r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
 if (r  0) {
 perror(kvm_get_irqchip\n);
@@ -429,8 +440,9 @@ int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip 
*chip)
 {
 int r;
 
-if (!kvm-irqchip_in_kernel)
+if (!kvm-irqchip_in_kernel) {
 return 0;
+}
 r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
 if (r  0) {
 perror(kvm_set_irqchip\n);
@@ -487,8 +499,9 @@ int kvm_get_mpstate(CPUState *env, struct kvm_mp_state 
*mp_state)
 int r;
 
 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
-if (r  0)
+if (r  0) {
 return kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, mp_state);
+}
 return -ENOSYS;
 }
 
@@ -497,8 +510,9 @@ int kvm_set_mpstate(CPUState *env, struct kvm_mp_state 
*mp_state)
 int r;
 
 r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
-if (r  0)
+if (r  0) {
 return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, mp_state);
+}
 return -ENOSYS;
 }
 #endif
@@ -534,8 +548,9 @@ static int handle_mmio(CPUState *env)
 void *data = kvm_run-mmio.data;
 
 /* hack: Red Hat 7.1 generates these weird accesses. */
-if ((addr  0xa - 4  addr = 0xa)  kvm_run-mmio.len == 3)
+if ((addr  0xa - 4  addr = 0xa)  kvm_run-mmio.len == 3) {
 return 0;
+}
 
 cpu_physical_memory_rw(addr, data, kvm_run-mmio.len, 
kvm_run-mmio.is_write);
 return 0;
@@ -596,13 +611,15 @@ int kvm_run(CPUState *env)
 }
 push_nmi(kvm);
 #if !defined(__s390__)
-if (!kvm-irqchip_in_kernel)
+if (!kvm-irqchip_in_kernel) {
 run-request_interrupt_window = kvm_arch_try_push_interrupts(env);
+}
 #endif
 
 r = pre_kvm_run(kvm, env);
-if (r)
+if (r) {
 return r;
+}
 if (env-exit_request) {
 env-exit_request = 0;
 pthread_kill(env-kvm_cpu_state.thread, SIG_IPI);
@@ -684,9 +701,10 @@ int kvm_run(CPUState *env)
 break;
 }
 }
-  more:
-if (!r)
+more:
+if (!r) {
 goto again;
+}
 return r;
 }
 
@@ -822,13 +840,15 @@ int kvm_add_routing_entry(kvm_context_t kvm,
 
 if (kvm-irq_routes-nr ==

[COMMIT master] kvm: reset MSR_IA32_CR_PAT correctly

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

The power-on value of MSR_IA32_CR_PAT is not 0 - that disables caching and
makes everything dog slow.

Fix to reset MSR_IA32_CR_PAT to the correct value.

Signed-off-by: Avi Kivity a...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 46257d6..016dcf1 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -1323,12 +1323,21 @@ static int kvm_reset_msrs(CPUState *env)
 } msr_data;
 int n;
 struct kvm_msr_entry *msrs = msr_data.entries;
+uint32_t index;
+uint64_t data;
 
 if (!kvm_msr_list) {
 return -1;
 }
 
 for (n = 0; n  kvm_msr_list-nmsrs; n++) {
+index = kvm_msr_list-indices[n];
+switch (index) {
+case MSR_PAT:
+data = 0x0007040600070406ULL;
+default:
+data = 0;
+}
 kvm_msr_entry_set(msrs[n], kvm_msr_list-indices[n], 0);
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] qemu-kvm: drop posix-aio-compat.c's signalfd usage

2010-09-13 Thread Avi Kivity

From: Marcelo Tosatti mtosa...@redhat.com

Block SIGUSR2, which makes the signal be handled through qemu-kvm.c's
signalfd.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index c05c77b..a67ffe3 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -26,7 +26,6 @@
 #include osdep.h
 #include qemu-common.h
 #include block_int.h
-#include compatfd.h
 
 #include block/raw-posix-aio.h
 
@@ -54,7 +53,7 @@ struct qemu_paiocb {
 };
 
 typedef struct PosixAioState {
-int fd;
+int rfd, wfd;
 struct qemu_paiocb *first_aio;
 } PosixAioState;
 
@@ -473,29 +472,18 @@ static int posix_aio_process_queue(void *opaque)
 static void posix_aio_read(void *opaque)
 {
 PosixAioState *s = opaque;
-union {
-struct qemu_signalfd_siginfo siginfo;
-char buf[128];
-} sig;
-size_t offset;
+ssize_t len;
 
-/* try to read from signalfd, don't freak out if we can't read anything */
-offset = 0;
-while (offset  128) {
-ssize_t len;
+/* read all bytes from signal pipe */
+for (;;) {
+char bytes[16];
 
-len = read(s-fd, sig.buf + offset, 128 - offset);
+len = read(s-rfd, bytes, sizeof(bytes));
 if (len == -1  errno == EINTR)
-continue;
-if (len == -1  errno == EAGAIN) {
-/* there is no natural reason for this to happen,
- * so we'll spin hard until we get everything just
- * to be on the safe side. */
-if (offset  0)
-continue;
-}
-
-offset += len;
+continue; /* try again */
+if (len == sizeof(bytes))
+continue; /* more to read */
+break;
 }
 
 posix_aio_process_queue(s);
@@ -509,6 +497,20 @@ static int posix_aio_flush(void *opaque)
 
 static PosixAioState *posix_aio_state;
 
+static void aio_signal_handler(int signum)
+{
+if (posix_aio_state) {
+char byte = 0;
+ssize_t ret;
+
+ret = write(posix_aio_state-wfd, byte, sizeof(byte));
+if (ret  0  errno != EAGAIN)
+die(write());
+}
+
+qemu_service_io();
+}
+
 static void paio_remove(struct qemu_paiocb *acb)
 {
 struct qemu_paiocb **pacb;
@@ -610,8 +612,9 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
 
 int paio_init(void)
 {
-sigset_t mask;
+struct sigaction act;
 PosixAioState *s;
+int fds[2];
 int ret;
 
 if (posix_aio_state)
@@ -619,21 +622,24 @@ int paio_init(void)
 
 s = qemu_malloc(sizeof(PosixAioState));
 
-/* Make sure to block AIO signal */
-sigemptyset(mask);
-sigaddset(mask, SIGUSR2);
-sigprocmask(SIG_BLOCK, mask, NULL);
+sigfillset(act.sa_mask);
+act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
+act.sa_handler = aio_signal_handler;
+sigaction(SIGUSR2, act, NULL);
 
 s-first_aio = NULL;
-s-fd = qemu_signalfd(mask);
-if (s-fd == -1) {
-fprintf(stderr, failed to create signalfd\n);
+if (qemu_pipe(fds) == -1) {
+fprintf(stderr, failed to create pipe\n);
 return -1;
 }
 
-fcntl(s-fd, F_SETFL, O_NONBLOCK);
+s-rfd = fds[0];
+s-wfd = fds[1];
+
+fcntl(s-rfd, F_SETFL, O_NONBLOCK);
+fcntl(s-wfd, F_SETFL, O_NONBLOCK);
 
-qemu_aio_set_fd_handler(s-fd, posix_aio_read, NULL, posix_aio_flush,
+qemu_aio_set_fd_handler(s-rfd, posix_aio_read, NULL, posix_aio_flush,
 posix_aio_process_queue, s);
 
 ret = pthread_attr_init(attr);
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 060c47d..2fb927c 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1680,6 +1680,7 @@ int kvm_main_loop(void)
 sigemptyset(mask);
 sigaddset(mask, SIGIO);
 sigaddset(mask, SIGALRM);
+sigaddset(mask, SIGUSR2);
 sigaddset(mask, SIGBUS);
 sigprocmask(SIG_BLOCK, mask, NULL);
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] Fix "kvm: reset MSR_IA32_CR_PAT correctly" thinkos

2010-09-13 Thread Avi Kivity

From: Marcelo Tosatti mtosa...@redhat.com

Missing break and wrong parameter.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 016dcf1..fd974b3 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -1335,10 +1335,11 @@ static int kvm_reset_msrs(CPUState *env)
 switch (index) {
 case MSR_PAT:
 data = 0x0007040600070406ULL;
+break;
 default:
 data = 0;
 }
-kvm_msr_entry_set(msrs[n], kvm_msr_list-indices[n], 0);
+kvm_msr_entry_set(msrs[n], kvm_msr_list-indices[n], data);
 }
 
 msr_data.info.nmsrs = n;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] Add missing tcg_prologue_init() for --disable-cpu-emulation build

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Add missing tcg_prologue_init() and tcg_ctx, remove code_gen_max_block_size.

Fixes ./configure --disable-cpu-emulation.

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/target-i386/fake-exec.c b/target-i386/fake-exec.c
index dfa202d..e6f8363 100644
--- a/target-i386/fake-exec.c
+++ b/target-i386/fake-exec.c
@@ -12,22 +12,20 @@
  */
 #include exec.h
 #include cpu.h
+#include tcg.h
 
 int code_copy_enabled = 0;
 
 CCTable cc_table[CC_OP_NB];
 
+TCGContext tcg_ctx;
+
 void cpu_dump_statistics (CPUState *env, FILE*f,
   int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
   int flags)
 {
 }
 
-unsigned long code_gen_max_block_size(void)
-{
-return 32;
-}
-
 void cpu_gen_init(void)
 {
 }
@@ -48,3 +46,7 @@ int cpu_x86_gen_code(CPUState *env, TranslationBlock *tb, int 
*gen_code_size_ptr
 void optimize_flags_init(void)
 {
 }
+
+void tcg_prologue_init(TCGContext *ctx)
+{
+}
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] qemu-kvm: use upstream eventfd code

2010-09-13 Thread Avi Kivity

From: Marcelo Tosatti mtosa...@redhat.com

Upstream code is equivalent.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/cpus.c b/cpus.c
index 8319d4e..c545a62 100644
--- a/cpus.c
+++ b/cpus.c
@@ -290,11 +290,6 @@ void qemu_notify_event(void)
 {
 CPUState *env = cpu_single_env;
 
-if (kvm_enabled()) {
-qemu_kvm_notify_work();
-return;
-}
-
 qemu_event_increment ();
 if (env) {
 cpu_exit(env);
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 36f3a2e..060c47d 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -71,7 +71,6 @@ static int qemu_system_ready;
 #define SIG_IPI (SIGRTMIN+4)
 
 pthread_t io_thread;
-static int io_thread_fd = -1;
 static int io_thread_sigfd = -1;
 
 static CPUState *kvm_debug_cpu_requested;
@@ -1634,28 +1633,6 @@ int kvm_init_ap(void)
 return 0;
 }
 
-void qemu_kvm_notify_work(void)
-{
-/* Write 8 bytes to be compatible with eventfd.  */
-static uint64_t val = 1;
-ssize_t ret;
-
-if (io_thread_fd == -1) {
-return;
-}
-
-do {
-ret = write(io_thread_fd, val, sizeof(val));
-} while (ret  0  errno == EINTR);
-
-/* EAGAIN is fine in case we have a pipe.  */
-if (ret  0  errno != EAGAIN) {
- fprintf(stderr, qemu_kvm_notify_work: write() filed: %s\n,
- strerror(errno));
- exit (1);
-}
-}
-
 /* If we have signalfd, we mask out the signals we want to handle and then
  * use signalfd to listen for them.  We rely on whatever the current signal
  * handler is to dispatch the signals when we receive them.
@@ -1692,41 +1669,14 @@ static void sigfd_handler(void *opaque)
 }
 }
 
-/* Used to break IO thread out of select */
-static void io_thread_wakeup(void *opaque)
-{
-int fd = (unsigned long) opaque;
-ssize_t len;
-char buffer[512];
-
-/* Drain the notify pipe.  For eventfd, only 8 bytes will be read.  */
-do {
-len = read(fd, buffer, sizeof(buffer));
-} while ((len == -1  errno == EINTR) || len == sizeof(buffer));
-}
-
 int kvm_main_loop(void)
 {
-int fds[2];
 sigset_t mask;
 int sigfd;
 
 io_thread = pthread_self();
 qemu_system_ready = 1;
 
-if (qemu_eventfd(fds) == -1) {
-fprintf(stderr, failed to create eventfd\n);
-return -errno;
-}
-
-fcntl(fds[0], F_SETFL, O_NONBLOCK);
-fcntl(fds[1], F_SETFL, O_NONBLOCK);
-
-qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
- (void *)(unsigned long) fds[0]);
-
-io_thread_fd = fds[1];
-
 sigemptyset(mask);
 sigaddset(mask, SIGIO);
 sigaddset(mask, SIGALRM);
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 42c990d..9809574 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -863,8 +863,6 @@ void qemu_kvm_aio_wait_start(void);
 void qemu_kvm_aio_wait(void);
 void qemu_kvm_aio_wait_end(void);
 
-void qemu_kvm_notify_work(void);
-
 void kvm_tpr_access_report(CPUState *env, uint64_t rip, int is_write);
 
 int kvm_arch_init_irq_routing(void);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] Remove kvm/doxygen.conf

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Unused.

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/kvm/doxygen.conf b/kvm/doxygen.conf
deleted file mode 100644
index 21a04c0..000
--- a/kvm/doxygen.conf
+++ /dev/null
@@ -1,1252 +0,0 @@
-# Doxyfile 1.5.1
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project
-#
-# All text after a hash (#) is considered a comment and will be ignored
-# The format is:
-#   TAG = value [value, ...]
-# For lists items can also be appended using:
-#   TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes ( )
-
-#---
-# Project related configuration options
-#---
-
-# The PROJECT_NAME tag is a single word (or a sequence of words surrounded 
-# by quotes) that should identify the project.
-
-PROJECT_NAME   = KVM
-
-# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
-# This could be handy for archiving the generated documentation or 
-# if some version control system is used.
-
-PROJECT_NUMBER = Release 7
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 
-# base path where the generated documentation will be put. 
-# If a relative path is entered, it will be relative to the location 
-# where doxygen was started. If left blank the current directory will be used.
-
-OUTPUT_DIRECTORY   = docs
-
-# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 
-# 4096 sub-directories (in 2 levels) under the output directory of each output 
-# format and will distribute the generated files over these directories. 
-# Enabling this option can be useful when feeding doxygen a huge amount of 
-# source files, where putting all generated files in the same directory would 
-# otherwise cause performance problems for the file system.
-
-CREATE_SUBDIRS = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all 
-# documentation generated by doxygen is written. Doxygen will use this 
-# information to generate all constant output in the proper language. 
-# The default language is English, other supported languages are: 
-# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, 
-# Croatian, Czech, Danish, Dutch, Finnish, French, German, Greek, Hungarian, 
-# Italian, Japanese, Japanese-en (Japanese with English messages), Korean, 
-# Korean-en, Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, 
-# Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian.
-
-OUTPUT_LANGUAGE= English
-
-# This tag can be used to specify the encoding used in the generated output. 
-# The encoding is not always determined by the language that is chosen, 
-# but also whether or not the output is meant for Windows or non-Windows 
users. 
-# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES 
-# forces the Windows encoding (this is the default for the Windows binary), 
-# whereas setting the tag to NO uses a Unix-style encoding (the default for 
-# all platforms other than Windows).
-
-USE_WINDOWS_ENCODING   = NO
-
-# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 
-# include brief member descriptions after the members that are listed in 
-# the file and class documentation (similar to JavaDoc). 
-# Set to NO to disable this.
-
-BRIEF_MEMBER_DESC  = YES
-
-# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 
-# the brief description of a member or function before the detailed 
description. 
-# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 
-# brief descriptions will be completely suppressed.
-
-REPEAT_BRIEF   = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator 
-# that is used to form the text in various listings. Each string 
-# in this list, if found as the leading text of the brief description, will be 
-# stripped from the text and the result after processing the whole list, is 
-# used as the annotated text. Otherwise, the brief description is used as-is. 
-# If left blank, the following values are used ($name is automatically 
-# replaced with the name of the entity): The $name class The $name widget 
-# The $name file is provides specifies contains 
-# represents a an the
-
-ABBREVIATE_BRIEF   = 
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 
-# Doxygen will generate a detailed section even if there is only a brief 
-# description.
-
-ALWAYS_DETAILED_SEC= NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all 
-# inherited members of a class in the documentation of that class as if those 
-# members were ordinary class members. Constructors, destructors and 
assignment 
-# operators of the base classes will not be shown.

[COMMIT master] Revert "qemu-kvm: drop posix-aio-compat.c's signalfd usage"

2010-09-13 Thread Avi Kivity

From: Marcelo Tosatti mtosa...@redhat.com

This reverts commit cb375ad1a62ba9de0207d144d0ad8ca1bee09d33.

Breaks FC8 32/64 install.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index a67ffe3..c05c77b 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -26,6 +26,7 @@
 #include osdep.h
 #include qemu-common.h
 #include block_int.h
+#include compatfd.h
 
 #include block/raw-posix-aio.h
 
@@ -53,7 +54,7 @@ struct qemu_paiocb {
 };
 
 typedef struct PosixAioState {
-int rfd, wfd;
+int fd;
 struct qemu_paiocb *first_aio;
 } PosixAioState;
 
@@ -472,18 +473,29 @@ static int posix_aio_process_queue(void *opaque)
 static void posix_aio_read(void *opaque)
 {
 PosixAioState *s = opaque;
-ssize_t len;
+union {
+struct qemu_signalfd_siginfo siginfo;
+char buf[128];
+} sig;
+size_t offset;
 
-/* read all bytes from signal pipe */
-for (;;) {
-char bytes[16];
+/* try to read from signalfd, don't freak out if we can't read anything */
+offset = 0;
+while (offset  128) {
+ssize_t len;
 
-len = read(s-rfd, bytes, sizeof(bytes));
+len = read(s-fd, sig.buf + offset, 128 - offset);
 if (len == -1  errno == EINTR)
-continue; /* try again */
-if (len == sizeof(bytes))
-continue; /* more to read */
-break;
+continue;
+if (len == -1  errno == EAGAIN) {
+/* there is no natural reason for this to happen,
+ * so we'll spin hard until we get everything just
+ * to be on the safe side. */
+if (offset  0)
+continue;
+}
+
+offset += len;
 }
 
 posix_aio_process_queue(s);
@@ -497,20 +509,6 @@ static int posix_aio_flush(void *opaque)
 
 static PosixAioState *posix_aio_state;
 
-static void aio_signal_handler(int signum)
-{
-if (posix_aio_state) {
-char byte = 0;
-ssize_t ret;
-
-ret = write(posix_aio_state-wfd, byte, sizeof(byte));
-if (ret  0  errno != EAGAIN)
-die(write());
-}
-
-qemu_service_io();
-}
-
 static void paio_remove(struct qemu_paiocb *acb)
 {
 struct qemu_paiocb **pacb;
@@ -612,9 +610,8 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
 
 int paio_init(void)
 {
-struct sigaction act;
+sigset_t mask;
 PosixAioState *s;
-int fds[2];
 int ret;
 
 if (posix_aio_state)
@@ -622,24 +619,21 @@ int paio_init(void)
 
 s = qemu_malloc(sizeof(PosixAioState));
 
-sigfillset(act.sa_mask);
-act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
-act.sa_handler = aio_signal_handler;
-sigaction(SIGUSR2, act, NULL);
+/* Make sure to block AIO signal */
+sigemptyset(mask);
+sigaddset(mask, SIGUSR2);
+sigprocmask(SIG_BLOCK, mask, NULL);
 
 s-first_aio = NULL;
-if (qemu_pipe(fds) == -1) {
-fprintf(stderr, failed to create pipe\n);
+s-fd = qemu_signalfd(mask);
+if (s-fd == -1) {
+fprintf(stderr, failed to create signalfd\n);
 return -1;
 }
 
-s-rfd = fds[0];
-s-wfd = fds[1];
-
-fcntl(s-rfd, F_SETFL, O_NONBLOCK);
-fcntl(s-wfd, F_SETFL, O_NONBLOCK);
+fcntl(s-fd, F_SETFL, O_NONBLOCK);
 
-qemu_aio_set_fd_handler(s-rfd, posix_aio_read, NULL, posix_aio_flush,
+qemu_aio_set_fd_handler(s-fd, posix_aio_read, NULL, posix_aio_flush,
 posix_aio_process_queue, s);
 
 ret = pthread_attr_init(attr);
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 2fb927c..060c47d 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1680,7 +1680,6 @@ int kvm_main_loop(void)
 sigemptyset(mask);
 sigaddset(mask, SIGIO);
 sigaddset(mask, SIGALRM);
-sigaddset(mask, SIGUSR2);
 sigaddset(mask, SIGBUS);
 sigprocmask(SIG_BLOCK, mask, NULL);
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] Don't launch guest if -no-kvm when tcg is not configured in

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/vl.c b/vl.c
index 2a32cc5..22a3616 100644
--- a/vl.c
+++ b/vl.c
@@ -2473,6 +2473,10 @@ int main(int argc, char **argv, char **envp)
 break;
case QEMU_OPTION_no_kvm:
kvm_allowed = 0;
+#ifdef CONFIG_NO_CPU_EMULATION
+fprintf(stderr, cpu emulation not configured\n);
+exit(1);
+#endif
break;
 #ifdef CONFIG_KVM
case QEMU_OPTION_no_kvm_irqchip: {
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Fix 32 bit legacy paging with NPT

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch fixes 32 bit legacy paging with NPT enabled. The
mmu_check_root call on the top-level of the loop causes
root_gfn to take values (in the tdp_enabled path) which are
outside of guest memory. So the mmu_check_root call fails at
some point in the loop iteration, causing the guest to
triple-fault.
This patch moves the mmu_check_root calls to the places
where they are really necessary. As a side-effect it
introduces a check for the root of a pae page table too.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d2dad65..b2136f9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2387,6 +2387,10 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
return 0;
}
direct = !is_paging(vcpu);
+
+   if (mmu_check_root(vcpu, root_gfn))
+   return 1;
+
for (i = 0; i  4; ++i) {
hpa_t root = vcpu-arch.mmu.pae_root[i];
 
@@ -2398,10 +2402,10 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
continue;
}
root_gfn = pdptr  PAGE_SHIFT;
+   if (mmu_check_root(vcpu, root_gfn))
+   return 1;
} else if (vcpu-arch.mmu.root_level == 0)
root_gfn = 0;
-   if (mmu_check_root(vcpu, root_gfn))
-   return 1;
if (tdp_enabled) {
direct = 1;
root_gfn = i  30;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: x86: Emulate MSR_EBC_FREQUENCY_ID

2010-09-13 Thread Avi Kivity

From: Jes Sorensen jes.soren...@redhat.com

Some operating systems store data about the host processor at the
time of installation and, when booted on a more up-to-date CPU, try
to read MSR_EBC_FREQUENCY_ID. This has been observed with Windows XP.

Signed-off-by: Jes Sorensen jes.soren...@redhat.com
Reviewed-by: Juan Quintela quint...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f47db25..9d43477 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1651,6 +1651,20 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, 
u64 *pdata)
case 0xcd: /* fsb frequency */
data = 3;
break;
+   /*
+* MSR_EBC_FREQUENCY_ID
+* Conservative value valid for even the basic CPU models.
+* Models 0,1: 000 in bits 23:21 indicating a bus speed of
+* 100MHz, model 2 000 in bits 18:16 indicating 100MHz,
+* and 266MHz for model 3, or 4. Set Core Clock
+* Frequency to System Bus Frequency Ratio to 1 (bits
+* 31:24) even though these are only valid for CPU
+* models  2, however guests may end up dividing or
+* multiplying by zero otherwise.
+*/
+   case MSR_EBC_FREQUENCY_ID:
+   data = 1  24;
+   break;
case MSR_IA32_APICBASE:
data = kvm_get_apic_base(vcpu);
break;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Introduce inject_page_fault function pointer

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch introduces an inject_page_fault function pointer
into struct kvm_mmu which will be used to inject a page
fault. This will be used later when Nested Nested Paging is
implemented.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ab708ee..3fefcd8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -239,6 +239,9 @@ struct kvm_mmu {
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
+   void (*inject_page_fault)(struct kvm_vcpu *vcpu,
+ unsigned long addr,
+ u32 error_code);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
u32 *error);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e4a7de4..a751dfc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2571,7 +2571,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
  u64 addr,
  u32 err_code)
 {
-   kvm_inject_page_fault(vcpu, addr, err_code);
+   vcpu-arch.mmu.inject_page_fault(vcpu, addr, err_code);
 }
 
 static void paging_free(struct kvm_vcpu *vcpu)
@@ -2721,6 +2721,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context-direct_map = true;
context-set_cr3 = kvm_x86_ops-set_tdp_cr3;
context-get_cr3 = get_cr3;
+   context-inject_page_fault = kvm_inject_page_fault;
 
if (!is_paging(vcpu)) {
context-gva_to_gpa = nonpaging_gva_to_gpa;
@@ -2762,6 +2763,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
vcpu-arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
vcpu-arch.mmu.set_cr3   = kvm_x86_ops-set_cr3;
vcpu-arch.mmu.get_cr3   = get_cr3;
+   vcpu-arch.mmu.inject_page_fault = kvm_inject_page_fault;
 
return r;
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Make set_cr3 a function pointer in kvm_mmu

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This is necessary to implement Nested Nested Paging. As a
side effect this allows some cleanups in the SVM nested
paging code.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 53cdf39..43c8db0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -236,6 +236,7 @@ struct kvm_pio_request {
  */
 struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
+   void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5c28e97..c8acb96 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2714,6 +2714,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context-shadow_root_level = kvm_x86_ops-get_tdp_level();
context-root_hpa = INVALID_PAGE;
context-direct_map = true;
+   context-set_cr3 = kvm_x86_ops-set_cr3;
 
if (!is_paging(vcpu)) {
context-gva_to_gpa = nonpaging_gva_to_gpa;
@@ -2752,7 +2753,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
r = paging32_init_context(vcpu);
 
vcpu-arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
-   vcpu-arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
+   vcpu-arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
+   vcpu-arch.mmu.set_cr3   = kvm_x86_ops-set_cr3;
 
return r;
 }
@@ -2796,7 +2798,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
if (r)
goto out;
/* set_cr3() should ensure TLB has been flushed */
-   kvm_x86_ops-set_cr3(vcpu, vcpu-arch.mmu.root_hpa);
+   vcpu-arch.mmu.set_cr3(vcpu, vcpu-arch.mmu.root_hpa);
 out:
return r;
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: X86: Introduce a tdp_set_cr3 function

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch introduces a special set_tdp_cr3 function pointer
in kvm_x86_ops which is only used for tdp-enabled mmu
contexts. This allows some hacks to be removed from the svm code.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 43c8db0..aeeea9c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -526,6 +526,8 @@ struct kvm_x86_ops {
bool (*rdtscp_supported)(void);
void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment);
 
+   void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
+
void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
 
bool (*has_wbinvd_exit)(void);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c8acb96..a55f8d5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2714,7 +2714,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context-shadow_root_level = kvm_x86_ops-get_tdp_level();
context-root_hpa = INVALID_PAGE;
context-direct_map = true;
-   context-set_cr3 = kvm_x86_ops-set_cr3;
+   context-set_cr3 = kvm_x86_ops-set_tdp_cr3;
 
if (!is_paging(vcpu)) {
context-gva_to_gpa = nonpaging_gva_to_gpa;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6808f64..094df31 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3216,9 +3216,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
gs_selector = kvm_read_gs();
ldt_selector = kvm_read_ldt();
svm-vmcb-save.cr2 = vcpu-arch.cr2;
-   /* required for live migration with NPT */
-   if (npt_enabled)
-   svm-vmcb-save.cr3 = vcpu-arch.cr3;
 
clgi();
 
@@ -3335,16 +3332,22 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned 
long root)
 {
struct vcpu_svm *svm = to_svm(vcpu);
 
-   if (npt_enabled) {
-   svm-vmcb-control.nested_cr3 = root;
-   force_new_asid(vcpu);
-   return;
-   }
-
svm-vmcb-save.cr3 = root;
force_new_asid(vcpu);
 }
 
+static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   svm-vmcb-control.nested_cr3 = root;
+
+   /* Also sync guest cr3 here in case we live migrate */
+   svm-vmcb-save.cr3 = vcpu-arch.cr3;
+
+   force_new_asid(vcpu);
+}
+
 static int is_disabled(void)
 {
u64 vm_cr;
@@ -3571,6 +3574,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 
.write_tsc_offset = svm_write_tsc_offset,
.adjust_tsc_offset = svm_adjust_tsc_offset,
+
+   .set_tdp_cr3 = set_tdp_cr3,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 676555c..0e62d8a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4347,6 +4347,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
.write_tsc_offset = vmx_write_tsc_offset,
.adjust_tsc_offset = vmx_adjust_tsc_offset,
+
+   .set_tdp_cr3 = vmx_set_cr3,
 };
 
 static int __init vmx_init(void)
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: SVM: Restore correct registers after sel_cr0 intercept emulation

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch implements restoring of the correct rip, rsp, and
rax after the svm emulation in KVM injected a selective_cr0
write intercept into the guest hypervisor. The problem was
that the vmexit is emulated in the instruction emulation
which later commits the registers right after the write-cr0
instruction. So the l1 guest will continue to run with the
l2 rip, rsp and rax resulting in unpredictable behavior.

This patch is not the final word, it is just an easy patch
to fix the issue. The real fix will be done when the
instruction emulator is made aware of nested virtualization.
Until this is done this patch fixes the issue and provides
an easy way to fix this in -stable too.

Cc: sta...@kernel.org
Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 55743ab..ecd4e58 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -88,6 +88,14 @@ struct nested_state {
/* A VMEXIT is required but not yet emulated */
bool exit_required;
 
+   /*
+* If we vmexit during an instruction emulation we need this to restore
+* the l1 guest rip after the emulation
+*/
+   unsigned long vmexit_rip;
+   unsigned long vmexit_rsp;
+   unsigned long vmexit_rax;
+
/* cache for intercepts of the guest */
u16 intercept_cr_read;
u16 intercept_cr_write;
@@ -1213,8 +1221,12 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned 
long cr0)
if (old == new) {
/* cr0 write with ts and mp unchanged */
svm-vmcb-control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
-   if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
+   if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) {
+   svm-nested.vmexit_rip = kvm_rip_read(vcpu);
+   svm-nested.vmexit_rsp = 
kvm_register_read(vcpu, VCPU_REGS_RSP);
+   svm-nested.vmexit_rax = 
kvm_register_read(vcpu, VCPU_REGS_RAX);
return;
+   }
}
}
 
@@ -2430,6 +2442,23 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return emulate_instruction(svm-vcpu, 0, 0, 0) == EMULATE_DONE;
 }
 
+static int cr0_write_interception(struct vcpu_svm *svm)
+{
+   struct kvm_vcpu *vcpu = svm-vcpu;
+   int r;
+
+   r = emulate_instruction(svm-vcpu, 0, 0, 0);
+
+   if (svm-nested.vmexit_rip) {
+   kvm_register_write(vcpu, VCPU_REGS_RIP, svm-nested.vmexit_rip);
+   kvm_register_write(vcpu, VCPU_REGS_RSP, svm-nested.vmexit_rsp);
+   kvm_register_write(vcpu, VCPU_REGS_RAX, svm-nested.vmexit_rax);
+   svm-nested.vmexit_rip = 0;
+   }
+
+   return r == EMULATE_DONE;
+}
+
 static int cr8_write_interception(struct vcpu_svm *svm)
 {
struct kvm_run *kvm_run = svm-vcpu.run;
@@ -2692,7 +2721,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = 
{
[SVM_EXIT_READ_CR4] = emulate_on_interception,
[SVM_EXIT_READ_CR8] = emulate_on_interception,
[SVM_EXIT_CR0_SEL_WRITE]= emulate_on_interception,
-   [SVM_EXIT_WRITE_CR0]= emulate_on_interception,
+   [SVM_EXIT_WRITE_CR0]= cr0_write_interception,
[SVM_EXIT_WRITE_CR3]= emulate_on_interception,
[SVM_EXIT_WRITE_CR4]= emulate_on_interception,
[SVM_EXIT_WRITE_CR8]= cr8_write_interception,
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Make walk_addr_generic capable for two-level walking

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch uses kvm_read_guest_page_mmu to make the
walk_addr_generic function suitable for two-level page
table walking.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index eefe363..f4e09d3 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -124,6 +124,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
unsigned index, pt_access, uninitialized_var(pte_access);
gpa_t pte_gpa;
bool eperm, present, rsvd_fault;
+   int offset;
+   u32 access = 0;
 
trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
 fetch_fault);
@@ -153,12 +155,14 @@ walk:
index = PT_INDEX(addr, walker-level);
 
table_gfn = gpte_to_gfn(pte);
-   pte_gpa = gfn_to_gpa(table_gfn);
-   pte_gpa += index * sizeof(pt_element_t);
+   offset= index * sizeof(pt_element_t);
+   pte_gpa   = gfn_to_gpa(table_gfn) + offset;
walker-table_gfn[walker-level - 1] = table_gfn;
walker-pte_gpa[walker-level - 1] = pte_gpa;
 
-   if (kvm_read_guest(vcpu-kvm, pte_gpa, pte, sizeof(pte))) {
+   if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, pte,
+   offset, sizeof(pte),
+   PFERR_USER_MASK|PFERR_WRITE_MASK)) {
present = false;
break;
}
@@ -209,15 +213,27 @@ walk:
is_large_pte(pte) 
mmu-root_level == PT64_ROOT_LEVEL)) {
int lvl = walker-level;
+   gpa_t real_gpa;
+   gfn_t gfn;
 
-   walker-gfn = gpte_to_gfn_lvl(pte, lvl);
-   walker-gfn += (addr  PT_LVL_OFFSET_MASK(lvl))
-PAGE_SHIFT;
+   gfn = gpte_to_gfn_lvl(pte, lvl);
+   gfn += (addr  PT_LVL_OFFSET_MASK(lvl))  PAGE_SHIFT;
 
if (PTTYPE == 32 
walker-level == PT_DIRECTORY_LEVEL 
is_cpuid_PSE36())
-   walker-gfn += pse36_gfn_delta(pte);
+   gfn += pse36_gfn_delta(pte);
+
+   access |= write_fault ? PFERR_WRITE_MASK : 0;
+   access |= fetch_fault ? PFERR_FETCH_MASK : 0;
+   access |= user_fault  ? PFERR_USER_MASK  : 0;
+
+   real_gpa = mmu-translate_gpa(vcpu, gfn_to_gpa(gfn),
+ access);
+   if (real_gpa == UNMAPPED_GVA)
+   return 0;
+
+   walker-gfn = real_gpa  PAGE_SHIFT;
 
break;
}
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: X86: Introduce pointer to mmu context used for gva_to_gpa

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch introduces the walk_mmu pointer which points to
the mmu-context currently used for gva_to_gpa translations.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7f95260..91c7d35 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -286,9 +286,22 @@ struct kvm_vcpu_arch {
u64 ia32_misc_enable_msr;
bool tpr_access_reporting;
 
+   /*
+* Paging state of the vcpu
+*
+* If the vcpu runs in guest mode with two level paging this still saves
+* the paging mode of the l1 guest. This context is always used to
+* handle faults.
+*/
struct kvm_mmu mmu;
 
/*
+* Pointer to the mmu context currently used for
+* gva_to_gpa translations.
+*/
+   struct kvm_mmu *walk_mmu;
+
+   /*
 * This struct is filled with the necessary information to propagate a
 * page fault into the guest
 */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9936727..cb06ada 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2708,7 +2708,7 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu,
 
 static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
-   struct kvm_mmu *context = vcpu-arch.mmu;
+   struct kvm_mmu *context = vcpu-arch.walk_mmu;
 
context-new_cr3 = nonpaging_new_cr3;
context-page_fault = tdp_page_fault;
@@ -2767,11 +2767,11 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
 static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
-   int r = kvm_init_shadow_mmu(vcpu, vcpu-arch.mmu);
+   int r = kvm_init_shadow_mmu(vcpu, vcpu-arch.walk_mmu);
 
-   vcpu-arch.mmu.set_cr3   = kvm_x86_ops-set_cr3;
-   vcpu-arch.mmu.get_cr3   = get_cr3;
-   vcpu-arch.mmu.inject_page_fault = kvm_inject_page_fault;
+   vcpu-arch.walk_mmu-set_cr3   = kvm_x86_ops-set_cr3;
+   vcpu-arch.walk_mmu-get_cr3   = get_cr3;
+   vcpu-arch.walk_mmu-inject_page_fault = kvm_inject_page_fault;
 
return r;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2364c2c..4196fc7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3456,27 +3456,27 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t 
gpa, u32 access)
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
 {
u32 access = (kvm_x86_ops-get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
-   return vcpu-arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+   return vcpu-arch.walk_mmu-gva_to_gpa(vcpu, gva, access, error);
 }
 
  gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
 {
u32 access = (kvm_x86_ops-get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_FETCH_MASK;
-   return vcpu-arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+   return vcpu-arch.walk_mmu-gva_to_gpa(vcpu, gva, access, error);
 }
 
 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
 {
u32 access = (kvm_x86_ops-get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_WRITE_MASK;
-   return vcpu-arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+   return vcpu-arch.walk_mmu-gva_to_gpa(vcpu, gva, access, error);
 }
 
 /* uses this to access any guest's mapped memory without checking CPL */
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
 {
-   return vcpu-arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
+   return vcpu-arch.walk_mmu-gva_to_gpa(vcpu, gva, 0, error);
 }
 
 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int 
bytes,
@@ -3487,7 +3487,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void 
*val, unsigned int bytes,
int r = X86EMUL_CONTINUE;
 
while (bytes) {
-   gpa_t gpa = vcpu-arch.mmu.gva_to_gpa(vcpu, addr, access, 
error);
+   gpa_t gpa = vcpu-arch.walk_mmu-gva_to_gpa(vcpu, addr, access,
+   error);
unsigned offset = addr  (PAGE_SIZE-1);
unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
@@ -3542,8 +3543,9 @@ static int kvm_write_guest_virt_system(gva_t addr, void 
*val,
int r = X86EMUL_CONTINUE;
 
while (bytes) {
-   gpa_t gpa =  vcpu-arch.mmu.gva_to_gpa(vcpu, addr,
-  PFERR_WRITE_MASK, error);
+   gpa_t gpa =  vcpu-arch.walk_mmu-gva_to_gpa(vcpu, addr,
+PFERR_WRITE_MASK,
+error);
unsigned offset = addr  (PAGE_SIZE-1);
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE -

[COMMIT master] KVM: MMU: Let is_rsvd_bits_set take mmu context instead of vcpu

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch changes the is_rsvd_bits_set() function prototype to
take only a kvm_mmu context instead of a full vcpu.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9e48a77..86f7557 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2578,12 +2578,12 @@ static void paging_free(struct kvm_vcpu *vcpu)
nonpaging_free(vcpu);
 }
 
-static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
 {
int bit7;
 
bit7 = (gpte  7)  1;
-   return (gpte  vcpu-arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+   return (gpte  mmu-rsvd_bits_mask[bit7][level-1]) != 0;
 }
 
 #define PTTYPE 64
@@ -2859,7 +2859,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
return;
 }
 
-   if (is_rsvd_bits_set(vcpu, *(u64 *)new, PT_PAGE_TABLE_LEVEL))
+   if (is_rsvd_bits_set(vcpu-arch.mmu, *(u64 *)new, PT_PAGE_TABLE_LEVEL))
return;
 
++vcpu-kvm-stat.mmu_pte_updated;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 13d0c06..68ee1b7 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -168,7 +168,7 @@ walk:
break;
}
 
-   if (is_rsvd_bits_set(vcpu, pte, walker-level)) {
+   if (is_rsvd_bits_set(vcpu-arch.mmu, pte, walker-level)) {
rsvd_fault = true;
break;
}
@@ -327,6 +327,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, 
struct guest_walker *gw,
u64 *sptep)
 {
struct kvm_mmu_page *sp;
+   struct kvm_mmu *mmu = vcpu-arch.mmu;
pt_element_t *gptep = gw-prefetch_ptes;
u64 *spte;
int i;
@@ -358,7 +359,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, 
struct guest_walker *gw,
gpte = gptep[i];
 
if (!is_present_gpte(gpte) ||
- is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)) {
+ is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) {
if (!sp-unsync)
__set_spte(spte, shadow_notrap_nonpresent_pte);
continue;
@@ -713,7 +714,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *sp,
return -EINVAL;
 
gfn = gpte_to_gfn(gpte);
-   if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)
+   if (is_rsvd_bits_set(vcpu-arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)
  || gfn != sp-gfns[i] || !is_present_gpte(gpte)
  || !(gpte  PT_ACCESSED_MASK)) {
u64 nonpresent;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: SVM: Expect two more candidates for exit_int_info

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch adds INTR and NMI intercepts to the list of
expected intercepts with an exit_int_info set. While this
can't happen on bare metal, it is architecturally legal and may
happen with KVM's SVM emulation.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9df60c3..ede95e0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2991,7 +2991,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
if (is_external_interrupt(svm-vmcb-control.exit_int_info) 
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR 
-   exit_code != SVM_EXIT_NPF  exit_code != SVM_EXIT_TASK_SWITCH)
+   exit_code != SVM_EXIT_NPF  exit_code != SVM_EXIT_TASK_SWITCH 
+   exit_code != SVM_EXIT_INTR  exit_code != SVM_EXIT_NMI)
printk(KERN_ERR %s: unexpected exit_ini_info 0x%x 
   exit_code 0x%x\n,
   __func__, svm-vmcb-control.exit_int_info,
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Introduce generic walk_addr function

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This is the first patch in the series towards a generic
walk_addr implementation which could walk two-dimensional
page tables in the end. In this first step the walk_addr
function is renamed to walk_addr_generic, which takes an
mmu context as an additional parameter.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d07f48a..a704a81 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -114,9 +114,10 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, 
pt_element_t gpte)
 /*
  * Fetch a guest pte for a guest virtual address
  */
-static int FNAME(walk_addr)(struct guest_walker *walker,
-   struct kvm_vcpu *vcpu, gva_t addr,
-   int write_fault, int user_fault, int fetch_fault)
+static int FNAME(walk_addr_generic)(struct guest_walker *walker,
+   struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+   gva_t addr, int write_fault,
+   int user_fault, int fetch_fault)
 {
pt_element_t pte;
gfn_t table_gfn;
@@ -129,10 +130,11 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 walk:
present = true;
eperm = rsvd_fault = false;
-   walker-level = vcpu-arch.mmu.root_level;
-   pte = vcpu-arch.mmu.get_cr3(vcpu);
+   walker-level = mmu-root_level;
+   pte   = mmu-get_cr3(vcpu);
+
 #if PTTYPE == 64
-   if (vcpu-arch.mmu.root_level == PT32E_ROOT_LEVEL) {
+   if (walker-level == PT32E_ROOT_LEVEL) {
pte = kvm_pdptr_read(vcpu, (addr  30)  3);
trace_kvm_mmu_paging_element(pte, walker-level);
if (!is_present_gpte(pte)) {
@@ -143,7 +145,7 @@ walk:
}
 #endif
ASSERT((!is_long_mode(vcpu)  is_pae(vcpu)) ||
-  (vcpu-arch.mmu.get_cr3(vcpu)  CR3_NONPAE_RESERVED_BITS) == 0);
+  (mmu-get_cr3(vcpu)  CR3_NONPAE_RESERVED_BITS) == 0);
 
pt_access = ACC_ALL;
 
@@ -205,7 +207,7 @@ walk:
(PTTYPE == 64 || is_pse(vcpu))) ||
((walker-level == PT_PDPE_LEVEL) 
is_large_pte(pte) 
-   vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL)) {
+   mmu-root_level == PT64_ROOT_LEVEL)) {
int lvl = walker-level;
 
walker-gfn = gpte_to_gfn_lvl(pte, lvl);
@@ -266,6 +268,14 @@ error:
return 0;
 }
 
+static int FNAME(walk_addr)(struct guest_walker *walker,
+   struct kvm_vcpu *vcpu, gva_t addr,
+   int write_fault, int user_fault, int fetch_fault)
+{
+   return FNAME(walk_addr_generic)(walker, vcpu, vcpu-arch.mmu, addr,
+   write_fault, user_fault, fetch_fault);
+}
+
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
  u64 *spte, const void *pte)
 {
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Track page fault data in struct vcpu

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch introduces a struct with two new fields in
vcpu_arch for x86:

* fault.address
* fault.error_code

This will be used to correctly propagate page faults back
into the guest when we could have either an ordinary page
fault or a nested page fault. In the case of a nested page
fault the fault-address is different from the original
address that should be walked. So we need to keep track
of the real fault-address.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_emulate.h 
b/arch/x86/include/asm/kvm_emulate.h
index 1bf1140..5187dd8 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -229,7 +229,6 @@ struct x86_emulate_ctxt {
int exception; /* exception that happens during emulation or -1 */
u32 error_code; /* error code for exception */
bool error_code_valid;
-   unsigned long cr2; /* faulted address in case of #PF */
 
/* decode cache */
struct decode_cache decode;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3fefcd8..235023e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -239,9 +239,7 @@ struct kvm_mmu {
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
-   void (*inject_page_fault)(struct kvm_vcpu *vcpu,
- unsigned long addr,
- u32 error_code);
+   void (*inject_page_fault)(struct kvm_vcpu *vcpu);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
u32 *error);
@@ -288,6 +286,16 @@ struct kvm_vcpu_arch {
bool tpr_access_reporting;
 
struct kvm_mmu mmu;
+
+   /*
+* This struct is filled with the necessary information to propagate a
+* page fault into the guest
+*/
+   struct {
+   u64  address;
+   unsigned error_code;
+   } fault;
+
/* only needed in kvm_pv_mmu_op() path, but it's hot so
 * put it here to avoid allocation */
struct kvm_pv_mmu_op_buffer mmu_op_buffer;
@@ -624,8 +632,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned 
nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 
error_code);
-void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
-  u32 error_code);
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu);
 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
 
 int kvm_pic_set_irq(void *opaque, int irq, int level);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 27d2c22..2b08b78 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -487,11 +487,9 @@ static void emulate_gp(struct x86_emulate_ctxt *ctxt, int 
err)
emulate_exception(ctxt, GP_VECTOR, err, true);
 }
 
-static void emulate_pf(struct x86_emulate_ctxt *ctxt, unsigned long addr,
-  int err)
+static void emulate_pf(struct x86_emulate_ctxt *ctxt)
 {
-   ctxt-cr2 = addr;
-   emulate_exception(ctxt, PF_VECTOR, err, true);
+   emulate_exception(ctxt, PF_VECTOR, 0, true);
 }
 
 static void emulate_ud(struct x86_emulate_ctxt *ctxt)
@@ -834,7 +832,7 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
rc = ops-read_emulated(addr, mc-data + mc-end, n, err,
ctxt-vcpu);
if (rc == X86EMUL_PROPAGATE_FAULT)
-   emulate_pf(ctxt, addr, err);
+   emulate_pf(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
mc-end += n;
@@ -921,7 +919,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt 
*ctxt,
addr = dt.address + index * 8;
ret = ops-read_std(addr, desc, sizeof *desc, ctxt-vcpu,  err);
if (ret == X86EMUL_PROPAGATE_FAULT)
-   emulate_pf(ctxt, addr, err);
+   emulate_pf(ctxt);
 
return ret;
 }
@@ -947,7 +945,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt 
*ctxt,
addr = dt.address + index * 8;
ret = ops-write_std(addr, desc, sizeof *desc, ctxt-vcpu, err);
if (ret == X86EMUL_PROPAGATE_FAULT)
-   emulate_pf(ctxt, addr, err);
+   emulate_pf(ctxt);
 
return ret;
 }
@@ -1117,7 +1115,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
err,

[COMMIT master] x86: Define MSR_EBC_FREQUENCY_ID

2010-09-13 Thread Avi Kivity

From: Jes Sorensen jes.soren...@redhat.com

Signed-off-by: Jes Sorensen jes.soren...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 986f779..83c4bb1 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -198,6 +198,7 @@
 #define MSR_IA32_TSC   0x0010
 #define MSR_IA32_PLATFORM_ID   0x0017
 #define MSR_IA32_EBL_CR_POWERON0x002a
+#define MSR_EBC_FREQUENCY_ID   0x002c
 #define MSR_IA32_FEATURE_CONTROL0x003a
 
 #define FEATURE_CONTROL_LOCKED (10)
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Introduce kvm_read_nested_guest_page()

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch introduces the kvm_read_nested_guest_page function
which reads from the physical memory of the guest. If the
guest is running in guest-mode itself with nested paging
enabled it will read from the guest's guest physical memory
instead.
The patch also changes the code to use this function
where it is necessary.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a2efb70..46843ed 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -392,6 +392,13 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct 
kvm_mmu *mmu,
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
 
+int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+  void *data, int offset, int len, u32 access)
+{
+   return kvm_read_guest_page_mmu(vcpu, vcpu-arch.walk_mmu, gfn,
+  data, offset, len, access);
+}
+
 /*
  * Load the pae pdptrs.  Return true is they are all valid.
  */
@@ -403,8 +410,9 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
int ret;
u64 pdpte[ARRAY_SIZE(vcpu-arch.pdptrs)];
 
-   ret = kvm_read_guest_page(vcpu-kvm, pdpt_gfn, pdpte,
- offset * sizeof(u64), sizeof(pdpte));
+   ret = kvm_read_nested_guest_page(vcpu, pdpt_gfn, pdpte,
+offset * sizeof(u64), sizeof(pdpte),
+PFERR_USER_MASK|PFERR_WRITE_MASK);
if (ret  0) {
ret = 0;
goto out;
@@ -433,6 +441,8 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 {
u64 pdpte[ARRAY_SIZE(vcpu-arch.pdptrs)];
bool changed = true;
+   int offset;
+   gfn_t gfn;
int r;
 
if (is_long_mode(vcpu) || !is_pae(vcpu))
@@ -442,7 +452,10 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
  (unsigned long *)vcpu-arch.regs_avail))
return true;
 
-   r = kvm_read_guest(vcpu-kvm, vcpu-arch.cr3  ~31u, pdpte, 
sizeof(pdpte));
+   gfn = (vcpu-arch.cr3  ~31u)  PAGE_SHIFT;
+   offset = (vcpu-arch.cr3  ~31u)  (PAGE_SIZE - 1);
+   r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
+  PFERR_USER_MASK | PFERR_WRITE_MASK);
if (r  0)
goto out;
changed = memcmp(pdpte, vcpu-arch.pdptrs, sizeof(pdpte)) != 0;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: SVM: Initialize Nested Nested MMU context on VMRUN

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch adds code to initialize the Nested Nested Paging
MMU context when the L1 guest executes a VMRUN instruction
and has nested paging enabled in its VMCB.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 95cbeed..6e248d8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2962,6 +2962,7 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
mmu_free_roots(vcpu);
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_unload);
 
 static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
  struct kvm_mmu_page *sp,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a483aa9..9df60c3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -294,6 +294,15 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
force_new_asid(vcpu);
 }
 
+static int get_npt_level(void)
+{
+#ifdef CONFIG_X86_64
+   return PT64_ROOT_LEVEL;
+#else
+   return PT32E_ROOT_LEVEL;
+#endif
+}
+
 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
vcpu-arch.efer = efer;
@@ -1630,6 +1639,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu 
*vcpu)
nested_svm_vmexit(svm);
 }
 
+static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
+{
+   int r;
+
+   r = kvm_init_shadow_mmu(vcpu, vcpu-arch.mmu);
+
+   vcpu-arch.mmu.set_cr3   = nested_svm_set_tdp_cr3;
+   vcpu-arch.mmu.get_cr3   = nested_svm_get_tdp_cr3;
+   vcpu-arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
+   vcpu-arch.mmu.shadow_root_level = get_npt_level();
+   vcpu-arch.walk_mmu  = vcpu-arch.nested_mmu;
+
+   return r;
+}
+
+static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
+{
+   vcpu-arch.walk_mmu = vcpu-arch.mmu;
+}
+
 static int nested_svm_check_permissions(struct vcpu_svm *svm)
 {
if (!(svm-vcpu.arch.efer  EFER_SVME)
@@ -1998,6 +2027,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_clear_exception_queue(svm-vcpu);
kvm_clear_interrupt_queue(svm-vcpu);
 
+   svm-nested.nested_cr3 = 0;
+
/* Restore selected save entries */
svm-vmcb-save.es = hsave-save.es;
svm-vmcb-save.cs = hsave-save.cs;
@@ -2024,6 +2055,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 
nested_svm_unmap(page);
 
+   nested_svm_uninit_mmu_context(svm-vcpu);
kvm_mmu_reset_context(svm-vcpu);
kvm_mmu_load(svm-vcpu);
 
@@ -2071,6 +2103,9 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
if (vmcb-control.asid == 0)
return false;
 
+   if (vmcb-control.nested_ctl  !npt_enabled)
+   return false;
+
return true;
 }
 
@@ -2143,6 +2178,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
else
svm-vcpu.arch.hflags = ~HF_HIF_MASK;
 
+   if (nested_vmcb-control.nested_ctl) {
+   kvm_mmu_unload(svm-vcpu);
+   svm-nested.nested_cr3 = nested_vmcb-control.nested_cr3;
+   nested_svm_init_mmu_context(svm-vcpu);
+   }
+
/* Load the nested guest state */
svm-vmcb-save.es = nested_vmcb-save.es;
svm-vmcb-save.cs = nested_vmcb-save.cs;
@@ -3410,15 +3451,6 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
 }
 
-static int get_npt_level(void)
-{
-#ifdef CONFIG_X86_64
-   return PT64_ROOT_LEVEL;
-#else
-   return PT32E_ROOT_LEVEL;
-#endif
-}
-
 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
return 0;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Add kvm_mmu parameter to load_pdptrs function

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This function needs to be able to load the pdptrs from any
mmu context currently in use. So change this function to
take a kvm_mmu parameter to fit these needs.
As a side effect this patch also moves the cached pdptrs
from vcpu_arch into the kvm_mmu struct.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 61d94cd..ac95c6f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -257,6 +257,8 @@ struct kvm_mmu {
 
u64 *pae_root;
u64 rsvd_bits_mask[2][4];
+
+   u64 pdptrs[4]; /* pae */
 };
 
 struct kvm_vcpu_arch {
@@ -276,7 +278,6 @@ struct kvm_vcpu_arch {
unsigned long cr4_guest_owned_bits;
unsigned long cr8;
u32 hflags;
-   u64 pdptrs[4]; /* pae */
u64 efer;
u64 apic_base;
struct kvm_lapic *apic;/* kernel irqchip context */
@@ -592,7 +593,7 @@ void kvm_mmu_zap_all(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
-int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
+int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
 
 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
  const void *val, int bytes);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 6491ac8..a37abe2 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -42,7 +42,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int 
index)
  (unsigned long *)vcpu-arch.regs_avail))
kvm_x86_ops-cache_reg(vcpu, VCPU_EXREG_PDPTR);
 
-   return vcpu-arch.pdptrs[index];
+   return vcpu-arch.walk_mmu-pdptrs[index];
 }
 
 static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 094df31..a98ac52 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1010,7 +1010,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum 
kvm_reg reg)
switch (reg) {
case VCPU_EXREG_PDPTR:
BUG_ON(!npt_enabled);
-   load_pdptrs(vcpu, vcpu-arch.cr3);
+   load_pdptrs(vcpu, vcpu-arch.walk_mmu, vcpu-arch.cr3);
break;
default:
BUG();
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0e62d8a..0a70194 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1848,20 +1848,20 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
return;
 
if (is_paging(vcpu)  is_pae(vcpu)  !is_long_mode(vcpu)) {
-   vmcs_write64(GUEST_PDPTR0, vcpu-arch.pdptrs[0]);
-   vmcs_write64(GUEST_PDPTR1, vcpu-arch.pdptrs[1]);
-   vmcs_write64(GUEST_PDPTR2, vcpu-arch.pdptrs[2]);
-   vmcs_write64(GUEST_PDPTR3, vcpu-arch.pdptrs[3]);
+   vmcs_write64(GUEST_PDPTR0, vcpu-arch.mmu.pdptrs[0]);
+   vmcs_write64(GUEST_PDPTR1, vcpu-arch.mmu.pdptrs[1]);
+   vmcs_write64(GUEST_PDPTR2, vcpu-arch.mmu.pdptrs[2]);
+   vmcs_write64(GUEST_PDPTR3, vcpu-arch.mmu.pdptrs[3]);
}
 }
 
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 {
if (is_paging(vcpu)  is_pae(vcpu)  !is_long_mode(vcpu)) {
-   vcpu-arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
-   vcpu-arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
-   vcpu-arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
-   vcpu-arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+   vcpu-arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+   vcpu-arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+   vcpu-arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+   vcpu-arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
}
 
__set_bit(VCPU_EXREG_PDPTR,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3101060..bbd9f4a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -418,17 +418,17 @@ int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, 
gfn_t gfn,
 /*
  * Load the pae pdptrs.  Return true is they are all valid.
  */
-int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
+int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 {
gfn_t pdpt_gfn = cr3  PAGE_SHIFT;
unsigned offset = ((cr3  (PAGE_SIZE-1))  5)  2;
int i;
int ret;
-   u64 pdpte[ARRAY_SIZE(vcpu-arch.pdptrs)];
+   u64 pdpte[ARRAY_SIZE(mmu-pdptrs)];
 
-   ret = kvm_read_nested_guest_page(vcpu, pdpt_gfn, pdpte,
-offset * sizeof(u64), sizeof(pdpte),
-PFERR_USER_MASK|PFERR_WRITE_MASK);
+   ret = kvm_read_guest_page_mmu(vcpu,

[COMMIT master] KVM: MMU: Track NX state in struct kvm_mmu

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

With Nested Paging emulation the NX state between the two
MMU contexts may differ. To make sure that the right fault
error code is always recorded, this patch moves the NX state
into struct kvm_mmu so that the code can distinguish between
L1 and L2 NX state.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 88d6c84..3a00741 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -259,6 +259,8 @@ struct kvm_mmu {
u64 *lm_root;
u64 rsvd_bits_mask[2][4];
 
+   bool nx;
+
u64 pdptrs[4]; /* pae */
 };
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dd76765..95cbeed 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2634,6 +2634,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu,
context-shadow_root_level = PT32E_ROOT_LEVEL;
context-root_hpa = INVALID_PAGE;
context-direct_map = true;
+   context-nx = false;
return 0;
 }
 
@@ -2687,7 +2688,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
int maxphyaddr = cpuid_maxphyaddr(vcpu);
u64 exb_bit_rsvd = 0;
 
-   if (!is_nx(vcpu))
+   if (!context-nx)
exb_bit_rsvd = rsvd_bits(63, 63);
switch (level) {
case PT32_ROOT_LEVEL:
@@ -2746,6 +2747,8 @@ static int paging64_init_context_common(struct kvm_vcpu 
*vcpu,
struct kvm_mmu *context,
int level)
 {
+   context-nx = is_nx(vcpu);
+
reset_rsvds_bits_mask(vcpu, context, level);
 
ASSERT(is_pae(vcpu));
@@ -2772,6 +2775,8 @@ static int paging64_init_context(struct kvm_vcpu *vcpu,
 static int paging32_init_context(struct kvm_vcpu *vcpu,
 struct kvm_mmu *context)
 {
+   context-nx = false;
+
reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL);
 
context-new_cr3 = paging_new_cr3;
@@ -2810,19 +2815,24 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context-set_cr3 = kvm_x86_ops-set_tdp_cr3;
context-get_cr3 = get_cr3;
context-inject_page_fault = kvm_inject_page_fault;
+   context-nx = is_nx(vcpu);
 
if (!is_paging(vcpu)) {
+   context-nx = false;
context-gva_to_gpa = nonpaging_gva_to_gpa;
context-root_level = 0;
} else if (is_long_mode(vcpu)) {
+   context-nx = is_nx(vcpu);
reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL);
context-gva_to_gpa = paging64_gva_to_gpa;
context-root_level = PT64_ROOT_LEVEL;
} else if (is_pae(vcpu)) {
+   context-nx = is_nx(vcpu);
reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL);
context-gva_to_gpa = paging64_gva_to_gpa;
context-root_level = PT32E_ROOT_LEVEL;
} else {
+   context-nx = false;
reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL);
context-gva_to_gpa = paging32_gva_to_gpa;
context-root_level = PT32_ROOT_LEVEL;
@@ -2878,17 +2888,21 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 * functions between mmu and nested_mmu are swapped.
 */
if (!is_paging(vcpu)) {
+   g_context-nx = false;
g_context-root_level = 0;
g_context-gva_to_gpa = nonpaging_gva_to_gpa_nested;
} else if (is_long_mode(vcpu)) {
+   g_context-nx = is_nx(vcpu);
reset_rsvds_bits_mask(vcpu, g_context, PT64_ROOT_LEVEL);
g_context-root_level = PT64_ROOT_LEVEL;
g_context-gva_to_gpa = paging64_gva_to_gpa_nested;
} else if (is_pae(vcpu)) {
+   g_context-nx = is_nx(vcpu);
reset_rsvds_bits_mask(vcpu, g_context, PT32E_ROOT_LEVEL);
g_context-root_level = PT32E_ROOT_LEVEL;
g_context-gva_to_gpa = paging64_gva_to_gpa_nested;
} else {
+   g_context-nx = false;
reset_rsvds_bits_mask(vcpu, g_context, PT32_ROOT_LEVEL);
g_context-root_level = PT32_ROOT_LEVEL;
g_context-gva_to_gpa = paging32_gva_to_gpa_nested;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a28f09b..2bdd843 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -105,7 +105,7 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, 
pt_element_t gpte)
 
access = (gpte  (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
 #if PTTYPE == 64
-   if (is_nx(vcpu))
+   if (vcpu-arch.mmu.nx)
access = ~(gpte  PT64_NX_SHIFT);
 #endif
return access;
@@ -272,7 +272,7 @@ error:
walker-error_code |=

[COMMIT master] KVM: MMU: Fix regression with ept memory types merged into non-ept page tables

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Commit "KVM: MMU: Make tdp_enabled a mmu-context parameter" made real-mode
set ->direct_map, and changed the code that merges in the memory type to
depend on direct_map instead of tdp_enabled.  However, in this case what
really matters is tdp, not direct_map, since tdp changes the pte format
regardless of whether the mapping is direct or not.

As a result, real-mode shadow mappings got corrupted with ept memory types.
The result was a huge slowdown, likely due to the cache being disabled.

Change it back as the simplest fix for the regression (real fix is to move
all that to vmx code, and not use tdp_enabled as a synonym for ept).

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6e248d8..3ce56bf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1980,7 +1980,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= shadow_user_mask;
if (level  PT_PAGE_TABLE_LEVEL)
spte |= PT_PAGE_SIZE_MASK;
-   if (vcpu-arch.mmu.direct_map)
+   if (tdp_enabled)
spte |= kvm_x86_ops-get_mt_mask(vcpu, gfn,
kvm_is_mmio_pfn(pfn));
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: X86: Propagate fetch faults

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

KVM currently ignores fetch faults in the instruction
emulator. With nested-npt we could have such faults. This
patch adds the code to handle these.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2b08b78..aead72e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1198,6 +1198,9 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
*(unsigned long *)dest =
(ctxt-eflags  ~change_mask) | (val  change_mask);
 
+   if (rc == X86EMUL_PROPAGATE_FAULT)
+   emulate_pf(ctxt);
+
return rc;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0281d92..3101060 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4247,6 +4247,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
vcpu-arch.emulate_ctxt.perm_ok = false;
 
r = x86_decode_insn(vcpu-arch.emulate_ctxt);
+   if (r == X86EMUL_PROPAGATE_FAULT)
+   goto done;
+
trace_kvm_emulate_insn_start(vcpu);
 
/* Only allow emulation of specific instructions on #UD
@@ -4305,6 +4308,7 @@ restart:
return handle_emulation_failure(vcpu);
}
 
+done:
if (vcpu-arch.emulate_ctxt.exception = 0) {
inject_emulated_exception(vcpu);
r = EMULATE_DONE;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Add infrastructure for two-level page walker

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch introduces a mmu-callback to translate gpa
addresses in the walk_addr code. This is later used to
translate l2_gpa addresses into l1_gpa addresses.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 235023e..7f95260 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -243,6 +243,7 @@ struct kvm_mmu {
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
u32 *error);
+   gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
void (*prefetch_page)(struct kvm_vcpu *vcpu,
  struct kvm_mmu_page *page);
int (*sync_page)(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 48b74d2..2364c2c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3448,6 +3448,11 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
kvm_x86_ops-get_segment(vcpu, var, seg);
 }
 
+static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
+{
+   return gpa;
+}
+
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
 {
u32 access = (kvm_x86_ops-get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
@@ -5659,6 +5664,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
vcpu-arch.emulate_ctxt.ops = emulate_ops;
vcpu-arch.mmu.root_hpa = INVALID_PAGE;
+   vcpu-arch.mmu.translate_gpa = translate_gpa;
if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
vcpu-arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f2ecdd5..917e68f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -534,6 +534,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
return (gpa_t)gfn  PAGE_SHIFT;
 }
 
+static inline gfn_t gpa_to_gfn(gpa_t gpa)
+{
+   return (gfn_t)(gpa  PAGE_SHIFT);
+}
+
 static inline hpa_t pfn_to_hpa(pfn_t pfn)
 {
return (hpa_t)pfn  PAGE_SHIFT;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: Document that KVM_GET_SUPPORTED_CPUID may return emulated values

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 24d6341..b336266 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -1042,8 +1042,9 @@ number is just right, the 'nent' field is adjusted to the 
number of valid
 entries in the 'entries' array, which is then filled.
 
 The entries returned are the host cpuid as returned by the cpuid instruction,
-with unknown or unsupported features masked out.  The fields in each entry
-are defined as follows:
+with unknown or unsupported features masked out.  Some features (for example,
+x2apic), may not be present in the host cpu, but are exposed by kvm if it can
+emulate them efficiently. The fields in each entry are defined as follows:
 
   function: the eax value used to obtain the entry
   index: the ecx value used to obtain the entry (for entries that are
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: SVM: Report Nested Paging support to userspace

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch implements the reporting of the nested paging
feature support to userspace.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ede95e0..678602e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3476,6 +3476,10 @@ static void svm_set_supported_cpuid(u32 func, struct 
kvm_cpuid_entry2 *entry)
if (svm_has(SVM_FEATURE_NRIP))
entry-edx |= SVM_FEATURE_NRIP;
 
+   /* Support NPT for the guest if enabled */
+   if (npt_enabled)
+   entry-edx |= SVM_FEATURE_NPT;
+
break;
}
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] Merge branch 'master' of ssh://master.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into next

2010-09-13 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

* 'master' of 
ssh://master.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6: (407 
commits)
  PM / Hibernate: Avoid hitting OOM during preallocation of memory
  x86, tsc: Fix a preemption leak in restore_sched_clock_state()
  x86, tsc: Fix a preemption leak in restore_sched_clock_state()
  PM QoS: Correct pr_debug() misuse and improve parameter checks
  xfs: log IO completion workqueue is a high priority queue
  execve: make responsive to SIGKILL with large arguments
  execve: improve interactivity with large arguments
  setup_arg_pages: diagnose excessive argument size
  KEYS: Fix bug in keyctl_session_to_parent() if parent has no session keyring
  KEYS: Fix RCU no-lock warning in keyctl_session_to_parent()
  drm/i915: don't enable self-refresh on Ironlake
  xfs: prevent reading uninitialized stack memory
  AT91: at91sam9261ek: remove C99 comments but keep information
  AT91: at91sam9261ek board: remove warnings related to use of SPI or SD/MMC
  AT91: dm9000 initialization update
  block: Range check cpu in blk_cpu_to_group
  ipheth: remove incorrect devtype to WWAN
  MAINTAINERS: Add CAIF
  tracing: t_start: reset FTRACE_ITER_HASH in case of seek/pread
  libata-sff: Reenable Port Multiplier after libata-sff remodeling.
  ...

Conflicts:
arch/x86/include/asm/kvm_emulate.h
virt/kvm/kvm_main.c

Signed-off-by: Avi Kivity a...@redhat.com
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: X86: Report SVM bit to userspace only when supported

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch fixes a bug in KVM where it _always_ reports the
support of the SVM feature to userspace. But KVM only
supports SVM on AMD hardware and only when it is enabled in
the kernel module. This patch fixes the wrong reporting.

Cc: sta...@kernel.org
Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 678602e..eeb08d6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3464,6 +3464,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 {
switch (func) {
+   case 0x8001:
+   if (nested)
+   entry-ecx |= (1  2); /* Set SVM bit */
+   break;
case 0x800A:
entry-eax = 1; /* SVM revision 1 */
entry-ebx = 8; /* Lets support 8 ASIDs in case we add proper
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bbd9f4a..3ff0a8f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2209,7 +2209,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, 
u32 function,
0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX);
/* cpuid 0x8001.ecx */
const u32 kvm_supported_word6_x86_features =
-   F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
+   F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
0 /* SKINIT */ | 0 /* WDT */;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Propagate the right fault back to the guest after gva_to_gpa

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch implements logic to make sure that either a
page-fault/page-fault-vmexit or a nested-page-fault-vmexit
is propagated back to the guest.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5d9e0bb..61d94cd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -660,6 +660,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu);
 int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gfn_t gfn, void *data, int offset, int len,
u32 access);
+void kvm_propagate_fault(struct kvm_vcpu *vcpu);
 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
 
 int kvm_pic_set_irq(void *opaque, int irq, int level);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e4c76bf..0281d92 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -338,6 +338,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu)
kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
 }
 
+void kvm_propagate_fault(struct kvm_vcpu *vcpu)
+{
+   u32 nested, error;
+
+   error   = vcpu-arch.fault.error_code;
+   nested  = error   PFERR_NESTED_MASK;
+   error   = error  ~PFERR_NESTED_MASK;
+
+   vcpu-arch.fault.error_code = error;
+
+   if (mmu_is_nested(vcpu)  !nested)
+   vcpu-arch.nested_mmu.inject_page_fault(vcpu);
+   else
+   vcpu-arch.mmu.inject_page_fault(vcpu);
+}
+
 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
 {
vcpu-arch.nmi_pending = 1;
@@ -4140,7 +4156,7 @@ static void inject_emulated_exception(struct kvm_vcpu 
*vcpu)
 {
struct x86_emulate_ctxt *ctxt = vcpu-arch.emulate_ctxt;
if (ctxt-exception == PF_VECTOR)
-   kvm_inject_page_fault(vcpu);
+   kvm_propagate_fault(vcpu);
else if (ctxt-error_code_valid)
kvm_queue_exception_e(vcpu, ctxt-exception, ctxt-error_code);
else
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Allow long mode shadows for legacy page tables

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

Currently the KVM softmmu implementation cannot shadow a 32
bit legacy or PAE page table with a long mode page table.
This is a required feature for nested paging emulation
because the nested page table must always be in host format.
So this patch implements the missing pieces to allow long
mode page tables for these page table types.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ac95c6f..88d6c84 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -256,6 +256,7 @@ struct kvm_mmu {
bool direct_map;
 
u64 *pae_root;
+   u64 *lm_root;
u64 rsvd_bits_mask[2][4];
 
u64 pdptrs[4]; /* pae */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9cd5a71..dd76765 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1504,6 +1504,12 @@ static void shadow_walk_init(struct 
kvm_shadow_walk_iterator *iterator,
iterator-addr = addr;
iterator-shadow_addr = vcpu-arch.mmu.root_hpa;
iterator-level = vcpu-arch.mmu.shadow_root_level;
+
+   if (iterator-level == PT64_ROOT_LEVEL 
+   vcpu-arch.mmu.root_level  PT64_ROOT_LEVEL 
+   !vcpu-arch.mmu.direct_map)
+   --iterator-level;
+
if (iterator-level == PT32E_ROOT_LEVEL) {
iterator-shadow_addr
= vcpu-arch.mmu.pae_root[(addr  30)  3];
@@ -2314,7 +2320,9 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
if (!VALID_PAGE(vcpu-arch.mmu.root_hpa))
return;
spin_lock(vcpu-kvm-mmu_lock);
-   if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+   if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL 
+   (vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL ||
+vcpu-arch.mmu.direct_map)) {
hpa_t root = vcpu-arch.mmu.root_hpa;
 
sp = page_header(root);
@@ -2394,10 +2402,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
 static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 {
-   int i;
-   gfn_t root_gfn;
struct kvm_mmu_page *sp;
-   u64 pdptr;
+   u64 pdptr, pm_mask;
+   gfn_t root_gfn;
+   int i;
 
root_gfn = vcpu-arch.mmu.get_cr3(vcpu)  PAGE_SHIFT;
 
@@ -2426,8 +2434,13 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
/*
 * We shadow a 32 bit page table. This may be a legacy 2-level
-* or a PAE 3-level page table.
+* or a PAE 3-level page table. In either case we need to be aware that
+* the shadow page table may be a PAE or a long mode page table.
 */
+   pm_mask = PT_PRESENT_MASK;
+   if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL)
+   pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
+
for (i = 0; i  4; ++i) {
hpa_t root = vcpu-arch.mmu.pae_root[i];
 
@@ -2451,9 +2464,35 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
++sp-root_count;
spin_unlock(vcpu-kvm-mmu_lock);
 
-   vcpu-arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+   vcpu-arch.mmu.pae_root[i] = root | pm_mask;
+   vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root);
}
-   vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root);
+
+   /*
+* If we shadow a 32 bit page table with a long mode page
+* table we enter this path.
+*/
+   if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+   if (vcpu-arch.mmu.lm_root == NULL) {
+   /*
+* The additional page necessary for this is only
+* allocated on demand.
+*/
+
+   u64 *lm_root;
+
+   lm_root = (void*)get_zeroed_page(GFP_KERNEL);
+   if (lm_root == NULL)
+   return 1;
+
+   lm_root[0] = __pa(vcpu-arch.mmu.pae_root) | pm_mask;
+
+   vcpu-arch.mmu.lm_root = lm_root;
+   }
+
+   vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.lm_root);
+   }
+
return 0;
 }
 
@@ -2470,9 +2509,12 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
int i;
struct kvm_mmu_page *sp;
 
+   if (vcpu-arch.mmu.direct_map)
+   return;
+
if (!VALID_PAGE(vcpu-arch.mmu.root_hpa))
return;
-   if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+   if (vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu-arch.mmu.root_hpa;
sp = page_header(root);
mmu_sync_children(vcpu, sp);
@@ -3253,6 +3295,8 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 static void free_mmu_pages(struct kvm_vcpu

[COMMIT master] KVM: X86: Add kvm_read_guest_page_mmu function

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch adds a function which can read from the guest's
physical memory or from the guest's guest physical memory.
This will be used in the two-dimensional page table walker.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 10a5ddd..5d9e0bb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -657,6 +657,9 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned 
nr, u32 error_code);
 void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 
error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu);
+int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+   gfn_t gfn, void *data, int offset, int len,
+   u32 access);
 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
 
 int kvm_pic_set_irq(void *opaque, int irq, int level);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4196fc7..a2efb70 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -370,6 +370,29 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int 
required_cpl)
 EXPORT_SYMBOL_GPL(kvm_require_cpl);
 
 /*
+ * This function will be used to read from the physical memory of the currently
+ * running guest. The difference to kvm_read_guest_page is that this function
+ * can read from guest physical or from the guest's guest physical memory.
+ */
+int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+   gfn_t ngfn, void *data, int offset, int len,
+   u32 access)
+{
+   gfn_t real_gfn;
+   gpa_t ngpa;
+
+   ngpa = gfn_to_gpa(ngfn);
+   real_gfn = mmu-translate_gpa(vcpu, ngpa, access);
+   if (real_gfn == UNMAPPED_GVA)
+   return -EFAULT;
+
+   real_gfn = gpa_to_gfn(real_gfn);
+
+   return kvm_read_guest_page(vcpu-kvm, real_gfn, data, offset, len);
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
+
+/*
  * Load the pae pdptrs.  Return true is they are all valid.
  */
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: SVM: Implement MMU helper functions for Nested Nested Paging

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch adds the helper functions which will be used in
the mmu context for handling nested nested page faults.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a98ac52..a483aa9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -104,6 +104,8 @@ struct nested_state {
u32 intercept_exceptions;
u64 intercept;
 
+   /* Nested Paging related state */
+   u64 nested_cr3;
 };
 
 #define MSRPM_OFFSETS  16
@@ -1600,6 +1602,34 @@ static int vmmcall_interception(struct vcpu_svm *svm)
return 1;
 }
 
+static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   return svm-nested.nested_cr3;
+}
+
+static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
+  unsigned long root)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   svm-vmcb-control.nested_cr3 = root;
+   force_new_asid(vcpu);
+}
+
+static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   svm-vmcb-control.exit_code = SVM_EXIT_NPF;
+   svm-vmcb-control.exit_code_hi = 0;
+   svm-vmcb-control.exit_info_1 = vcpu-arch.fault.error_code;
+   svm-vmcb-control.exit_info_2 = vcpu-arch.fault.address;
+
+   nested_svm_vmexit(svm);
+}
+
 static int nested_svm_check_permissions(struct vcpu_svm *svm)
 {
if (!(svm-vcpu.arch.efer  EFER_SVME)
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Refactor mmu_alloc_roots function

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This patch factors out the direct-mapping paths of the
mmu_alloc_roots function into a separate function. This
makes it a lot easier to avoid all the unnecessary checks
done in the shadow path which may break when running direct.
In fact, this patch already fixes a problem when running PAE
guests on a PAE shadow page table.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a25173a..9cd5a71 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2357,42 +2357,77 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t 
root_gfn)
return ret;
 }
 
-static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
+static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
+{
+   struct kvm_mmu_page *sp;
+   int i;
+
+   if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+   spin_lock(vcpu-kvm-mmu_lock);
+   kvm_mmu_free_some_pages(vcpu);
+   sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
+ 1, ACC_ALL, NULL);
+   ++sp-root_count;
+   spin_unlock(vcpu-kvm-mmu_lock);
+   vcpu-arch.mmu.root_hpa = __pa(sp-spt);
+   } else if (vcpu-arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) {
+   for (i = 0; i  4; ++i) {
+   hpa_t root = vcpu-arch.mmu.pae_root[i];
+
+   ASSERT(!VALID_PAGE(root));
+   spin_lock(vcpu-kvm-mmu_lock);
+   kvm_mmu_free_some_pages(vcpu);
+   sp = kvm_mmu_get_page(vcpu, i  30, i  30,
+ PT32_ROOT_LEVEL, 1, ACC_ALL,
+ NULL);
+   root = __pa(sp-spt);
+   ++sp-root_count;
+   spin_unlock(vcpu-kvm-mmu_lock);
+   vcpu-arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+   vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root);
+   }
+   } else
+   BUG();
+
+   return 0;
+}
+
+static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 {
int i;
gfn_t root_gfn;
struct kvm_mmu_page *sp;
-   int direct = 0;
u64 pdptr;
 
root_gfn = vcpu-arch.mmu.get_cr3(vcpu)  PAGE_SHIFT;
 
-   if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+   if (mmu_check_root(vcpu, root_gfn))
+   return 1;
+
+   /*
+* Do we shadow a long mode page table? If so we need to
+* write-protect the guests page table root.
+*/
+   if (vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu-arch.mmu.root_hpa;
 
ASSERT(!VALID_PAGE(root));
-   if (mmu_check_root(vcpu, root_gfn))
-   return 1;
-   if (vcpu-arch.mmu.direct_map) {
-   direct = 1;
-   root_gfn = 0;
-   }
+
spin_lock(vcpu-kvm-mmu_lock);
kvm_mmu_free_some_pages(vcpu);
-   sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, direct,
- ACC_ALL, NULL);
+   sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
+ 0, ACC_ALL, NULL);
root = __pa(sp-spt);
++sp-root_count;
spin_unlock(vcpu-kvm-mmu_lock);
vcpu-arch.mmu.root_hpa = root;
return 0;
}
-   direct = !is_paging(vcpu);
-
-   if (mmu_check_root(vcpu, root_gfn))
-   return 1;
 
+   /*
+* We shadow a 32 bit page table. This may be a legacy 2-level
+* or a PAE 3-level page table.
+*/
for (i = 0; i  4; ++i) {
hpa_t root = vcpu-arch.mmu.pae_root[i];
 
@@ -2406,16 +2441,11 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
root_gfn = pdptr  PAGE_SHIFT;
if (mmu_check_root(vcpu, root_gfn))
return 1;
-   } else if (vcpu-arch.mmu.root_level == 0)
-   root_gfn = 0;
-   if (vcpu-arch.mmu.direct_map) {
-   direct = 1;
-   root_gfn = i  30;
}
spin_lock(vcpu-kvm-mmu_lock);
kvm_mmu_free_some_pages(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, i  30,
- PT32_ROOT_LEVEL, direct,
+ PT32_ROOT_LEVEL, 0,
  ACC_ALL, NULL);
root = __pa(sp-spt);
++sp-root_count;
@@ -2427,6 +2457,14 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
return 0;

[COMMIT master] KVM: MMU: Introduce kvm_pdptr_read_mmu

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This function is implemented to load the pdptr pointers of
the currently running guest (l1 or l2 guest). Therefore it
takes the current paging mode into account and can read pdptrs
out of l2 guest physical memory.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index a37abe2..975bb45 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -45,6 +45,13 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int 
index)
return vcpu-arch.walk_mmu-pdptrs[index];
 }
 
+static inline u64 kvm_pdptr_read_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu 
*mmu, int index)
+{
+   load_pdptrs(vcpu, mmu, mmu-get_cr3(vcpu));
+
+   return mmu-pdptrs[index];
+}
+
 static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
 {
ulong tmask = mask  KVM_POSSIBLE_CR0_GUEST_BITS;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a26f13b..a25173a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2398,7 +2398,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
ASSERT(!VALID_PAGE(root));
if (vcpu-arch.mmu.root_level == PT32E_ROOT_LEVEL) {
-   pdptr = kvm_pdptr_read(vcpu, i);
+   pdptr = kvm_pdptr_read_mmu(vcpu, vcpu-arch.mmu, i);
if (!is_present_gpte(pdptr)) {
vcpu-arch.mmu.pae_root[i] = 0;
continue;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index f4e09d3..a28f09b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -137,7 +137,7 @@ walk:
 
 #if PTTYPE == 64
if (walker-level == PT32E_ROOT_LEVEL) {
-   pte = kvm_pdptr_read(vcpu, (addr  30)  3);
+   pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr  30)  3);
trace_kvm_mmu_paging_element(pte, walker-level);
if (!is_present_gpte(pte)) {
present = false;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: MMU: Introduce get_cr3 function pointer

2010-09-13 Thread Avi Kivity

From: Joerg Roedel joerg.roe...@amd.com

This function pointer in the MMU context is required to
implement Nested Nested Paging.

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aeeea9c..ab708ee 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -237,6 +237,7 @@ struct kvm_pio_request {
 struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
+   unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a55f8d5..e4a7de4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2365,7 +2365,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
int direct = 0;
u64 pdptr;
 
-   root_gfn = vcpu-arch.cr3  PAGE_SHIFT;
+   root_gfn = vcpu-arch.mmu.get_cr3(vcpu)  PAGE_SHIFT;
 
if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu-arch.mmu.root_hpa;
@@ -2562,6 +2562,11 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu)
mmu_free_roots(vcpu);
 }
 
+static unsigned long get_cr3(struct kvm_vcpu *vcpu)
+{
+   return vcpu-arch.cr3;
+}
+
 static void inject_page_fault(struct kvm_vcpu *vcpu,
  u64 addr,
  u32 err_code)
@@ -2715,6 +2720,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context-root_hpa = INVALID_PAGE;
context-direct_map = true;
context-set_cr3 = kvm_x86_ops-set_tdp_cr3;
+   context-get_cr3 = get_cr3;
 
if (!is_paging(vcpu)) {
context-gva_to_gpa = nonpaging_gva_to_gpa;
@@ -2755,6 +2761,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
vcpu-arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
vcpu-arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
vcpu-arch.mmu.set_cr3   = kvm_x86_ops-set_cr3;
+   vcpu-arch.mmu.get_cr3   = get_cr3;
 
return r;
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index e4ad3dc..13d0c06 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -130,7 +130,7 @@ walk:
present = true;
eperm = rsvd_fault = false;
walker-level = vcpu-arch.mmu.root_level;
-   pte = vcpu-arch.cr3;
+   pte = vcpu-arch.mmu.get_cr3(vcpu);
 #if PTTYPE == 64
if (vcpu-arch.mmu.root_level == PT32E_ROOT_LEVEL) {
pte = kvm_pdptr_read(vcpu, (addr  30)  3);
@@ -143,7 +143,7 @@ walk:
}
 #endif
ASSERT((!is_long_mode(vcpu)  is_pae(vcpu)) ||
-  (vcpu-arch.cr3  CR3_NONPAE_RESERVED_BITS) == 0);
+  (vcpu-arch.mmu.get_cr3(vcpu)  CR3_NONPAE_RESERVED_BITS) == 0);
 
pt_access = ACC_ALL;
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/3] S390: Add virtio hotplug add support

2010-09-13 Thread Martin Schwidefsky

On Mon, 13 Sep 2010 13:05:57 +0930
Rusty Russell ru...@rustcorp.com.au wrote:

 On Sun, 12 Sep 2010 06:30:43 pm Avi Kivity wrote:
On 09/12/2010 02:42 AM, Alexander Graf wrote:
   On 24.08.2010, at 15:48, Alexander Graf wrote:
  
   The one big missing feature in s390-virtio was hotplugging. This is no 
   more.
   This patch implements hotplug add support, so you can on the fly add new 
   devices
   in the guest.
  
   Keep in mind that this needs a patch for qemu to actually leverage the
   functionality.
  
   Signed-off-by: Alexander Grafag...@suse.de
   ping (on the patch set)?
  
  
  Actually Marcelo applied it.  But the natural place for it is Rusty's 
  virtio tree.  Rusty, if you want to take it, let me know and I'll drop 
  it from kvm.git.
 
 I thought it would be in the s390 tree, which is why I didn't take it...
 
 But I'm *always* happy to let others do the work!

I didn't pick them up after I saw that Marcelo took them. If others want
to do the work, be my guest..

-- 
blue skies,
   Martin.

Reality continues to ruin my life. - Calvin.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 18/24] Exiting from L2 to L1

2010-09-13 Thread Avi Kivity


 On 09/12/2010 09:51 PM, Nadav Har'El wrote:



There are two cases where VM_ENTRY_INTR_INFO_FIELD can potentially not
be cleared by hardware:
...
If neither of these are valid, the code can be removed.  If only the
second, we might make it conditional.

Again, unless I'm misunderstanding what you mean, the hardware only
modified vmcs02 (the hardware vmcs), not vmcs12. We need to modify vmcs12
as well, to remove the valid bit. If we don't, when L1 enters into the same
L2 again, the same old value will be copied again from vmcs12 to vmcs02,
and cause an injection of the same interrupt again.


Yes, vmcs12 still needs to be updated.  So the code cannot be removed, 
just the vm



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 18/24] Exiting from L2 to L1

2010-09-13 Thread Avi Kivity


 On 09/13/2010 07:53 AM, Sheng Yang wrote:



What can happen is that the contents of the field is transferred to the
IDT_VECTORING_INFO field or VM_EXIT_INTR_INFO field.

(question: on a failed vmentry, is this field cleared?)

I don't know the answer :-)

Sheng?

According to SDM 23.7 VM-ENTRY FAILURES DURING OR AFTER LOADING
GUEST STATE:

Although this process resembles that of a VM exit, many steps taken during a VM
exit do not occur for these VM-entry failures:
• Most VM-exit information fields are not updated (see step 1 above).
• The valid bit in the VM-entry interruption-information field is *not* cleared.
• The guest-state area is not modified.
• No MSRs are saved into the VM-exit MSR-store area.

So a VM-entry failure would _keep_ the valid bit of
VM_ENTRY_INTR_INFO_FIELD set.




Ok.  So if the exit was actually due to a failed vmentry, then we do 
need the vmread... (or alternatively, we can avoid clearing the field in 
the first place).


So the following options should work:

1.  vmcs12->vm_entry_intr_info_field =
vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
2.  if (!(exit_reason & FAILED_ENTRY)) vmcs12->vm_entry_intr_info_field
&= ~VALID;
3.  if (exit_reason & FAILED_ENTRY) vmcs12->vm_entry_intr_info_field =
vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 18/24] Exiting from L2 to L1

2010-09-13 Thread Nadav Har'El

On Mon, Sep 13, 2010, Avi Kivity wrote about Re: [PATCH 18/24] Exiting from L2 
to L1:
 So the following options should work:
 
 1.  vmcs12->vm_entry_intr_info_field =
 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);

Right, this was the original code in the patch.

 2.  if (!(exit_reason & FAILED_ENTRY)) vmcs12->vm_entry_intr_info_field
 &= ~VALID;

I now prefer this code. It doesn't do vmread (but replaces it with a bunch of
extra instructions - which might be even slower overall...).

But the more interesting thing is that it doesn't copy irrelevant bits from
vmcs02 to vmcs12, bits that might not have been set by L1 but rather by L0
which previously injected an interrupt into the same L2. These bits shouldn't
matter (when !valid), but a nosy L1 might notice them...

 3.  if (exit_reason & FAILED_ENTRY) vmcs12->vm_entry_intr_info_field =
 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);

I think you meant the opposite condition?

if (!(exit_reason & FAILED_ENTRY)) vmcs12->vm_entry_intr_info_field =
vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);

-- 
Nadav Har'El|   Monday, Sep 13 2010, 5 Tishri 5771
n...@math.technion.ac.il |-
Phone +972-523-790466, ICQ 13349191 |Always borrow money from pessimists. They
http://nadav.harel.org.il   |don't expect to be paid back.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH 1/4] Add a new API to virtio-pci

2010-09-13 Thread Michael S. Tsirkin

On Mon, Sep 13, 2010 at 09:50:42AM +0530, Krishna Kumar2 wrote:
 Michael S. Tsirkin m...@redhat.com wrote on 09/12/2010 05:16:37 PM:
 
  Michael S. Tsirkin m...@redhat.com
  09/12/2010 05:16 PM
 
  On Thu, Sep 09, 2010 at 07:19:33PM +0530, Krishna Kumar2 wrote:
   Unfortunately I need a
   constant in vhost for now.
 
  Maybe not even that: you create multiple vhost-net
  devices so vhost-net in kernel does not care about these
  either, right? So this can be just part of vhost_net.h
  in qemu.
 
 Sorry, I didn't understand what you meant.
 
 I can remove all socks[] arrays/constants by pre-allocating
 sockets in vhost_setup_vqs. Then I can remove all socks
 parameters in vhost_net_stop, vhost_net_release and
 vhost_net_reset_owner.
 
 Does this make sense?
 
 Thanks,
 
 - KK

Here's what I mean: each vhost device includes 1 TX
and 1 RX VQ. Instead of teaching vhost about multiqueue,
we could simply open /dev/vhost-net multiple times.
How many times would be up to qemu.

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Autotest] [KVM-AUTOTEST] Patch to install cygwin and run autotest in windows

2010-09-13 Thread yogi

Hello Lucas,

I would like to submit a patch to do an unattended install of cygwin and run
autotest test cases on Windows guests using cygwin.
Cygwin enables running some of the tests, such as stress, lmbench, bonnie,
compilebench and netperf, directly under Windows, but most of the tests
need to be patched before they can run on Windows. So this patch
introduces a new parameter 'test_patch_file' in tests_config to
indicate the patch file. Two new files related to patching autotest are
added: 'autotest.patch', which is the base patch needed to run autotest
on Windows, and 'iozone.patch', which is used to run iozone
on Windows. Similar patches could be developed for test cases like
'dacapo' and sysbench, which would enable running tests related to Java
and MySQL/PostgreSQL; I will be happy to write patches for them too.

This patch also adds two new files 'scripts/cyg_install.py' and
'tests/cyg_install'. 'scripts/cyg_install.py' is used to do unattended
install on cygwin. The parameter 'cyg_path' indicates the path of the
setup file. Installation can be done either locally, using
'cyg_install.local', or remotely, using 'cyg_install.remote'.
Local installation requires both the cygwin setup and packages files to
be present locally at the path indicated by  'cyg_path'. The parameter
'cyg_param' is used to pass the command line options for cygwin setup.
So it can contain options for packages to be installed, proxy server to
be used to do remote installation and so on. An iso image cyg.iso
containing the setup and packages in case of local install and just the
setup in case of remote install, is created on the fly. Installation is
started using telnet.

This patch acts as a complement to the existing support for running tests
using AutoIt. I will be happy to re-implement or modify the patch based
on your comments and also write patches for other test cases like
sysbench and dacapo.

From 745d87681a33cc14431dc1b6b35cd977112b0fee Mon Sep 17 00:00:00 2001
From: Yogananth Subramanian anant...@linux.vnet.ibm.com
Date: Fri, 10 Sep 2010 20:40:16 +0530
Subject: [PATCH] This patch enables installing cygwin and running
autotest in windows guests

The patch creates two new files 'scripts/cyg_install.py' and
'tests/cyg_install'. 'scripts/cyg_install.py' is used to install cygwin in
unattended mode. This patch also introduces a new parameter 'patch_file' to
run_autotest() in the kvm_test_utils.py file, to install a patch for autotest
to run in Windows. The file 'autotest_control/autotest.patch' is the base
patch to run any autotest test cases in Windows, and the file
'autotest_control/iozone.patch' is a patch to run iozone on Windows.

Signed-off-by: Yogananth Subramanian anant...@linux.vnet.ibm.com
---
 client/tests/kvm/autotest_control/autotest.patch |   15 
 client/tests/kvm/autotest_control/iozone.patch   |   12 +++
 client/tests/kvm/kvm_test_utils.py   |   10 ++-
 client/tests/kvm/scripts/cyg_install.py  |   98
++
 client/tests/kvm/tests/autotest.py   |   21 -
 client/tests/kvm/tests/cyg_install.py|   36 
 client/tests/kvm/tests_base.cfg.sample   |   43 +-
 7 files changed, 231 insertions(+), 4 deletions(-)
 create mode 100644 client/tests/kvm/autotest_control/autotest.patch
 create mode 100644 client/tests/kvm/autotest_control/iozone.patch
 create mode 100755 client/tests/kvm/scripts/cyg_install.py
 create mode 100644 client/tests/kvm/tests/cyg_install.py

diff --git a/client/tests/kvm/autotest_control/autotest.patch
b/client/tests/kvm/autotest_control/autotest.patch
new file mode 100644
index 000..f562f95
--- /dev/null
+++ b/client/tests/kvm/autotest_control/autotest.patch
@@ -0,0 +1,15 @@
+diff -aurpN client/bin/job.py client-new/bin/job.py
+--- client/bin/job.py   2010-08-25 01:42:27.0 -0400
 client-new/bin/job.py   2010-09-07 09:54:30.0 -0400
+@@ -296,7 +296,10 @@ class base_client_job(base_job.base_job)
+
+ # extract console= and other args from cmdline and add them
into the
+ # base args that we use for all kernels we install
+-cmdline = utils.read_one_line('/proc/cmdline')
++if os.path.exists('/proc/cmdline'):
++cmdline = utils.read_one_line('/proc/cmdline')
++else:
++return
+ kernel_args = []
+ for karg in cmdline.split():
+ for param in copy_cmdline:
diff --git a/client/tests/kvm/autotest_control/iozone.patch
b/client/tests/kvm/autotest_control/iozone.patch
new file mode 100644
index 000..6229205
--- /dev/null
+++ b/client/tests/kvm/autotest_control/iozone.patch
@@ -0,0 +1,12 @@
+diff -aurpN client/tests/iozone/iozone.py
client-new/tests/iozone/iozone.py
+--- client/tests/iozone/iozone.py   2010-08-25 01:42:27.0
-0400
 client-new/tests/iozone/iozone.py   2010-09-02 11:38:42.0
-0400
+@@ -43,7 +43,7 @@ class iozone(test.test):
+ elif (arch == 'x86_64'):
+ utils.system('make linux-AMD64')
+

Re: [PATCH 18/24] Exiting from L2 to L1

2010-09-13 Thread Avi Kivity


 On 09/13/2010 11:01 AM, Nadav Har'El wrote:



3.  if (exit_reason & FAILED_ENTRY) vmcs12->vm_entry_intr_info_field =
vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);

I think you meant the opposite condition?

if (!(exit_reason & FAILED_ENTRY)) vmcs12->vm_entry_intr_info_field =
vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);



Dunno, I think both are subtly broken.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/3] S390: Add virtio hotplug add support

2010-09-13 Thread Avi Kivity


 On 09/13/2010 09:41 AM, Martin Schwidefsky wrote:

Actually Marcelo applied it.  But the natural place for it is Rusty's
virtio tree.  Rusty, if you want to take it, let me know and I'll drop
it from kvm.git.

I thought it would be in the s390 tree, which is why I didn't take it...

But I'm *always* happy to let others do the work!

I didn't pick them up after I saw that Marcelo took them. If others want
to do the work, be my guest..



I just hope that all this generosity doesn't lead to merge conflicts 
later, or people basing their stuff on stale code.  But it isn't like 
this is a high churn area.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Autotest] [KVM-AUTOTEST] Patch to install cygwin and run autotest in windows

2010-09-13 Thread Lucas Meneghel Rodrigues

On Mon, 2010-09-13 at 15:01 +0530, yogi wrote:
 Hello Lucas,
 
 I like to submit patch to do unattended install of cygwin and run
 autotest test case on Windows guests using cygwin. 
 Cygwin enable running some of test like stress, lmbench, bonnie,
 compilebench and netperf directly under windows, but most of the tests
 need to be patched before they could run on Windows. So this patch
 introduces a new parameter 'test_patch_file' in tests_config  to
 indicate the patch file. Two new files are added related to patching
 autotest, 'autotest.patch' which is like base patch for autotest, needed
 to run autotest in windows and 'iozone.patch' which to used run iozone
 on windows. Similar patches could be developed for test cases like
 'dacapo'  and sysbench which would enable running tests related to java
 and  mysql/postgresql, i will be happy to write patches for them too.

Hi Yogi,

The idea is very interesting! However, your mail client chewed the
patch, you apparently pasted the diffs generated by git format-patch
into your mail client window, not an optimal way to do things.

Please refer to http://autotest.kernel.org/wiki/GitWorkflow for a quick
guide on how to configure git to use git send-email, so your patches
will be perfectly mailed to the mailing list.

Your patches touch some core infrastructure of autotest, I'll have to
review the strategy used very carefully, so please be patient.

Thanks for your contribution,

Lucas


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [ANNOUNCE] qemu-kvm-0.13.0-rc1

2010-09-13 Thread Glauber Costa

On Sun, Sep 12, 2010 at 05:59:45PM +0200, Avi Kivity wrote:
  On 09/12/2010 05:31 PM, Anthony Liguori wrote:
 On 09/12/2010 01:11 AM, Avi Kivity wrote:
  On 09/10/2010 10:48 PM, Anthony Liguori wrote:
 I agree, is there any reason not to enable compiling less
 into the binary?
 There are folks interested in eliminating as much as
 possible to reduce
 the attack surface and auditing requirements, for example.
 
 It's not a bad idea, it's just that what
 --disable-cpu-emulation does is evil.  Being that I wrote the
 implementation, I'm quite confident in declaring it as such :-)
 
 
 Oh, I thought you were against the idea in itself for some reason.
 
 I'll patch it for 0.13, but any ideas on how it should be reworked
 for master?
 
 Glauber's old Accel interface was close to the right approach.  We
 need to abstract the exec.c interfaces to use a function pointer
 table and have a TCG and KVM implementation.  The function pointer
 tables can then be registered by a module_init() and we can simply
 not include the kvm or TCG files at build time to disable the
 functionality.
 
 Yes, I remember it now.
 
 Glauber, can you bring those patches back from the land of the dead?
I could, but I myself was not entirely sure about the correct approach
in terms of granularity.

The first version was too fine grained, since I was hooking into every possible
kqemu operation (the goal at the time was to take _that_ out, not tcg), and
then the second version got too coarse, with us having to rewrite whole parts
of memory. Now that kqemu is gone, it surely gets easier...

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: vhost, something changed between 2.6.35 and 2.6.36 ?

2010-09-13 Thread Michael S. Tsirkin

On Mon, Sep 13, 2010 at 04:25:13PM +0200, Dhaval Giani wrote:
 (BTW, this is a regression from 2.6.35 at least. I will try to figure
 out the last working version if you would like a bisect!)

Sure, that's helpful.
BTW, does latest upstream qemu-kvm have this issue as well for you?


 On Sun, Sep 12, 2010 at 4:40 PM, Michael S. Tsirkin m...@redhat.com wrote:
  On Sun, Sep 12, 2010 at 04:39:29PM +0200, Dhaval Giani wrote:
  On Sun, Sep 12, 2010 at 2:05 PM, Michael S. Tsirkin m...@redhat.com 
  wrote:
   On Fri, Sep 10, 2010 at 03:37:36PM +0200, Dhaval Giani wrote:
   Hi,
  
   I have been trying to get vhost+macvtap to work for me. I run it as
  
   /root/qemu-kvm-vhost-net/bin/qemu-system-x86_64 -hda $IMAGE  -serial
   stdio -monitor telnet::,server,nowait -vnc :4: -m 3G -net
   nic,model=virtio,macaddr=$MACADDR,netdev=macvtap0 -netdev
   tap,id=macvtap0,vhost=on,fd=3 3<>/dev/tap5
  
   in 2.6.35, which worked just fine. On the other hand, with 2.6.36, i
   don't have working networking. I am using the same image and same
   macaddress. The qemu is the version from
   git://git.kernel.org/pub/scm/linux/kernel/git/mst/qemu-kvm.git vhost .
  
   BTW, by now, all these patches are merged so upstream qemu-kvm should 
   work
   just fine for you as well.
  
   Any suggestions will be welcome!
  
   Thanks,
   Dhaval
  
   You are running this as non-root user, correct?
 
  nope as root.
 
   This could be the permission issue that got fixed
   by 87d6a412bd1ed82c14cabd4b408003b23bbd2880.
   Could you please check the latest master from Linus,
   and let me and the list know? Thanks!
  
 
  this is with git of friday evening CEST.
 
   Another thing to try if this does *not* help:
  
   enable CONFIG_DYNAMIC_DEBUG in kernel,
   rebuild the kernel,
   mount debugfs:
  
          mount -t debugfs none /sys/kernel/debug
  
   and then enable debug for vhost_net as described in
   Documentation/dynamic-debug-howto.txt:
  
 
  I will give this a run on monday morning when i am at the lab again.
 
 
 So nothing comes out with this.
 
          echo 'module vhost_net +p' > /sys/kernel/debug/dynamic_debug/control
  
   Then start qemu, and after running a test, run dmesg and see if there
   are any messages from vhost_net. If yes please send them to
   me and to the list.
  
   Thanks!
  
 
 
  thanks!
  Dhaval
 
  Another thing to check is generic net core issues.
 
  For this, try running tcpdump on both tap in host
  and on virtio net device in guest. Then
  send packets to host from guest and back, and check whether
  they appear on virtio and on tap.
 
 
 tcpdump -i macvtap0 on the host leads to nothing.
 
 tcpdump -i eth0 on the guest leads to ARP requests, with no responses.
 
 Anything more I can try?
 
 Thanks!
 Dhaval

So nothing is passed to tap, but no errors either.
It might be helpful to enable function tracer and trace functions in our
module. Alternatively, let's see if we get anything from the ring (don't
forget to unload and reload vhost-net).

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 29e850a..6400cd5 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -152,6 +152,8 @@ static void handle_tx(struct vhost_net *net)
 ARRAY_SIZE(vq-iov),
 out, in,
 NULL, NULL);
+   printk(KERN_ERR vhost_get_vq_desc returned %d\n, head);
+
/* On error, stop handling until the next kick. */
if (unlikely(head  0))
break;
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] KVM: MMU: Fix regression with ept memory types merged into non-ept page tables

2010-09-13 Thread Avi Kivity

Commit "KVM: MMU: Make tdp_enabled a mmu-context parameter" made real-mode
set ->direct_map, and changed the code that merges in the memory type to depend
on direct_map instead of tdp_enabled.  However, in this case what really
matters is tdp, not direct_map, since tdp changes the pte format regardless
of whether the mapping is direct or not.

As a result, real-mode shadow mappings got corrupted with ept memory types.
The result was a huge slowdown, likely due to the cache being disabled.

Change it back as the simplest fix for the regression (real fix is to move
all that to vmx code, and not use tdp_enabled as a synonym for ept).

Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/kvm/mmu.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6e248d8..3ce56bf 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1980,7 +1980,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= shadow_user_mask;
if (level  PT_PAGE_TABLE_LEVEL)
spte |= PT_PAGE_SIZE_MASK;
-   if (vcpu-arch.mmu.direct_map)
+   if (tdp_enabled)
spte |= kvm_x86_ops-get_mt_mask(vcpu, gfn,
kvm_is_mmio_pfn(pfn));
 
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

KVM call agenda for Sept 14

2010-09-13 Thread Chris Wright

Please send in any agenda items you are interested in covering.

thanks,
-chris
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH 1/4] Add a new API to virtio-pci

2010-09-13 Thread Anthony Liguori


On 09/13/2010 04:04 AM, Michael S. Tsirkin wrote:

On Mon, Sep 13, 2010 at 09:50:42AM +0530, Krishna Kumar2 wrote:
   

Michael S. Tsirkinm...@redhat.com  wrote on 09/12/2010 05:16:37 PM:

 

Michael S. Tsirkinm...@redhat.com
09/12/2010 05:16 PM

On Thu, Sep 09, 2010 at 07:19:33PM +0530, Krishna Kumar2 wrote:
   

Unfortunately I need a
constant in vhost for now.
 

Maybe not even that: you create multiple vhost-net
devices so vhost-net in kernel does not care about these
either, right? So this can be just part of vhost_net.h
in qemu.
   

Sorry, I didn't understand what you meant.

I can remove all socks[] arrays/constants by pre-allocating
sockets in vhost_setup_vqs. Then I can remove all socks
parameters in vhost_net_stop, vhost_net_release and
vhost_net_reset_owner.

Does this make sense?

Thanks,

- KK
 

Here's what I mean: each vhost device includes 1 TX
and 1 RX VQ. Instead of teaching vhost about multiqueue,
we could simply open /dev/vhost-net multiple times.
How many times would be up to qemu.
   


Trouble is, each vhost-net device is associated with 1 tun/tap device 
which means that each vhost-net device is associated with a transmit and 
receive queue.


I don't know if you'll always have an equal number of transmit and 
receive queues but there's certainly a challenge in terms of flexibility
with this model.


Regards,

Anthony Liguori


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: KVM call agenda for Sept 14

2010-09-13 Thread Anthony Liguori


On 09/13/2010 10:59 AM, Chris Wright wrote:

Please send in any agenda items you are interested in covering.
   


1) 0.13.0

I'll be collecting patches for the next 24 hours so if there are fixes 
you care about, please ping me between now and then.  
http://wiki.qemu.org/Releases/0.13.0


Regards,

Anthony Liguori


thanks,
-chris
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
   


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch to install cygwin and run autotest in windows

2010-09-13 Thread Yogananth Subramanian

Hello Lucas,
Submitting the patch again using git send-email; there was some issue with
git config, but it's resolved now. Thanks for the link.

I tried not to make any drastic changes to the kvm-autotest infrastructure:
run_autotest() accepts an additional parameter, but other than that the rest of
the patch does not make any major changes to existing code.

Pasting below the contents of my previous mail, for reference.
Autotest part of the patch: 
This patch introduces a new parameter 'test_patch_file' in tests_config  to
indicate the patch file. Two new files are added related to patching
autotest: 'autotest.patch', which is the base patch for autotest needed
to run autotest in Windows, and 'iozone.patch', which is used to run iozone
on Windows. Similar patches could be developed for test cases like
'dacapo' and sysbench, which would enable running tests related to Java
and MySQL/PostgreSQL; I will be happy to write patches for them too.

Cygwin installation part of the patch:
This patch also adds two new files 'scripts/cyg_install.py' and
'tests/cyg_install'. 'scripts/cyg_install.py' is used to do unattended
install on cygwin. The parameter 'cyg_path' indicates the path of the
setup file. Installation can be done either locally, using
'cyg_install.local', or remotely, using 'cyg_install.remote'.
Local installation requires both the cygwin setup and packages files to
be present locally at the path indicated by  'cyg_path'. The parameter
'cyg_param' is used to pass the command line options for cygwin setup.
So it can contain options for packages to be installed, proxy server to
be used to do remote installation and so on. An iso image cyg.iso
containing the setup and packages in case of local install and just the
setup in case of remote install, is created on the fly. Installation is
started using telnet.

This patch acts as a complement to the existing support for running tests
using AutoIt. I will be happy to re-implement or modify the patch based
on your comments and also write patches for other test cases like
sysbench and dacapo.

Thanks
yogi
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] This patch enables installing cygwin and running autotest in windows guests

2010-09-13 Thread Yogananth Subramanian

From: Yogananth subramanian anant...@linux.vnet.ibm.com

The patch creates two new files 'scripts/cyg_install.py' and
'tests/cyg_install'. 'scripts/cyg_install.py' is used to install cygwin in
unattended mode. This patch also introduces a new parameter 'patch_file' to
run_autotest() in kvm_test_utils.py file, to install patch for autotest to
run in windows. The file 'autotest_control/autotest.patch' is base patch to
run any autotest test cases in windows and file 'autotest_control/iozone.patch'
is a patch to run iozone on windows.

Signed-off-by: Yogananth subramanian anant...@linux.vnet.ibm.com
---
 client/tests/kvm/autotest_control/autotest.patch |   15 
 client/tests/kvm/autotest_control/iozone.patch   |   12 +++
 client/tests/kvm/kvm_test_utils.py   |   10 ++-
 client/tests/kvm/scripts/cyg_install.py  |   98 ++
 client/tests/kvm/tests/autotest.py   |   21 -
 client/tests/kvm/tests/cyg_install.py|   36 
 client/tests/kvm/tests_base.cfg.sample   |   43 +-
 7 files changed, 231 insertions(+), 4 deletions(-)
 create mode 100644 client/tests/kvm/autotest_control/autotest.patch
 create mode 100644 client/tests/kvm/autotest_control/iozone.patch
 create mode 100644 client/tests/kvm/scripts/cyg_install.py
 create mode 100644 client/tests/kvm/tests/cyg_install.py

diff --git a/client/tests/kvm/autotest_control/autotest.patch 
b/client/tests/kvm/autotest_control/autotest.patch
new file mode 100644
index 000..f562f95
--- /dev/null
+++ b/client/tests/kvm/autotest_control/autotest.patch
@@ -0,0 +1,15 @@
+diff -aurpN client/bin/job.py client-new/bin/job.py
+--- client/bin/job.py   2010-08-25 01:42:27.0 -0400
 client-new/bin/job.py   2010-09-07 09:54:30.0 -0400
+@@ -296,7 +296,10 @@ class base_client_job(base_job.base_job)
+
+ # extract console= and other args from cmdline and add them into the
+ # base args that we use for all kernels we install
+-cmdline = utils.read_one_line('/proc/cmdline')
++if os.path.exists('/proc/cmdline'):
++cmdline = utils.read_one_line('/proc/cmdline')
++else:
++return
+ kernel_args = []
+ for karg in cmdline.split():
+ for param in copy_cmdline:
diff --git a/client/tests/kvm/autotest_control/iozone.patch 
b/client/tests/kvm/autotest_control/iozone.patch
new file mode 100644
index 000..6229205
--- /dev/null
+++ b/client/tests/kvm/autotest_control/iozone.patch
@@ -0,0 +1,12 @@
+diff -aurpN client/tests/iozone/iozone.py client-new/tests/iozone/iozone.py
+--- client/tests/iozone/iozone.py   2010-08-25 01:42:27.0 -0400
 client-new/tests/iozone/iozone.py   2010-09-02 11:38:42.0 -0400
+@@ -43,7 +43,7 @@ class iozone(test.test):
+ elif (arch == 'x86_64'):
+ utils.system('make linux-AMD64')
+ else:
+-utils.system('make linux')
++utils.system('make Windows')
+ 
+ 
+ def run_once(self, dir=None, args=None):
diff --git a/client/tests/kvm/kvm_test_utils.py 
b/client/tests/kvm/kvm_test_utils.py
index 5412aac..34bccb8 100644
--- a/client/tests/kvm/kvm_test_utils.py
+++ b/client/tests/kvm/kvm_test_utils.py
@@ -336,7 +336,7 @@ def get_memory_info(lvms):
 return meminfo
 
 
-def run_autotest(vm, session, control_path, timeout, outputdir):
+def run_autotest(vm, session, control_path, timeout, outputdir, patch_file):
 
 Run an autotest control file inside a guest (linux only utility).
 
@@ -346,6 +346,7 @@ def run_autotest(vm, session, control_path, timeout, 
outputdir):
 @param timeout: Timeout under which the autotest control file must 
complete.
 @param outputdir: Path on host where we should copy the guest autotest
 results to.
+@param patch_file: A path to an autotest patch file.
 
 def copy_if_hash_differs(vm, local_path, remote_path):
 
@@ -460,12 +461,19 @@ def run_autotest(vm, session, control_path, timeout, 
outputdir):
 os.path.join(autotest_path, 'control')):
 raise error.TestFail(Could not copy the test control file to guest)
 
+if not patch_file == :
+if not vm.copy_files_to(patch_file,
+os.path.join(autotest_path, '../test.patch')):
+raise error.TestFail(Could not copy the test patch file to guest)
+
 # Run the test
 logging.info(Running autotest control file %s on guest, timeout %ss,
  os.path.basename(control_path), timeout)
 session.get_command_output(cd %s % autotest_path)
 session.get_command_output(rm -f control.state)
 session.get_command_output(rm -rf results/*)
+if not patch_file == :
+session.get_command_output(patch -p1 ../test.patch)
 logging.info( Test output )
 status = session.get_command_status(bin/autotest control,

Re: [RFC PATCH 0/4] Implement multiqueue virtio-net

2010-09-13 Thread Krishna Kumar2

Michael S. Tsirkin m...@redhat.com wrote on 09/13/2010 05:20:55 PM:

  Results with the original kernel:
  _
  #   BW  SD  RSD
  __
  1   20903   1   6
  2   21963   6   25
  4   22042   23  102
  8   21674   97  419
  16  22281   379 1663
  24  22521   857 3748
  32  22976   1528    6594
  40  23197   2390    10239
  48  22973   3542    15074
  64  23809   6486    27244
  80  23564   10169   43118
  96  22977   14954   62948
  128 23649   27067   113892
  
 
  With higher number of threads running in parallel, SD
  increased. In this case most threads run in parallel
  only till __dev_xmit_skb (#numtxqs=1). With mq TX patch,
  higher number of threads run in parallel through
  ndo_start_xmit. I *think* the increase in SD is to do
  with higher # of threads running for larger code path
  From the numbers I posted with the patch (cut-n-paste
  only the % parts), BW increased much more than the SD,
  sometimes more than twice the increase in SD.

 Service demand is BW/CPU, right? So if BW goes up by 50%
 and SD by 40%, this means that CPU more than doubled.

I think the SD calculation might be more complicated,
I think it does it based on adding up averages sampled
and stored during the run. But, I still don't see how CPU
can double?? e.g.
BW: 1000 - 1500 (50%)
SD: 100 - 140 (40%)
CPU: 10 - 10.71 (7.1%)

  N#  BW% SD%  RSD%
  4   54.30   40.00   -1.16
  8   71.79   46.59   -2.68
  16  71.89   50.40   -2.50
  32  72.24   34.26   -14.52
  48  70.10   31.51   -14.35
  64  69.01   38.81   -9.66
  96  70.68   71.26   10.74
 
  I also think SD calculation gets skewed for guest-local
  host testing.

 If it's broken, let's fix it?

  For this test, I ran a guest with numtxqs=16.
  The first result below is with my patch, which creates 16
  vhosts. The second result is with a modified patch which
  creates only 2 vhosts (testing with #netperfs = 64):

 My guess is it's not a good idea to have more TX VQs than guest CPUs.

Definitely, I will try to run tomorrow with more reasonable
values, also will test with my second version of the patch
that creates restricted number of vhosts and post results.

 I realize for management it's easier to pass in a single vhost fd, but
 just for testing it's probably easier to add code in userspace to open
 /dev/vhost multiple times.

 
  #vhosts  BW% SD%RSD%
  16   20.79   186.01 149.74
  2    30.89   34.55  18.44
 
  The remote SD increases with the number of vhost threads,
  but that number seems to correlate with guest SD. So though
  BW% increased slightly from 20% to 30%, SD fell drastically
  from 186% to 34%. I think it could be a calculation skew
  with host SD, which also fell from 150% to 18%.

 I think by default netperf looks in /proc/stat for CPU utilization data:
 so host CPU utilization will include the guest CPU, I think?

It appears that way to me too, but the data above seems to
suggest the opposite...

 I would go further and claim that for host/guest TCP
 CPU utilization and SD should always be identical.
 Makes sense?

It makes sense to me, but once again I am not sure how SD
is really done, or whether it is linear to CPU. Cc'ing Rick
in case he can comment


 
  I am planning to submit 2nd patch rev with restricted
  number of vhosts.
 
Likely cause for the 1 stream degradation with multiple
vhost patch:
   
1. Two vhosts run handling the RX and TX respectively.
   I think the issue is related to cache ping-pong esp
   since these run on different cpus/sockets.
  
   Right. With TCP I think we are better off handling
   TX and RX for a socket by the same vhost, so that
   packet and its ack are handled by the same thread.
   Is this what happens with RX multiqueue patch?
   How do we select an RX queue to put the packet on?
 
  My (unsubmitted) RX patch doesn't do this yet, that is
  something I will check.
 
  Thanks,
 
  - KK

 You'll want to work on top of net-next, I think there's
 RX flow filtering work going on there.

Thanks Michael, I will follow up on that for the RX patch,
plus your suggestion on tying RX with TX.

Thanks,

- KK

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH 1/4] Add a new API to virtio-pci

2010-09-13 Thread Michael S. Tsirkin

On Mon, Sep 13, 2010 at 10:59:34AM -0500, Anthony Liguori wrote:
 On 09/13/2010 04:04 AM, Michael S. Tsirkin wrote:
 On Mon, Sep 13, 2010 at 09:50:42AM +0530, Krishna Kumar2 wrote:
 Michael S. Tsirkinm...@redhat.com  wrote on 09/12/2010 05:16:37 PM:
 
 Michael S. Tsirkinm...@redhat.com
 09/12/2010 05:16 PM
 
 On Thu, Sep 09, 2010 at 07:19:33PM +0530, Krishna Kumar2 wrote:
 Unfortunately I need a
 constant in vhost for now.
 Maybe not even that: you create multiple vhost-net
 devices so vhost-net in kernel does not care about these
 either, right? So this can be just part of vhost_net.h
 in qemu.
 Sorry, I didn't understand what you meant.
 
 I can remove all socks[] arrays/constants by pre-allocating
 sockets in vhost_setup_vqs. Then I can remove all socks
 parameters in vhost_net_stop, vhost_net_release and
 vhost_net_reset_owner.
 
 Does this make sense?
 
 Thanks,
 
 - KK
 Here's what I mean: each vhost device includes 1 TX
 and 1 RX VQ. Instead of teaching vhost about multiqueue,
 we could simply open /dev/vhost-net multiple times.
 How many times would be up to qemu.
 
 Trouble is, each vhost-net device is associated with 1 tun/tap
 device which means that each vhost-net device is associated with a
 transmit and receive queue.
 
 I don't know if you'll always have an equal number of transmit and
 receive queues but there's certainly  challenge in terms of
 flexibility with this model.
 
 Regards,
 
 Anthony Liguori

Not really, TX and RX can be mapped to different devices,
or you can only map one of these. What is the trouble?
What other features would you desire in terms of flexibility?

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Exceed 1GB/s with virtio-net ?

2010-09-13 Thread Alex Williamson

On Mon, Sep 13, 2010 at 4:32 AM, Thibault VINCENT
thibault.vinc...@smartjog.com wrote:
 Hello

 I'm trying to achieve higher than gigabit transfers over a virtio NIC
 with no success, and I can't find a recent bug or discussion about such
 an issue.

 The simpler test consist of two VM running on a high-end blade server
 with 4 cores and 4GB RAM each, and a virtio NIC dedicated to the
 inter-VM communication. On the host, the two vnet interfaces are
 enslaved into a bridge. I use a combination of 2.6.35 on the host and
 2.6.32 in the VMs.
 Running iperf or netperf on these VMs, with TCP or UDP, results in
 ~900Mbits/s transfers. This is what could be expected of a 1G
 interface, and indeed the e1000 emulation performs similarly.

 Changing the txqueuelen, MTU, and offloading settings on every interface
 (bridge/tap/virtio_net) didn't improve the speed, nor did the
 installation of irqbalance and the increase in CPU and RAM.

 Is this normal ? Is the multiple queue patch intended to address this ?
 It's quite possible I missed something :)

I'm able to achieve quite a bit more than 1Gbps using virtio-net
between 2 guests on the same host connected via an internal bridge.
With the virtio-net TX bottom half handler I can easily hit 7Gbps TCP
and 10+Gbps UDP using netperf (TCP_STREAM/UDP_STREAM tests).  Even
without the bottom half patches (not yet in qemu-kvm.git), I can get
~5Gbps.  Maybe you could describe your setup further, host details,
bridge setup, guests, specific tests, etc...  Thanks,

Alex
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH 1/4] Add a new API to virtio-pci

2010-09-13 Thread Anthony Liguori


On 09/13/2010 11:30 AM, Michael S. Tsirkin wrote:

On Mon, Sep 13, 2010 at 10:59:34AM -0500, Anthony Liguori wrote:
   

On 09/13/2010 04:04 AM, Michael S. Tsirkin wrote:
 

On Mon, Sep 13, 2010 at 09:50:42AM +0530, Krishna Kumar2 wrote:
   

Michael S. Tsirkinm...@redhat.com   wrote on 09/12/2010 05:16:37 PM:

 

Michael S. Tsirkinm...@redhat.com
09/12/2010 05:16 PM

On Thu, Sep 09, 2010 at 07:19:33PM +0530, Krishna Kumar2 wrote:
   

Unfortunately I need a
constant in vhost for now.
 

Maybe not even that: you create multiple vhost-net
devices so vhost-net in kernel does not care about these
either, right? So this can be just part of vhost_net.h
in qemu.
   

Sorry, I didn't understand what you meant.

I can remove all socks[] arrays/constants by pre-allocating
sockets in vhost_setup_vqs. Then I can remove all socks
parameters in vhost_net_stop, vhost_net_release and
vhost_net_reset_owner.

Does this make sense?

Thanks,

- KK
 

Here's what I mean: each vhost device includes 1 TX
and 1 RX VQ. Instead of teaching vhost about multiqueue,
we could simply open /dev/vhost-net multiple times.
How many times would be up to qemu.
   

Trouble is, each vhost-net device is associated with 1 tun/tap
device which means that each vhost-net device is associated with a
transmit and receive queue.

I don't know if you'll always have an equal number of transmit and
receive queues but there's certainly  challenge in terms of
flexibility with this model.

Regards,

Anthony Liguori
 

Not really, TX and RX can be mapped to different devices,
   


It's just a little odd.  Would you bond multiple tun tap devices to 
achieve multi-queue TX?  For RX, do you somehow limit RX to only one of 
those devices?


If we were doing this in QEMU (and btw, there needs to be userspace 
patches before we implement this in the kernel side), I think it would 
make more sense to just rely on doing a multithreaded write to a single 
tun/tap device and then to hope that it can be made smarter at the
macvtap layer.


Regards,

Anthony Liguori

Regards,

Anthony Liguori


or you can only map one of these. What is the trouble?
What other features would you desire in terms of flexibility?

   


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[KVM-AUTOTEST PATCH] tests/kvm: fix -net syntax for new qemu

2010-09-13 Thread Michael S. Tsirkin

netdev option in new qemu is mutually exclusive with vlan.
Only pass vlan if netdev option is missing.

Signed-off-by: Michael S. Tsirkin m...@redhat.com

---

diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py
index bdc9aab..7e76ed5 100755
--- a/client/tests/kvm/kvm_vm.py
+++ b/client/tests/kvm/kvm_vm.py
@@ -235,9 +235,10 @@ class VM:
 return cmd
 
 def add_nic(help, vlan, model=None, mac=None, netdev_id=None):
-cmd =  -net nic,vlan=%d % vlan
 if has_option(help, netdev):
-cmd +=,netdev=%s % netdev_id
+cmd =  -net nic,netdev=%s % netdev_id
+else:
+cmd =  -net nic,vlan=%d % vlan
 if model: cmd += ,model=%s % model
 if mac: cmd += ,macaddr='%s' % mac
 return cmd
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Rename KVM_UPSTREAM to OBSOLETE_KVM_IMPL

2010-09-13 Thread Avi Kivity

The symbol KVM_UPSTREAM is used to mark sections of code that are part of
the upstream kvm implementation that is not used in qemu-kvm.  However the
name becomes ambiguous if qemu-kvm is merged upstream.

Rename the symbol to avoid confusion.

Signed-off-by: Avi Kivity a...@redhat.com
---
 cpus.c|2 +-
 kvm-all.c |   16 
 kvm.h |6 +++---
 target-i386/kvm.c |   10 +-
 vl.c  |4 ++--
 5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/cpus.c b/cpus.c
index c545a62..99c04d1 100644
--- a/cpus.c
+++ b/cpus.c
@@ -299,7 +299,7 @@ void qemu_notify_event(void)
 }
 }
 
-#if defined(KVM_UPSTREAM) || !defined(CONFIG_KVM)
+#if defined(OBSOLETE_KVM_IMPL) || !defined(CONFIG_KVM)
 void qemu_mutex_lock_iothread(void) {}
 void qemu_mutex_unlock_iothread(void) {}
 #endif
diff --git a/kvm-all.c b/kvm-all.c
index 4ff75c4..d4b0861 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -41,7 +41,7 @@
 do { } while (0)
 #endif
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 
 typedef struct KVMSlot
 {
@@ -156,7 +156,7 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot 
*slot)
 return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem);
 }
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 static void kvm_reset_vcpu(void *opaque)
 {
 CPUState *env = opaque;
@@ -176,7 +176,7 @@ int kvm_pit_in_kernel(void)
 }
 
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 int kvm_init_vcpu(CPUState *env)
 {
 KVMState *s = kvm_state;
@@ -594,7 +594,7 @@ void kvm_cpu_register_phys_memory_client(void)
 cpu_register_phys_memory_client(kvm_cpu_phys_memory_client);
 }
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 
 int kvm_init(int smp_cpus)
 {
@@ -816,7 +816,7 @@ void kvm_flush_coalesced_mmio_buffer(void)
 #endif
 }
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 
 static void do_kvm_cpu_synchronize_state(void *_env)
 {
@@ -1038,7 +1038,7 @@ int kvm_has_debugregs(void)
 return kvm_state-debugregs;
 }
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 int kvm_has_xsave(void)
 {
 return kvm_state-xsave;
@@ -1069,10 +1069,10 @@ void kvm_setup_guest_memory(void *start, size_t size)
 }
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
-#ifndef KVM_UPSTREAM
+#ifndef OBSOLETE_KVM_IMPL
 #define run_on_cpu on_vcpu
 static void on_vcpu(CPUState *env, void (*func)(void *data), void *data);
-#endif /* !KVM_UPSTREAM */
+#endif /* !OBSOLETE_KVM_IMPL */
 
 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
  target_ulong pc)
diff --git a/kvm.h b/kvm.h
index d321fce..56236ae 100644
--- a/kvm.h
+++ b/kvm.h
@@ -31,13 +31,13 @@ extern int kvm_allowed;
 #define kvm_enabled() (0)
 #endif
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 struct kvm_run;
 
 /* external API */
 
 int kvm_init(int smp_cpus);
-#endif /* KVM_UPSTREAM */
+#endif /* OBSOLETE_KVM_IMPL */
 
 int kvm_has_sync_mmu(void);
 int kvm_has_vcpu_events(void);
@@ -96,7 +96,7 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run);
 
 int kvm_arch_pre_run(CPUState *env, struct kvm_run *run);
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 int kvm_arch_process_irqchip_events(CPUState *env);
 #endif
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index b00e80d..f4fc063 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -188,7 +188,7 @@ int kvm_arch_init_vcpu(CPUState *env)
 return r;
 }
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 
 env-mp_state = KVM_MP_STATE_RUNNABLE;
 
@@ -304,7 +304,7 @@ void kvm_arch_reset_vcpu(CPUState *env)
 env-mp_state = KVM_MP_STATE_RUNNABLE;
 }
 }
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 
 static int kvm_has_msr_star(CPUState *env)
 {
@@ -644,7 +644,7 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
 entry-data = value;
 }
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 static int kvm_put_msrs(CPUState *env, int level)
 {
 struct {
@@ -1104,7 +1104,7 @@ static int kvm_get_debugregs(CPUState *env)
 return 0;
 }
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 int kvm_arch_put_registers(CPUState *env, int level)
 {
 int ret;
@@ -1242,7 +1242,7 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
 return 0;
 }
 
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 
 int kvm_arch_process_irqchip_events(CPUState *env)
 {
diff --git a/vl.c b/vl.c
index 22a3616..378a176 100644
--- a/vl.c
+++ b/vl.c
@@ -2466,7 +2466,7 @@ int main(int argc, char **argv, char **envp)
 case QEMU_OPTION_smbios:
 do_smbios_option(optarg);
 break;
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
 case QEMU_OPTION_enable_kvm:
 kvm_allowed = 1;
 #endif
@@ -2803,7 +2803,7 @@ int main(int argc, char **argv, char **envp)
 if (kvm_allowed) {
 int ret = kvm_init(smp_cpus);
 if (ret  0) {
-#if defined(KVM_UPSTREAM) || defined(CONFIG_NO_CPU_EMULATION)
+#if

Re: [PATCH] KVM: MMU: Fix regression with ept memory types merged into non-ept page tables

2010-09-13 Thread Joerg Roedel

On Mon, Sep 13, 2010 at 04:56:19PM +0200, Avi Kivity wrote:
 Commit KVM: MMU: Make tdp_enabled a mmu-context parameter made real-mode
 set -direct_map, and changed the code that merges in the memory type depend
 on direct_map instead of tdp_enabled.  However, in this case what really
 matters is tdp, not direct_map, since tdp changes the pte format regardless
 of whether the mapping is direct or not.
 
 As a result, real-mode shadow mappings got corrupted with ept memory types.
 The result was a huge slowdown, likely due to the cache being disabled.
 
 Change it back as the simplest fix for the regression (real fix is to move
 all that to vmx code, and not use tdp_enabled as a synonym for ept).
 
 Signed-off-by: Avi Kivity a...@redhat.com
 ---
  arch/x86/kvm/mmu.c |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)
 
 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index 6e248d8..3ce56bf 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -1980,7 +1980,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
   spte |= shadow_user_mask;
   if (level  PT_PAGE_TABLE_LEVEL)
   spte |= PT_PAGE_SIZE_MASK;
 - if (vcpu-arch.mmu.direct_map)
 + if (tdp_enabled)
   spte |= kvm_x86_ops-get_mt_mask(vcpu, gfn,
   kvm_is_mmio_pfn(pfn));

Oh, indeed. Thanks for fixing this.

Acked-by: Joerg Roedel joerg.roe...@amd.com

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Rename KVM_UPSTREAM to OBSOLETE_KVM_IMPL

2010-09-13 Thread Jan Kiszka

Am 13.09.2010 19:54, Avi Kivity wrote:
 The symbol KVM_UPSTREAM is used to mark sections of code that are part of
 the upstream kvm implemetation that is not used in qemu-kvm.  However the
 name becomes ambiguous if qemu-kvm is merged upstream.

I doubt this is describing all cases correctly as well. Some changes
should rather happen the other way around (e.g. you surely don't want to
obsolete x86 kvm_arch_put/get_registers in favor of
kvm_arch_load/save_regs, do you?).

Jan

 
 Rename the symbol to avoid confusion.
 
 Signed-off-by: Avi Kivity a...@redhat.com
 ---
  cpus.c|2 +-
  kvm-all.c |   16 
  kvm.h |6 +++---
  target-i386/kvm.c |   10 +-
  vl.c  |4 ++--
  5 files changed, 19 insertions(+), 19 deletions(-)
 
 diff --git a/cpus.c b/cpus.c
 index c545a62..99c04d1 100644
 --- a/cpus.c
 +++ b/cpus.c
 @@ -299,7 +299,7 @@ void qemu_notify_event(void)
  }
  }
  
 -#if defined(KVM_UPSTREAM) || !defined(CONFIG_KVM)
 +#if defined(OBSOLETE_KVM_IMPL) || !defined(CONFIG_KVM)
  void qemu_mutex_lock_iothread(void) {}
  void qemu_mutex_unlock_iothread(void) {}
  #endif
 diff --git a/kvm-all.c b/kvm-all.c
 index 4ff75c4..d4b0861 100644
 --- a/kvm-all.c
 +++ b/kvm-all.c
 @@ -41,7 +41,7 @@
  do { } while (0)
  #endif
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  
  typedef struct KVMSlot
  {
 @@ -156,7 +156,7 @@ static int kvm_set_user_memory_region(KVMState *s, 
 KVMSlot *slot)
  return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem);
  }
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  static void kvm_reset_vcpu(void *opaque)
  {
  CPUState *env = opaque;
 @@ -176,7 +176,7 @@ int kvm_pit_in_kernel(void)
  }
  
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  int kvm_init_vcpu(CPUState *env)
  {
  KVMState *s = kvm_state;
 @@ -594,7 +594,7 @@ void kvm_cpu_register_phys_memory_client(void)
  cpu_register_phys_memory_client(kvm_cpu_phys_memory_client);
  }
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  
  int kvm_init(int smp_cpus)
  {
 @@ -816,7 +816,7 @@ void kvm_flush_coalesced_mmio_buffer(void)
  #endif
  }
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  
  static void do_kvm_cpu_synchronize_state(void *_env)
  {
 @@ -1038,7 +1038,7 @@ int kvm_has_debugregs(void)
  return kvm_state-debugregs;
  }
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  int kvm_has_xsave(void)
  {
  return kvm_state-xsave;
 @@ -1069,10 +1069,10 @@ void kvm_setup_guest_memory(void *start, size_t size)
  }
  
  #ifdef KVM_CAP_SET_GUEST_DEBUG
 -#ifndef KVM_UPSTREAM
 +#ifndef OBSOLETE_KVM_IMPL
  #define run_on_cpu on_vcpu
  static void on_vcpu(CPUState *env, void (*func)(void *data), void *data);
 -#endif /* !KVM_UPSTREAM */
 +#endif /* !OBSOLETE_KVM_IMPL */
  
  struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
   target_ulong pc)
 diff --git a/kvm.h b/kvm.h
 index d321fce..56236ae 100644
 --- a/kvm.h
 +++ b/kvm.h
 @@ -31,13 +31,13 @@ extern int kvm_allowed;
  #define kvm_enabled() (0)
  #endif
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  struct kvm_run;
  
  /* external API */
  
  int kvm_init(int smp_cpus);
 -#endif /* KVM_UPSTREAM */
 +#endif /* OBSOLETE_KVM_IMPL */
  
  int kvm_has_sync_mmu(void);
  int kvm_has_vcpu_events(void);
 @@ -96,7 +96,7 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run 
 *run);
  
  int kvm_arch_pre_run(CPUState *env, struct kvm_run *run);
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  int kvm_arch_process_irqchip_events(CPUState *env);
  #endif
  
 diff --git a/target-i386/kvm.c b/target-i386/kvm.c
 index b00e80d..f4fc063 100644
 --- a/target-i386/kvm.c
 +++ b/target-i386/kvm.c
 @@ -188,7 +188,7 @@ int kvm_arch_init_vcpu(CPUState *env)
  return r;
  }
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  
  env-mp_state = KVM_MP_STATE_RUNNABLE;
  
 @@ -304,7 +304,7 @@ void kvm_arch_reset_vcpu(CPUState *env)
  env-mp_state = KVM_MP_STATE_RUNNABLE;
  }
  }
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  
  static int kvm_has_msr_star(CPUState *env)
  {
 @@ -644,7 +644,7 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
  entry-data = value;
  }
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  static int kvm_put_msrs(CPUState *env, int level)
  {
  struct {
 @@ -1104,7 +1104,7 @@ static int kvm_get_debugregs(CPUState *env)
  return 0;
  }
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  int kvm_arch_put_registers(CPUState *env, int level)
  {
  int ret;
 @@ -1242,7 +1242,7 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run 
 *run)
  return 0;
  }
  
 -#ifdef KVM_UPSTREAM
 +#ifdef OBSOLETE_KVM_IMPL
  
  int kvm_arch_process_irqchip_events(CPUState *env)
  {
 diff --git a/vl.c b/vl.c
 index 22a3616..378a176 100644
 --- a/vl.c
 +++ b/vl.c
 @@ -2466,7 +2466,7 @@ int main(int argc, char **argv, char **envp)

Re: [PATCH] Rename KVM_UPSTREAM to OBSOLETE_KVM_IMPL

2010-09-13 Thread Anthony Liguori


On 09/13/2010 01:52 PM, Jan Kiszka wrote:

Am 13.09.2010 19:54, Avi Kivity wrote:
   

The symbol KVM_UPSTREAM is used to mark sections of code that are part of
the upstream kvm implemetation that is not used in qemu-kvm.  However the
name becomes ambiguous if qemu-kvm is merged upstream.
 

I doubt this is describing all cases correctly as well. Some changes
should rather happen the other way around (e.g. you surely don't want to
obsolete x86 kvm_arch_put/get_registers in favor of
kvm_arch_load/save_regs, do you?).
   


There's really no perfect name to describe what we're actually doing 
here.  It's probably not a detail worth worrying that much about.


Regards,

Anthony Liguori


Jan

   

Rename the symbol to avoid confusion.

Signed-off-by: Avi Kivitya...@redhat.com
---
  cpus.c|2 +-
  kvm-all.c |   16 
  kvm.h |6 +++---
  target-i386/kvm.c |   10 +-
  vl.c  |4 ++--
  5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/cpus.c b/cpus.c
index c545a62..99c04d1 100644
--- a/cpus.c
+++ b/cpus.c
@@ -299,7 +299,7 @@ void qemu_notify_event(void)
  }
  }

-#if defined(KVM_UPSTREAM) || !defined(CONFIG_KVM)
+#if defined(OBSOLETE_KVM_IMPL) || !defined(CONFIG_KVM)
  void qemu_mutex_lock_iothread(void) {}
  void qemu_mutex_unlock_iothread(void) {}
  #endif
diff --git a/kvm-all.c b/kvm-all.c
index 4ff75c4..d4b0861 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -41,7 +41,7 @@
  do { } while (0)
  #endif

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL

  typedef struct KVMSlot
  {
@@ -156,7 +156,7 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot 
*slot)
  return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION,mem);
  }

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
  static void kvm_reset_vcpu(void *opaque)
  {
  CPUState *env = opaque;
@@ -176,7 +176,7 @@ int kvm_pit_in_kernel(void)
  }


-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
  int kvm_init_vcpu(CPUState *env)
  {
  KVMState *s = kvm_state;
@@ -594,7 +594,7 @@ void kvm_cpu_register_phys_memory_client(void)
  cpu_register_phys_memory_client(kvm_cpu_phys_memory_client);
  }

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL

  int kvm_init(int smp_cpus)
  {
@@ -816,7 +816,7 @@ void kvm_flush_coalesced_mmio_buffer(void)
  #endif
  }

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL

  static void do_kvm_cpu_synchronize_state(void *_env)
  {
@@ -1038,7 +1038,7 @@ int kvm_has_debugregs(void)
  return kvm_state-debugregs;
  }

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
  int kvm_has_xsave(void)
  {
  return kvm_state-xsave;
@@ -1069,10 +1069,10 @@ void kvm_setup_guest_memory(void *start, size_t size)
  }

  #ifdef KVM_CAP_SET_GUEST_DEBUG
-#ifndef KVM_UPSTREAM
+#ifndef OBSOLETE_KVM_IMPL
  #define run_on_cpu on_vcpu
  static void on_vcpu(CPUState *env, void (*func)(void *data), void *data);
-#endif /* !KVM_UPSTREAM */
+#endif /* !OBSOLETE_KVM_IMPL */

  struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
   target_ulong pc)
diff --git a/kvm.h b/kvm.h
index d321fce..56236ae 100644
--- a/kvm.h
+++ b/kvm.h
@@ -31,13 +31,13 @@ extern int kvm_allowed;
  #define kvm_enabled() (0)
  #endif

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
  struct kvm_run;

  /* external API */

  int kvm_init(int smp_cpus);
-#endif /* KVM_UPSTREAM */
+#endif /* OBSOLETE_KVM_IMPL */

  int kvm_has_sync_mmu(void);
  int kvm_has_vcpu_events(void);
@@ -96,7 +96,7 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run);

  int kvm_arch_pre_run(CPUState *env, struct kvm_run *run);

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
  int kvm_arch_process_irqchip_events(CPUState *env);
  #endif

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index b00e80d..f4fc063 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -188,7 +188,7 @@ int kvm_arch_init_vcpu(CPUState *env)
  return r;
  }

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL

  env-mp_state = KVM_MP_STATE_RUNNABLE;

@@ -304,7 +304,7 @@ void kvm_arch_reset_vcpu(CPUState *env)
  env-mp_state = KVM_MP_STATE_RUNNABLE;
  }
  }
-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL

  static int kvm_has_msr_star(CPUState *env)
  {
@@ -644,7 +644,7 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
  entry-data = value;
  }

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
  static int kvm_put_msrs(CPUState *env, int level)
  {
  struct {
@@ -1104,7 +1104,7 @@ static int kvm_get_debugregs(CPUState *env)
  return 0;
  }

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL
  int kvm_arch_put_registers(CPUState *env, int level)
  {
  int ret;
@@ -1242,7 +1242,7 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
  return 0;
  }

-#ifdef KVM_UPSTREAM
+#ifdef OBSOLETE_KVM_IMPL

  int kvm_arch_process_irqchip_events(CPUState *env)
  {
diff --git a/vl.c b/vl.c
index 22a3616..378a176 100644
---

Re: [PATCH] Rename KVM_UPSTREAM to OBSOLETE_KVM_IMPL

2010-09-13 Thread Jan Kiszka

Am 13.09.2010 20:56, Anthony Liguori wrote:
 On 09/13/2010 01:52 PM, Jan Kiszka wrote:
 Am 13.09.2010 19:54, Avi Kivity wrote:
   
 The symbol KVM_UPSTREAM is used to mark sections of code that are
 part of
 the upstream kvm implementation that is not used in qemu-kvm.  However
 the
 name becomes ambiguous if qemu-kvm is merged upstream.
  
 I doubt this is describing all cases correctly as well. Some changes
 should rather happen the other way around (e.g. you surely don't want to
 obsolete x86 kvm_arch_put/get_registers in favor of
 kvm_arch_load/save_regs, do you?).

 
 There's really no perfect name to describe what we're actually doing
 here.  It's probably not a detail worth worrying that much about.

I don't mind the name as long as it doesn't reflect the strategy (but
why this change at all then?).

Jan (who would prefer to have the time for doing the cleanups)



signature.asc
Description: OpenPGP digital signature

[PATCH RFC] dma_rw.h (was Re: [PATCH 0/7] AMD IOMMU emulation patchset v4)

2010-09-13 Thread Michael S. Tsirkin

So I think the following will give the idea of what an API
might look like that will let us avoid the scary hacks in
e.g. the ide layer and other generic layers that need to do DMA,
without either binding us to pci, adding more complexity with
callbacks, or losing type safety with casts and void*.

Basically we have DMADevice that we can use container_of on
to get a PCIDevice from, and DMAMmu that will get instantiated
in a specific MMU.

This is not complete code - just a header - I might complete
this later if/when there's interest or hopefully someone interested
in iommu emulation will.

Notes:
the IOMMU_PERM_RW code seems unused, so I replaced
this with plain is_write. Is it ever useful?

It seems that invalidate callback should be able to
get away with just a device, so I switched to that
from a void pointer for type safety.
Seems enough for the users I saw.

I saw devices do stl_le_phys and such, these
might need to be wrapped as well.

Signed-off-by: Michael S. Tsirkin m...@redhat.com

---

diff --git a/hw/dma_rw.h b/hw/dma_rw.h
new file mode 100644
index 000..d63fd17
--- /dev/null
+++ b/hw/dma_rw.h
@@ -0,0 +1,122 @@
+#ifndef DMA_RW_H
+#define DMA_RW_H
+
+#include qemu-common.h
+
+/* We currently only have pci mmus, but using
+   a generic type makes it possible to use this
+   e.g. from the generic ide code without callbacks. */
+typedef uint64_t dma_addr_t;
+
+typedef struct DMAMmu DMAMmu;
+typedef struct DMADevice DMADevice;
+
+typedef int DMATranslateFunc(DMAMmu *mmu,
+ DMADevice *dev,
+ dma_addr_t addr,
+ dma_addr_t *paddr,
+ dma_addr_t *len,
+ int is_write);
+
+typedef int DMAInvalidateMapFunc(DMADevice *);
+struct DMAMmu {
+   /* invalidate, etc. */
+   DmaTranslateFunc *translate;
+};
+
+struct DMADevice {
+   DMAMmu *mmu;
+   DMAInvalidateMapFunc *invalidate;
+};
+
+void dma_device_init(DMADevice *, DMAMmu *, DMAInvalidateMapFunc *);
+
+static inline void dma_memory_rw(DMADevice *dev,
+dma_addr_t addr,
+void *buf,
+uint32_t len,
+int is_write)
+{
+uint32_t plen;
+/* Fast-path non-iommu.
+ * More importantly, makes it obvious what this function does. */
+if (!dev-mmu) {
+   cpu_physical_memory_rw(paddr, buf, plen, is_write);
+   return;
+}
+while (len) {
+err = dev-mmu-translate(iommu, dev, addr, paddr, plen, is_write);
+if (err) {
+return;
+}
+  
+/* The translation might be valid for larger regions. */
+if (plen  len) {
+plen = len;
+}
+
+cpu_physical_memory_rw(paddr, buf, plen, is_write);
+
+len -= plen;
+addr += plen;
+buf += plen;
+}
+}
+
+void *dma_memory_map(DMADevice *dev,
+dma_addr_t addr,
+uint32_t *len,
+int is_write);
+void dma_memory_unmap(DMADevice *dev,
+ void *buffer,
+ uint32_t len,
+ int is_write,
+ uint32_t access_len);
+
+
++#define DEFINE_DMA_LD(suffix, size)   \
++uint##size##_t dma_ld##suffix(DMADevice *dev, dma_addr_t addr)\
++{ \
++int err;  \
++target_phys_addr_t paddr, plen;   \
++if (!dev-mmu) {  \
++return ld##suffix##_phys(addr, val);  \
++} \
++  \
++err = dev-mmu-translate(dev-bus-iommu, dev,   \
++  addr, paddr, plen, IOMMU_PERM_READ);  \
++if (err || (plen  size / 8)) \
++return 0; \
++  \
++return ld##suffix##_phys(paddr);  \
++}
++
++#define DEFINE_DMA_ST(suffix, size)   \
++void dma_st##suffix(DMADevice *dev, dma_addr_t addr, uint##size##_t val)  \
++{ \
++int err;  \
++target_phys_addr_t paddr, plen;   \
++  \
++if (!dev-mmu) {

[RFC PATCH 0/1] macvtap TX zero copy between guest and host kernel

2010-09-13 Thread Shirley Ma

This patch introduces a new sock flag, ZEROCOPY, to avoid copying between
userspace and the kernel. macvtap is the first user of zero copy between
the guest and the host kernel. It is only used when the lower device supports high
memory DMA. The first set of patches only addresses the transmit (TX) side.
Testing has shown a big improvement, either in CPU utilization reduction
or in BW increase, on a 10GbE Intel NIC. Performance data will be submitted in
a coming email.

thanks
Shirley

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH RFC] dma_rw.h (was Re: [PATCH 0/7] AMD IOMMU emulation patchset v4)

2010-09-13 Thread Anthony Liguori


On 09/13/2010 03:01 PM, Michael S. Tsirkin wrote:

So I think the following will give the idea of what an API
might look like that will let us avoid the scary hacks in
e.g. the ide layer and other generic layers that need to do DMA,
without either binding us to pci, adding more complexity with
callbacks, or losing type safety with casts and void*.

Basically we have DMADevice that we can use container_of on
to get a PCIDevice from, and DMAMmu that will get instantiated
in a specific MMU.

This is not complete code - just a header - I might complete
this later if/when there's interest or hopefully someone interested
in iommu emulation will.

Notes:
the IOMMU_PERM_RW code seems unused, so I replaced
this with plain is_write. Is it ever useful?

It seems that invalidate callback should be able to
get away with just a device, so I switched to that
from a void pointer for type safety.
Seems enough for the users I saw.

I saw devices do stl_le_phys and such, these
might need to be wrapped as well.

Signed-off-by: Michael S. Tsirkinm...@redhat.com
   


One of the troubles with an interface like this is that I'm not sure a 
generic model universally works.


For instance, I know some PCI busses do transparent byte swapping.  For 
this to work, there has to be a notion of generic memory reads/writes 
vs. reads of a 32-bit, 16-bit, and 8-bit value.


With a generic API, we lose the flexibility to do this type of bus 
interface.


Regards,

Anthony Liguori


---

diff --git a/hw/dma_rw.h b/hw/dma_rw.h
new file mode 100644
index 000..d63fd17
--- /dev/null
+++ b/hw/dma_rw.h
@@ -0,0 +1,122 @@
+#ifndef DMA_RW_H
+#define DMA_RW_H
+
+#include qemu-common.h
+
+/* We currently only have pci mmus, but using
+   a generic type makes it possible to use this
+   e.g. from the generic ide code without callbacks. */
+typedef uint64_t dma_addr_t;
+
+typedef struct DMAMmu DMAMmu;
+typedef struct DMADevice DMADevice;
+
+typedef int DMATranslateFunc(DMAMmu *mmu,
+ DMADevice *dev,
+ dma_addr_t addr,
+ dma_addr_t *paddr,
+ dma_addr_t *len,
+ int is_write);
+
+typedef int DMAInvalidateMapFunc(DMADevice *);
+struct DMAMmu {
+   /* invalidate, etc. */
+   DmaTranslateFunc *translate;
+};
+
+struct DMADevice {
+   DMAMmu *mmu;
+   DMAInvalidateMapFunc *invalidate;
+};
+
+void dma_device_init(DMADevice *, DMAMmu *, DMAInvalidateMapFunc *);
+
+static inline void dma_memory_rw(DMADevice *dev,
+dma_addr_t addr,
+void *buf,
+uint32_t len,
+int is_write)
+{
+uint32_t plen;
+/* Fast-path non-iommu.
+ * More importantly, makes it obvious what this function does. */
+if (!dev-mmu) {
+   cpu_physical_memory_rw(paddr, buf, plen, is_write);
+   return;
+}
+while (len) {
+err = dev-mmu-translate(iommu, dev, addr,paddr,plen, is_write);
+if (err) {
+return;
+}
+
+/* The translation might be valid for larger regions. */
+if (plen  len) {
+plen = len;
+}
+
+cpu_physical_memory_rw(paddr, buf, plen, is_write);
+
+len -= plen;
+addr += plen;
+buf += plen;
+}
+}
+
+void *dma_memory_map(DMADevice *dev,
+dma_addr_t addr,
+uint32_t *len,
+int is_write);
+void dma_memory_unmap(DMADevice *dev,
+ void *buffer,
+ uint32_t len,
+ int is_write,
+ uint32_t access_len);
+
+
++#define DEFINE_DMA_LD(suffix, size)   \
++uint##size##_t dma_ld##suffix(DMADevice *dev, dma_addr_t addr)\
++{ \
++int err;  \
++target_phys_addr_t paddr, plen;   \
++if (!dev-mmu) {  \
++return ld##suffix##_phys(addr, val);  \
++} \
++  \
++err = dev-mmu-translate(dev-bus-iommu, dev,   \
++  addr,paddr,plen, IOMMU_PERM_READ);  \
++if (err || (plen  size / 8)) \
++return 0; \
++  \
++return ld##suffix##_phys(paddr);  \
++}
++
++#define DEFINE_DMA_ST(suffix, size)

RFC PATCH 1/2] macvtap: A new sock zero copy flag

2010-09-13 Thread Shirley Ma

/* Add a new flag to support sock zero copy from user space to kernel */

Signed-off-by: Shirley Ma x...@us.ibm.com
---

 include/net/sock.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index adab9dc..80172de 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -543,6 +543,7 @@ enum sock_flags {
SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
SOCK_FASYNC, /* fasync() active */
SOCK_RXQ_OVFL,
+   SOCK_ZEROCOPY, /* zerocopy from user space to kernel */
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel

2010-09-13 Thread Shirley Ma

Add zero copy feature between userspace and kernel in macvtap when lower device 
supports
high memory DMA.


Signed-off-by: Shirley Ma x...@us.ibm.com
---

 drivers/net/macvtap.c |  136 +
 1 files changed, 126 insertions(+), 10 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 3b1c54a..186cde1 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -274,6 +274,7 @@ static int macvtap_open(struct inode *inode, struct file 
*file)
struct net *net = current-nsproxy-net_ns;
struct net_device *dev = dev_get_by_index(net, iminor(inode));
struct macvtap_queue *q;
+   struct macvlan_dev *vlan = netdev_priv(dev);
int err;
 
err = -ENODEV;
@@ -302,6 +303,17 @@ static int macvtap_open(struct inode *inode, struct file 
*file)
q-flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
q-vnet_hdr_sz = sizeof(struct virtio_net_hdr);
 
+   /*
+* so far only VM uses macvtap, enable zero copy between guest
+* kernel and host kernel when lower device supports high memory
+* DMA
+*/
+   if (vlan) {
+   if ((vlan-lowerdev-features  NETIF_F_HIGHDMA) 
+   (vlan-lowerdev-features  NETIF_F_SG))
+   sock_set_flag(q-sk, SOCK_ZEROCOPY);
+   }
+
err = macvtap_set_queue(dev, file, q);
if (err)
sock_put(q-sk);
@@ -343,6 +355,24 @@ out:
return mask;
 }
 
+#define GOODCOPY_LEN  (L1_CACHE_BYTES  64 ? 64 : L1_CACHE_BYTES)
+
+static inline struct sk_buff *macvtap_alloc_skb_goodcopy(struct sock *sk,
+size_t prepad, size_t copy,
+int noblock, int *err)
+{
+   struct sk_buff *skb;
+
+   skb = sock_alloc_send_pskb(sk, prepad + copy, 0, noblock, err);
+   if (!skb)
+   return NULL;
+   skb_reserve(skb, prepad);
+   skb_put(skb, copy);
+
+   return skb;
+
+}
+
 static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
size_t len, size_t linear,
int noblock, int *err)
@@ -447,15 +477,91 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff 
*skb,
return 0;
 }
 
+/* set skb frags from iovec, this can move to core network code for reuse */
+static int set_sg_from_iovec_zerocopy(struct sk_buff *skb,
+ const struct iovec *from, int offset,
+ size_t count)
+{
+   int len = iov_length(from, count) - offset;
+   int copy = skb_headlen(skb);
+   int size, offset1 = 0;
+   int i = 0;
+   skb_frag_t *f;
+
+   /* Skip over from offset */
+   while (offset = from-iov_len) {
+   offset -= from-iov_len;
+   ++from;
+   --count;
+   }
+
+   /* copy up to skb headlen */
+   while (copy  0) {
+   size = min_t(unsigned int, copy, from-iov_len - offset);
+   if (copy_from_user(skb-data + offset1, from-iov_base + offset,
+  size))
+   return -EFAULT;
+   if (copy  size) {
+   ++from;
+   --count;
+   }
+   copy -= size;
+   offset1 += size;
+   offset = 0;
+   }
+
+   if (len == offset1)
+   return 0;
+
+   while (count--) {
+   struct page *page[MAX_SKB_FRAGS];
+   int num_pages;
+   unsigned long base;
+
+   len = from-iov_len - offset1;
+   if (!len) {
+   offset1 = 0;
+   ++from;
+   continue;
+   }
+   base = (unsigned long)from-iov_base + offset1;
+   size = ((base  ~PAGE_MASK) + len + ~PAGE_MASK)  PAGE_SHIFT;
+   num_pages = get_user_pages_fast(base, size, 0, page[i]);
+   if ((num_pages != size) ||
+   (num_pages  MAX_SKB_FRAGS - skb_shinfo(skb)-nr_frags))
+   /* put_page is in skb free */
+   return -EFAULT;
+   while (len) {
+   f = skb_shinfo(skb)-frags[i];
+   f-page = page[i];
+   f-page_offset = base  ~PAGE_MASK;
+   f-size = min_t(int, len, PAGE_SIZE - f-page_offset);
+   skb-data_len += f-size;
+   skb-len += f-size;
+   skb-truesize += f-size;
+   skb_shinfo(skb)-nr_frags++;
+   /* increase sk_wmem_alloc */
+   if (skb-sk  skb-destructor == sock_wfree)
+   atomic_add(f-size, skb-sk-sk_wmem_alloc);
+

Re: [PATCH] [RFC] Add support for a USB audio device model

2010-09-13 Thread Amos Kong

On Fri, Sep 10, 2010 at 02:47:56PM -0700, H. Peter Anvin wrote:
 I discovered that none of the audio device models supported by current
 Qemu/KVM appear to be supported out of the box on Win7 64 bit (AC97
 works fine on 32 bit).  The most logical way to fix that would be to
 add a long-term supportable audio device model.  Intel HD Audio and
 USB Audio seemed like the most reasonable options, but I opted for USB
 Audio for a few reasons:

...
 diff --git a/configure b/configure
 index 8228c1c..4fcb829 100755
 --- a/configure
 +++ b/configure
 @@ -71,8 +71,8 @@ sparc_cpu=
  cross_prefix=
  cc=gcc
  audio_drv_list=
 -audio_card_list=ac97 es1370 sb16
 -audio_possible_cards=ac97 es1370 sb16 cs4231a adlib gus
 +audio_card_list=ac97 es1370 sb16 usb-audio
 +audio_possible_cards=ac97 es1370 sb16 cs4231a adlib gus usb-audio
  block_drv_whitelist=
  host_cc=gcc
  ar=ar
 @@ -2414,7 +2414,7 @@ if test $vde = yes ; then
  fi
  for card in $audio_card_list; do
  def=CONFIG_`echo $card | tr '[:lower:]' '[:upper:]'`
 -echo $def=y  $config_host_mak
 +echo ${def//-/_}=y  $config_host_mak
  done
  echo CONFIG_AUDIO_DRIVERS=$audio_drv_list  $config_host_mak
  for drv in $audio_drv_list; do

# patch -p1  /tmp/usb-audio.patch
# ./configure
...
...
preadv supportyes
fdatasync yes
uuid support  no
vhost-net support no
Trace backend nop
Trace output file trace-pid
./configure: 2276: Bad substitution


 diff --git a/create_config b/create_config
 index 0098e68..1caa25b 100755
 --- a/create_config
 +++ b/create_config
 @@ -25,7 +25,7 @@ case $line in
   CONFIG_AUDIO_DRIVERS=*)
  drivers=${line#*=}
  echo #define CONFIG_AUDIO_DRIVERS \\
 -for drv in $drivers; do
 +for drv in ${drivers//-/_}; do
echo ${drv}_audio_driver,\\
  done
  echo 
 @@ -39,10 +39,12 @@ case $line in
  ;;
   CONFIG_*=y) # configuration
  name=${line%=*}
 +name=${name//-/_}
  echo #define $name 1
  ;;
   CONFIG_*=*) # configuration
  name=${line%=*}
 +name=${name//-/_}
  value=${line#*=}
  echo #define $name $value
  ;;

...

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [RFC] Add support for a USB audio device model

2010-09-13 Thread H. Peter Anvin

On 09/13/2010 01:53 PM, Amos Kong wrote:
 
 # patch -p1  /tmp/usb-audio.patch
 # ./configure
 ...
 ...
 preadv supportyes
 fdatasync yes
 uuid support  no
 vhost-net support no
 Trace backend nop
 Trace output file trace-pid
 ./configure: 2276: Bad substitution
 

What shell is your /bin/sh?

-hpa
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [RFC] Add support for a USB audio device model

2010-09-13 Thread H. Peter Anvin

On 09/13/2010 01:53 PM, Amos Kong wrote:
 
 # patch -p1  /tmp/usb-audio.patch
 # ./configure
 ...
 ...
 preadv supportyes
 fdatasync yes
 uuid support  no
 vhost-net support no
 Trace backend nop
 Trace output file trace-pid
 ./configure: 2276: Bad substitution
 
 
 diff --git a/create_config b/create_config
 index 0098e68..1caa25b 100755
 --- a/create_config
 +++ b/create_config
 @@ -25,7 +25,7 @@ case $line in
   CONFIG_AUDIO_DRIVERS=*)
  drivers=${line#*=}
  echo #define CONFIG_AUDIO_DRIVERS \\
 -for drv in $drivers; do
 +for drv in ${drivers//-/_}; do
echo ${drv}_audio_driver,\\
  done
  echo 
 @@ -39,10 +39,12 @@ case $line in
  ;;
   CONFIG_*=y) # configuration
  name=${line%=*}
 +name=${name//-/_}
  echo #define $name 1
  ;;
   CONFIG_*=*) # configuration
  name=${line%=*}
 +name=${name//-/_}
  value=${line#*=}
  echo #define $name $value
  ;;

Looks like ${.../...} is a bashism.

One can replace it with:

name=`echo $name | tr '-' '_'`

and

for drv in `echo $drivers | tr '-' '_'`; do

-hpa
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH v9 00/16] Provide a zero-copy method on KVM virtio-net.

2010-09-13 Thread Shirley Ma

On Fri, 2010-09-03 at 13:52 +0300, Michael S. Tsirkin wrote:
  When binding vhost thread to cpu3, qemu I/O thread to cpu2, macvtap
 zero
  copy patch can get 9.4Gb/s. 
  
  TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
 192.168.10.74 (192.168.10.74) port 0 AF_INET : cpu bind
  Recv   SendSend  Utilization
 Service Demand
  Socket Socket  Message  Elapsed  Send Recv Send
 Recv
  Size   SizeSize Time Throughput  localremote   local
 remote
  bytes  bytes   bytessecs.10^6bits/s  % S  % S  us/KB
 us/KB
  
   87380  16384  6553660.00  9408.19   55.698.45 0.970
 0.589
  
  Shirley
 
 OTOH CPU utilization is up too.

With the macvtap zero copy patch, the BW can reach link speed, at the cost of more CPU usage;
without the macvtap zero copy patch, the BW can't get up to link speed. To
achieve the same BW, CPU utilization is lower when using zero copy.

Shirley

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [KVM-AUTOTEST PATCH] tests/kvm: fix -net syntax for new qemu

2010-09-13 Thread Amos Kong

On Mon, Sep 13, 2010 at 07:43:22PM +0200, Michael S. Tsirkin wrote:
 netdev option in new qemu is mutually exclusive with vlan.
 Only pass vlan if netdev option is missing.
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com

This fix looks good to me.
Reviewed-by: Amos Kong ak...@redhat.com

BTW, we try to produce three kinds of cmdline, possible combinations:
1. Old way:   -net nic,model=e1000,vlan=1 -net tap,vlan=1
2. Semi-new:  -device e1000,vlan=1-net tap,vlan=1
3. Best way:  -netdev type=tap,id=netdev1 -device e1000,id=netdev1

If you think this is good, I'll send a patch upstream.
 
 ---
 
 diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py
 index bdc9aab..7e76ed5 100755
 --- a/client/tests/kvm/kvm_vm.py
 +++ b/client/tests/kvm/kvm_vm.py
 @@ -235,9 +235,10 @@ class VM:
  return cmd
  
  def add_nic(help, vlan, model=None, mac=None, netdev_id=None):
 -cmd =  -net nic,vlan=%d % vlan
  if has_option(help, netdev):
 -cmd +=,netdev=%s % netdev_id
 +cmd =  -net nic,netdev=%s % netdev_id
 +else:
 +cmd =  -net nic,vlan=%d % vlan
  if model: cmd += ,model=%s % model
  if mac: cmd += ,macaddr='%s' % mac
  return cmd
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [RFC] Add support for a USB audio device model

2010-09-13 Thread Amos Kong

On Mon, Sep 13, 2010 at 02:04:57PM -0700, H. Peter Anvin wrote:
 On 09/13/2010 01:53 PM, Amos Kong wrote:
  
  # patch -p1  /tmp/usb-audio.patch
  # ./configure
  ...
  ...
  preadv supportyes
  fdatasync yes
  uuid support  no
  vhost-net support no
  Trace backend nop
  Trace output file trace-pid
  ./configure: 2276: Bad substitution
  

Hello Peter,
 
 What shell is your /bin/sh?

dash,
It works fine with bash; I suggest using a portable construct instead.

I've heard wonderful music (guest: win7), but it is mixed with a little noise and is not 
very smooth.
Are the following debug messages normal?

# ./x86_64-softmmu/qemu-system-x86_64 ~/win7-32.qcow2 -m 1024 -vnc :0 
-usbdevice tablet -cpu qemu64  -enable-kvm -bios pc-bios/bios.bin -net 
nic,netdev=idkQlbc8,macaddr=02:BB:3A:D3:b8:29 -netdev 
tap,id=idkQlbc8,ifname=virtio_0_8000,script=/etc/qemu-ifup-vbr0,downscript=no,vhost=on
 -snapshot -usb -soundhw usb 
usb_create: no bus specified, using usb.0 for usb-audio
usb-audio: reset
usb-audio: control transaction: request 0x0005 value 0x0001 index 0x length 
0x
usb-audio: control transaction: request 0x8006 value 0x0100 index 0x length 
0x0008
usb-audio: control transaction: request 0x8006 value 0x0200 index 0x length 
0x0009
usb-audio: control transaction: request 0x8006 value 0x0200 index 0x length 
0x0071
usb-audio: reset
usb-audio: reset
usb-audio: control transaction: request 0x8006 value 0x0100 index 0x length 
0x0040
usb-audio: reset
usb-audio: control transaction: request 0x0005 value 0x0001 index 0x length 
0x
usb-audio: control transaction: request 0x8006 value 0x0100 index 0x length 
0x0012
usb-audio: control transaction: request 0x8006 value 0x0200 index 0x length 
0x00ff
usb-audio: control transaction: request 0x8006 value 0x03ee index 0x length 
0x0012
usb-audio: control transaction: request 0x8006 value 0x0303 index 0x0409 length 
0x00ff
usb-audio: control transaction: request 0x8006 value 0x0300 index 0x length 
0x00ff
usb-audio: control transaction: request 0x8006 value 0x0302 index 0x0409 length 
0x00ff
usb-audio: control transaction: request 0x8006 value 0x0600 index 0x length 
0x000a
usb-audio: failed control transaction: request 0x8006 value 0x0600 index 0x 
length 0x000a
usb-audio: control transaction: request 0x8006 value 0x0100 index 0x length 
0x0012
usb-audio: control transaction: request 0x8006 value 0x0200 index 0x length 
0x0009
usb-audio: control transaction: request 0x8006 value 0x0200 index 0x length 
0x0071
usb-audio: control transaction: request 0x0009 value 0x0001 index 0x length 
0x
usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 
0x
usb-audio: control transaction: request 0x8006 value 0x0305 index 0x0409 length 
0x0004
usb-audio: control transaction: request 0x8006 value 0x0305 index 0x0409 length 
0x002c
usb-audio: control transaction: request 0x8006 value 0x0307 index 0x0409 length 
0x004a
usb-audio: control transaction: request 0x8006 value 0x0308 index 0x0409 length 
0x0004
usb-audio: control transaction: request 0x8006 value 0x0308 index 0x0409 length 
0x003e
usb-audio: control transaction: request 0xa181 value 0x0100 index 0x0200 length 
0x0001
usb-audio: control transaction: request 0xa181 value 0x0201 index 0x0200 length 
0x0002
usb-audio: control transaction: request 0xa182 value 0x0201 index 0x0200 length 
0x0002
usb-audio: control transaction: request 0xa183 value 0x0201 index 0x0200 length 
0x0002
usb-audio: control transaction: request 0xa184 value 0x0201 index 0x0200 length 
0x0002
usb-audio: control transaction: request 0xa181 value 0x0202 index 0x0200 length 
0x0002
usb-audio: control transaction: request 0xa182 value 0x0202 index 0x0200 length 
0x0002
usb-audio: control transaction: request 0xa183 value 0x0202 index 0x0200 length 
0x0002
usb-audio: control transaction: request 0xa184 value 0x0202 index 0x0200 length 
0x0002
usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 
0x
usb-audio: control transaction: request 0x010b value 0x0001 index 0x0001 length 
0x
usb-audio: set interface 1
usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 
0x
usb-audio: set interface 0
usb-audio: control transaction: request 0x010b value 0x0001 index 0x0001 length 
0x
usb-audio: set interface 1
usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 
0x
usb-audio: set interface 0
usb-audio: control transaction: request 0x010b value 0x0001 index 0x0001 length 
0x
usb-audio: set interface 1
usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 
0x
usb-audio: set interface 0
usb-audio: control transaction: request 0x010b value 0x0001 index 0x0001 length 
0x
usb-audio: set interface 1
usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 
0x
usb-audio: set interface 0
usb-audio: control transaction: request

Re: [KVM-AUTOTEST PATCH] tests/kvm: fix -net syntax for new qemu

2010-09-13 Thread Lucas Meneghel Rodrigues

On Mon, 2010-09-13 at 19:43 +0200, Michael S. Tsirkin wrote:
 netdev option in new qemu is mutually exclusive with vlan.
 Only pass vlan if netdev option is missing.

Looks good to me, applied, thanks!

http://autotest.kernel.org/changeset/4783

 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 
 ---
 
 diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py
 index bdc9aab..7e76ed5 100755
 --- a/client/tests/kvm/kvm_vm.py
 +++ b/client/tests/kvm/kvm_vm.py
 @@ -235,9 +235,10 @@ class VM:
  return cmd
  
  def add_nic(help, vlan, model=None, mac=None, netdev_id=None):
 -cmd =  -net nic,vlan=%d % vlan
  if has_option(help, netdev):
 -cmd +=,netdev=%s % netdev_id
 +cmd =  -net nic,netdev=%s % netdev_id
 +else:
 +cmd =  -net nic,vlan=%d % vlan
  if model: cmd += ,model=%s % model
  if mac: cmd += ,macaddr='%s' % mac
  return cmd
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel

2010-09-13 Thread David Miller

From: Shirley Ma mashi...@us.ibm.com
Date: Mon, 13 Sep 2010 13:48:03 -0700

 + base = (unsigned long)from-iov_base + offset1;
 + size = ((base  ~PAGE_MASK) + len + ~PAGE_MASK)  PAGE_SHIFT;
 + num_pages = get_user_pages_fast(base, size, 0, page[i]);
 + if ((num_pages != size) ||
 + (num_pages  MAX_SKB_FRAGS - skb_shinfo(skb)-nr_frags))
 + /* put_page is in skb free */
 + return -EFAULT;

What keeps the user from writing to these pages in its address space
after the write call returns?

A write() return of success means:

"I wrote what you gave to me"

not

"I wrote what you gave to me, oh and BTW don't touch these
 pages for a while."

In fact "a while" isn't even defined in any way, as there is no way
for the write() invoker to know when the networking card is done with
those pages.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH next 00/16] treewide: Use static const char * const where possible

2010-09-13 Thread Joe Perches

Convert the uses that should be const of
char *foo[] = {"bar", "baz"};
to
const char * const foo[] = {"bar", "baz"};
or
static const char * const foo[] = {"bar", "baz"};

Joe Perches (16):
  arch/alpha: Use static const char * const where possible
  arch/ia64: Use static const char * const where possible
  arch/microblaze: Use static const char * const where possible
  arch/x86: Use static const char * const where possible
  drivers/gpu: Use static const char * const where possible
  drivers/isdn: Use static const char * const where possible
  drivers/net: Use static const char * const where possible
  drivers/net/pcmcia: Use static const char * const where possible
  drivers/net/wireless: Use static const char * const where possible
  drivers/scsi: Use static const char * const where possible
  drivers/staging: Use static const char * const where possible
  drivers/usb: Use static const char * const where possible
  drivers/watchdog: Use static const char * const where possible
  fs: Use static const char * const where possible
  net/irda: Use static const char * const where possible
  sound: Use static const char * const where possible

 arch/alpha/kernel/err_ev6.c  |   12 +---
 arch/alpha/kernel/err_marvel.c   |   33 
 arch/alpha/kernel/err_titan.c|   35 ++---
 arch/alpha/kernel/osf_sys.c  |4 +-
 arch/ia64/kernel/palinfo.c   |2 +-
 arch/microblaze/kernel/heartbeat.c   |   10 +++---
 arch/microblaze/kernel/timer.c   |   12 
 arch/x86/kernel/smpboot.c|2 +-
 arch/x86/kvm/mmu.c   |2 +-
 drivers/gpu/drm/ttm/ttm_page_alloc.c |4 ++-
 drivers/isdn/pcbit/edss1.c   |2 +-
 drivers/isdn/pcbit/edss1.h   |2 +-
 drivers/net/3c515.c  |4 ++-
 drivers/net/eth16i.c |4 ++-
 drivers/net/pcmcia/3c589_cs.c|2 +-
 drivers/net/wireless/rt2x00/rt2x00debug.c|2 +-
 drivers/scsi/bfa/rport.c |4 ++-
 drivers/scsi/pcmcia/nsp_debug.c  |2 +-
 drivers/scsi/qla2xxx/qla_nx.c|4 +-
 drivers/scsi/qla4xxx/ql4_nx.c|2 +-
 drivers/staging/ath6kl/os/linux/ar6000_drv.c |   14 +-
 drivers/staging/bcm/Debug.c  |5 ++-
 drivers/usb/host/oxu210hp-hcd.c  |2 +-
 drivers/watchdog/machzwd.c   |2 +-
 fs/binfmt_flat.c |4 ++-
 include/net/irda/irlan_event.h   |2 +-
 net/irda/irlan/irlan_event.c |2 +-
 sound/core/misc.c|5 +---
 sound/core/pcm_native.c  |2 +-
 29 files changed, 99 insertions(+), 83 deletions(-)

-- 
1.7.3.rc1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH next 04/16] arch/x86: Use static const char * const where possible

2010-09-13 Thread Joe Perches

Signed-off-by: Joe Perches j...@perches.com
---
 arch/x86/kernel/smpboot.c |2 +-
 arch/x86/kvm/mmu.c|2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 63a1a55..b745b30 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -498,7 +498,7 @@ static void impress_friends(void)
 void __inquire_remote_apic(int apicid)
 {
unsigned i, regs[] = { APIC_ID  4, APIC_LVR  4, APIC_SPIV  4 };
-   char *names[] = { ID, VERSION, SPIV };
+   const char * const names[] = { ID, VERSION, SPIV };
int timeout;
u32 status;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d2dad65..2a7d567 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -56,7 +56,7 @@ enum {
AUDIT_POST_PTE_WRITE
 };
 
-char *audit_point_name[] = {
+const char * const audit_point_name[] = {
pre page fault,
post page fault,
pre pte write,
-- 
1.7.3.rc1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [RFC] Add support for a USB audio device model

2010-09-13 Thread H. Peter Anvin

On 09/13/2010 06:37 PM, Amos Kong wrote:
 
 Hello Peter,
  
 What shell is your /bin/sh?
 
 dash,
 It's fine when using bash, I suggest to use a common way.
 

Yes, I'll fix it.

 I've heard wonderful music (guest:win7), but mixed with a little noise, not so 
 fluent.
 The following debug msg is normal?

Yes, all of that is normal.  I talked to malc earlier today, and I think
I have a pretty good idea for how to deal with the rate-matching issues;
I'm going to try to write it up tomorrow.

-hpa


-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

86 matches

Mail list logo