[COMMIT master] apic: test nmi-after-sti

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

While not required by the spec, some guests (Linux)
rely on nmi being blocked by an IF-enabling sti.  Add
a unit test for this condition.

Signed-off-by: Avi Kivity 
Signed-off-by: Marcelo Tosatti 

diff --git a/x86/apic.c b/x86/apic.c
index 165f820..2207040 100644
--- a/x86/apic.c
+++ b/x86/apic.c
@@ -1,6 +1,7 @@
 #include "libcflat.h"
 #include "apic.h"
 #include "vm.h"
+#include "smp.h"
 
 typedef struct {
 unsigned short offset0;
@@ -274,9 +275,74 @@ static void test_ioapic_simultaneous(void)
g_66 && g_78 && g_66_after_78 && g_66_rip == g_78_rip);
 }
 
+volatile int nmi_counter_private, nmi_counter, nmi_hlt_counter, 
sti_loop_active;
+
+void sti_nop(char *p)
+{
+asm volatile (
+ ".globl post_sti \n\t"
+ "sti \n"
+ /*
+  * vmx won't exit on external interrupt if blocked-by-sti,
+  * so give it a reason to exit by accessing an unmapped page.
+  */
+ "post_sti: testb $0, %0 \n\t"
+ "nop \n\t"
+ "cli"
+ : : "m"(*p)
+ );
+nmi_counter = nmi_counter_private;
+}
+
+static void sti_loop(void *ignore)
+{
+unsigned k = 0;
+
+while (sti_loop_active) {
+   sti_nop((char *)(ulong)((k++ * 4096) % (128 * 1024 * 1024)));
+}
+}
+
+static void nmi_handler(isr_regs_t *regs)
+{
+extern void post_sti(void);
+++nmi_counter_private;
+nmi_hlt_counter += regs->rip == (ulong)post_sti;
+}
+
+static void update_cr3(void *cr3)
+{
+write_cr3((ulong)cr3);
+}
+
+static void test_sti_nmi(void)
+{
+unsigned old_counter;
+
+if (cpu_count() < 2) {
+   return;
+}
+
+set_idt_entry(2, nmi_handler);
+on_cpu(1, update_cr3, (void *)read_cr3());
+
+sti_loop_active = 1;
+on_cpu_async(1, sti_loop, 0);
+while (nmi_counter < 3) {
+   old_counter = nmi_counter;
+   apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 1);
+   while (nmi_counter == old_counter) {
+   ;
+   }
+}
+sti_loop_active = 0;
+report("nmi-after-sti", nmi_hlt_counter == 0);
+}
+
 int main()
 {
 setup_vm();
+smp_init();
 
 test_lapic_existence();
 
@@ -288,6 +354,7 @@ int main()
 
 test_ioapic_intr();
 test_ioapic_simultaneous();
+test_sti_nmi();
 
 printf("\nsummary: %d tests, %d failures\n", g_tests, g_fail);
 
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] apic: use boot idt instead of a locally allocated idt

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

This allows the smp support, which uses the boot idt, to work.

Signed-off-by: Avi Kivity 
Signed-off-by: Marcelo Tosatti 

diff --git a/x86/apic.c b/x86/apic.c
index 48fa0f7..165f820 100644
--- a/x86/apic.c
+++ b/x86/apic.c
@@ -89,7 +89,7 @@ asm (
 #endif
 );
 
-static idt_entry_t idt[256];
+static idt_entry_t *idt = 0;
 
 static int g_fail;
 static int g_tests;
@@ -127,19 +127,6 @@ void test_enable_x2apic(void)
 }
 }
 
-static void init_idt(void)
-{
-struct {
-u16 limit;
-ulong idt;
-} __attribute__((packed)) idt_ptr = {
-sizeof(idt_entry_t) * 256 - 1,
-(ulong)&idt,
-};
-
-asm volatile("lidt %0" : : "m"(idt_ptr));
-}
-
 static void set_idt_entry(unsigned vec, void (*func)(isr_regs_t *regs))
 {
 u8 *thunk = vmalloc(50);
@@ -296,7 +283,6 @@ int main()
 mask_pic_interrupts();
 enable_apic();
 test_enable_x2apic();
-init_idt();
 
 test_self_ipi();
 
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: fast-path msi injection with irqfd

2010-11-23 Thread Avi Kivity
From: Michael S. Tsirkin 

Store irq routing table pointer in the irqfd object,
and use that to inject MSI directly without bouncing out to
a kernel thread.

While we touch this structure, rearrange irqfd fields to make fastpath
better packed for better cache utilization.

This also adds some comments about locking rules and rcu usage in code.

Some notes on the design:
- Use pointer into the rt instead of copying an entry,
  to make it possible to use rcu, thus side-stepping
  locking complexities.  We also save some memory this way.
- Old workqueue code is still used for level irqs.
  I don't think we DTRT with level anyway, however,
  it seems easier to keep the code around as
  it has been thought through and debugged, and fix level later than
  rip out and re-instate it later.

Signed-off-by: Michael S. Tsirkin 
Acked-by: Marcelo Tosatti 
Acked-by: Gregory Haskins 
Signed-off-by: Avi Kivity 

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4bd663d..f17beae 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -240,6 +241,10 @@ struct kvm {
 
struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
+   /*
+* Update side is protected by irq_lock and,
+* if configured, irqfds.lock.
+*/
struct kvm_irq_routing_table __rcu *irq_routing;
struct hlist_head mask_notifier_list;
struct hlist_head irq_ack_notifier_list;
@@ -511,6 +516,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic 
*ioapic,
   unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm 
*kvm,
+   int irq_source_id, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian);
@@ -652,17 +659,26 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) 
{}
 void kvm_eventfd_init(struct kvm *kvm);
 int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 void kvm_irqfd_release(struct kvm *kvm);
+void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
 #else
 
 static inline void kvm_eventfd_init(struct kvm *kvm) {}
+
 static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
 {
return -EINVAL;
 }
 
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
+
+static inline void kvm_irq_routing_update(struct kvm *kvm,
+ struct kvm_irq_routing_table *irq_rt)
+{
+   rcu_assign_pointer(kvm->irq_routing, irq_rt);
+}
+
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
return -ENOSYS;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c1f1e3c..2ca4535 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -44,14 +44,19 @@
  */
 
 struct _irqfd {
-   struct kvm   *kvm;
-   struct eventfd_ctx   *eventfd;
-   int   gsi;
-   struct list_head  list;
-   poll_tablept;
-   wait_queue_t  wait;
-   struct work_structinject;
-   struct work_structshutdown;
+   /* Used for MSI fast-path */
+   struct kvm *kvm;
+   wait_queue_t wait;
+   /* Update side is protected by irqfds.lock */
+   struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
+   /* Used for level IRQ fast-path */
+   int gsi;
+   struct work_struct inject;
+   /* Used for setup/shutdown */
+   struct eventfd_ctx *eventfd;
+   struct list_head list;
+   poll_table pt;
+   struct work_struct shutdown;
 };
 
 static struct workqueue_struct *irqfd_cleanup_wq;
@@ -125,14 +130,22 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, 
void *key)
 {
struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
unsigned long flags = (unsigned long)key;
+   struct kvm_kernel_irq_routing_entry *irq;
+   struct kvm *kvm = irqfd->kvm;
 
-   if (flags & POLLIN)
+   if (flags & POLLIN) {
+   rcu_read_lock();
+   irq = rcu_dereference(irqfd->irq_entry);
/* An event has been signaled, inject an interrupt */
-   schedule_work(&irqfd->inject);
+   if (irq)
+   kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+   else
+   schedule_work(&irqfd->inject);
+   rcu_read_unlock();
+   }
 
if (flags & POLLHUP) {
/* The eventfd is closing, detach from KVM */
-   struct kvm *kvm = irqfd->kvm;
unsigned long flags;
 
  

[COMMIT master] KVM: Add instruction-set-specific exit qualifications to kvm_exit trace

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

The exit reason alone is insufficient to understand exactly why an exit
occurred; add ISA-specific trace parameters for additional information.

Because fetching these parameters is expensive on vmx, and because these
parameters are fetched even if tracing is disabled, we fetch the
parameters via a callback instead of as traditional trace arguments.

Signed-off-by: Avi Kivity 

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b04c0fa..54e42c8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -594,6 +594,7 @@ struct kvm_x86_ops {
 
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
+   void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
const struct trace_print_flags *exit_reasons_str;
 };
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b83954e..2fd2f4d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2974,6 +2974,14 @@ void dump_vmcb(struct kvm_vcpu *vcpu)
 
 }
 
+static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
+{
+   struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+
+   *info1 = control->exit_info_1;
+   *info2 = control->exit_info_2;
+}
+
 static int handle_exit(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3678,7 +3686,9 @@ static struct kvm_x86_ops svm_x86_ops = {
.get_tdp_level = get_npt_level,
.get_mt_mask = svm_get_mt_mask,
 
+   .get_exit_info = svm_get_exit_info,
.exit_reasons_str = svm_exit_reasons_str,
+
.get_lpage_level = svm_get_lpage_level,
 
.cpuid_update = svm_cpuid_update,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 1061022..1357d7c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -192,18 +192,22 @@ TRACE_EVENT(kvm_exit,
__field(unsigned int,   exit_reason )
__field(unsigned long,  guest_rip   )
__field(u32,isa )
+   __field(u64,info1   )
+   __field(u64,info2   )
),
 
TP_fast_assign(
__entry->exit_reason= exit_reason;
__entry->guest_rip  = kvm_rip_read(vcpu);
__entry->isa= isa;
+   kvm_x86_ops->get_exit_info(vcpu, &__entry->info1,
+  &__entry->info2);
),
 
-   TP_printk("reason %s rip 0x%lx",
+   TP_printk("reason %s rip 0x%lx info %llx %llx",
 ftrace_print_symbols_seq(p, __entry->exit_reason,
  kvm_x86_ops->exit_reasons_str),
-__entry->guest_rip)
+__entry->guest_rip, __entry->info1, __entry->info2)
 );
 
 /*
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4e2b8f3..caa967e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3690,6 +3690,12 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu 
*vcpu) = {
 static const int kvm_vmx_max_exit_handlers =
ARRAY_SIZE(kvm_vmx_exit_handlers);
 
+static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
+{
+   *info1 = vmcs_readl(EXIT_QUALIFICATION);
+   *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
+}
+
 /*
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
@@ -4339,7 +4345,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.get_tdp_level = get_ept_level,
.get_mt_mask = vmx_get_mt_mask,
 
+   .get_exit_info = vmx_get_exit_info,
.exit_reasons_str = vmx_exit_reasons_str,
+
.get_lpage_level = vmx_get_lpage_level,
 
.cpuid_update = vmx_cpuid_update,
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Record instruction set in kvm_exit tracepoint

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

exit_reason's meaning depends on the instruction set; record it so a trace
taken on one machine can be interpreted on another.

Signed-off-by: Avi Kivity 

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c6a7798..b83954e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2980,7 +2980,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
u32 exit_code = svm->vmcb->control.exit_code;
 
-   trace_kvm_exit(exit_code, vcpu);
+   trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
 
if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a6544b8..1061022 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -178,21 +178,26 @@ TRACE_EVENT(kvm_apic,
 #define trace_kvm_apic_read(reg, val)  trace_kvm_apic(0, reg, val)
 #define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val)
 
+#define KVM_ISA_VMX   1
+#define KVM_ISA_SVM   2
+
 /*
  * Tracepoint for kvm guest exit:
  */
 TRACE_EVENT(kvm_exit,
-   TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu),
-   TP_ARGS(exit_reason, vcpu),
+   TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa),
+   TP_ARGS(exit_reason, vcpu, isa),
 
TP_STRUCT__entry(
__field(unsigned int,   exit_reason )
__field(unsigned long,  guest_rip   )
+   __field(u32,isa )
),
 
TP_fast_assign(
__entry->exit_reason= exit_reason;
__entry->guest_rip  = kvm_rip_read(vcpu);
+   __entry->isa= isa;
),
 
TP_printk("reason %s rip 0x%lx",
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 58e5913..4e2b8f3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3700,7 +3700,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
u32 exit_reason = vmx->exit_reason;
u32 vectoring_info = vmx->idt_vectoring_info;
 
-   trace_kvm_exit(exit_reason, vcpu);
+   trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
 
/* If guest state is invalid, start emulating */
if (vmx->emulation_required && emulate_invalid_guest_state)
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git/

2010-11-23 Thread Avi Kivity
From: Marcelo Tosatti 

Conflicts:
arch/x86/kvm/svm.c
kernel/sched.c

Signed-off-by: Marcelo Tosatti 
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: VMX: Fold __vmx_vcpu_run() into vmx_vcpu_run()

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

cea15c2 ("KVM: Move KVM context switch into own function") split vmx_vcpu_run()
to prevent multiple copies of the context switch from being generated (causing
problems due to a label).  This patch folds them back together again and adds
the __noclone attribute to prevent the label from being duplicated.

Signed-off-by: Avi Kivity 

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a9ad174..58e5913 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3904,17 +3904,33 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
 #define Q "l"
 #endif
 
-/*
- * We put this into a separate noinline function to prevent the compiler
- * from duplicating the code. This is needed because this code
- * uses non local labels that cannot be duplicated.
- * Do not put any flow control into this function.
- * Better would be to put this whole monstrosity into a .S file.
- */
-static void noinline do_vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
-   asm volatile(
+
+   /* Record the guest's net vcpu time for enforced NMI injections. */
+   if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
+   vmx->entry_time = ktime_get();
+
+   /* Don't enter VMX if guest state is invalid, let the exit handler
+  start emulation until we arrive back to a valid state */
+   if (vmx->emulation_required && emulate_invalid_guest_state)
+   return;
+
+   if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
+   vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
+   if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
+   vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+
+   /* When single-stepping over STI and MOV SS, we must clear the
+* corresponding interruptibility bits in the guest state. Otherwise
+* vmentry fails as it then expects bit 14 (BS) in pending debug
+* exceptions being set, but that's not correct for the guest debugging
+* case. */
+   if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+   vmx_set_interrupt_shadow(vcpu, 0);
+
+   asm(
/* Store host registers */
"push %%"R"dx; push %%"R"bp;"
"push %%"R"cx \n\t"
@@ -4009,35 +4025,6 @@ static void noinline do_vmx_vcpu_run(struct kvm_vcpu 
*vcpu)
, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
 #endif
  );
-}
-
-static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
-{
-   struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-   /* Record the guest's net vcpu time for enforced NMI injections. */
-   if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
-   vmx->entry_time = ktime_get();
-
-   /* Don't enter VMX if guest state is invalid, let the exit handler
-  start emulation until we arrive back to a valid state */
-   if (vmx->emulation_required && emulate_invalid_guest_state)
-   return;
-
-   if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
-   vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
-   if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
-   vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
-
-   /* When single-stepping over STI and MOV SS, we must clear the
-* corresponding interruptibility bits in the guest state. Otherwise
-* vmentry fails as it then expects bit 14 (BS) in pending debug
-* exceptions being set, but that's not correct for the guest debugging
-* case. */
-   if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-   vmx_set_interrupt_shadow(vcpu, 0);
-
-   do_vmx_vcpu_run(vcpu);
 
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
  | (1 << VCPU_EXREG_PDPTR));
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: x86 emulator: drop DPRINTF()

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

Failed emulation is reported via a tracepoint; the cmps printk is pointless.

Signed-off-by: Avi Kivity 
Signed-off-by: Marcelo Tosatti 

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ffd6e01..3325b47 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -22,7 +22,6 @@
 
 #include 
 #include "kvm_cache_regs.h"
-#define DPRINTF(x...) do {} while (0)
 #include 
 #include 
 
@@ -2796,10 +2795,8 @@ done_prefixes:
c->execute = opcode.u.execute;
 
/* Unrecognised? */
-   if (c->d == 0 || (c->d & Undefined)) {
-   DPRINTF("Cannot emulate %02x\n", c->b);
+   if (c->d == 0 || (c->d & Undefined))
return -1;
-   }
 
if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
c->op_bytes = 8;
@@ -3261,7 +3258,6 @@ special_insn:
break;
case 0xa6 ... 0xa7: /* cmps */
c->dst.type = OP_NONE; /* Disable writeback. */
-   DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.addr.mem, 
c->dst.addr.mem);
goto cmp;
case 0xa8 ... 0xa9: /* test ax, imm */
goto test;
@@ -3778,6 +3774,5 @@ twobyte_insn:
goto writeback;
 
 cannot_emulate:
-   DPRINTF("Cannot emulate %02x\n", c->b);
return -1;
 }
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: x86 emulator: do not perform address calculations on linear addresses

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

Linear addresses are supposed to already have segment checks performed on them;
if we play with these addresses the checks become invalid.

Signed-off-by: Avi Kivity 
Signed-off-by: Marcelo Tosatti 

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e967055..bdbbb18 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -568,7 +568,8 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
   ctxt->vcpu, NULL);
if (rc != X86EMUL_CONTINUE)
return rc;
-   rc = ops->read_std(linear(ctxt, addr) + 2, address, op_bytes,
+   addr.ea += 2;
+   rc = ops->read_std(linear(ctxt, addr), address, op_bytes,
   ctxt->vcpu, NULL);
return rc;
 }
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: x86 emulator: preserve an operand's segment identity

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

Currently the x86 emulator converts the segment register associated with
an operand into a segment base which is added into the operand address.
This loss of information results in us not doing segment limit checks properly.

Replace struct operand's addr.mem field by a segmented_address structure
which holds both the effective address and segment.  This will allow us to
do the limit check at the point of access.

Signed-off-by: Avi Kivity 
Signed-off-by: Marcelo Tosatti 

diff --git a/arch/x86/include/asm/kvm_emulate.h 
b/arch/x86/include/asm/kvm_emulate.h
index b36c6b3..b48c133 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -159,7 +159,10 @@ struct operand {
};
union {
unsigned long *reg;
-   unsigned long mem;
+   struct segmented_address {
+   ulong ea;
+   unsigned seg;
+   } mem;
} addr;
union {
unsigned long val;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 3325b47..e967055 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -410,9 +410,9 @@ address_mask(struct decode_cache *c, unsigned long reg)
 }
 
 static inline unsigned long
-register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
+register_address(struct decode_cache *c, unsigned long reg)
 {
-   return base + address_mask(c, reg);
+   return address_mask(c, reg);
 }
 
 static inline void
@@ -444,26 +444,26 @@ static unsigned long seg_base(struct x86_emulate_ctxt 
*ctxt,
return ops->get_cached_segment_base(seg, ctxt->vcpu);
 }
 
-static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
-  struct x86_emulate_ops *ops,
-  struct decode_cache *c)
+static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
+struct x86_emulate_ops *ops,
+struct decode_cache *c)
 {
if (!c->has_seg_override)
return 0;
 
-   return seg_base(ctxt, ops, c->seg_override);
+   return c->seg_override;
 }
 
-static unsigned long es_base(struct x86_emulate_ctxt *ctxt,
-struct x86_emulate_ops *ops)
+static ulong linear(struct x86_emulate_ctxt *ctxt,
+   struct segmented_address addr)
 {
-   return seg_base(ctxt, ops, VCPU_SREG_ES);
-}
+   struct decode_cache *c = &ctxt->decode;
+   ulong la;
 
-static unsigned long ss_base(struct x86_emulate_ctxt *ctxt,
-struct x86_emulate_ops *ops)
-{
-   return seg_base(ctxt, ops, VCPU_SREG_SS);
+   la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
+   if (c->ad_bytes != 8)
+   la &= (u32)-1;
+   return la;
 }
 
 static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
@@ -556,7 +556,7 @@ static void *decode_register(u8 modrm_reg, unsigned long 
*regs,
 
 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
   struct x86_emulate_ops *ops,
-  ulong addr,
+  struct segmented_address addr,
   u16 *size, unsigned long *address, int op_bytes)
 {
int rc;
@@ -564,10 +564,12 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
if (op_bytes == 2)
op_bytes = 3;
*address = 0;
-   rc = ops->read_std(addr, (unsigned long *)size, 2, ctxt->vcpu, NULL);
+   rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2,
+  ctxt->vcpu, NULL);
if (rc != X86EMUL_CONTINUE)
return rc;
-   rc = ops->read_std(addr + 2, address, op_bytes, ctxt->vcpu, NULL);
+   rc = ops->read_std(linear(ctxt, addr) + 2, address, op_bytes,
+  ctxt->vcpu, NULL);
return rc;
 }
 
@@ -760,7 +762,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
break;
}
}
-   op->addr.mem = modrm_ea;
+   op->addr.mem.ea = modrm_ea;
 done:
return rc;
 }
@@ -775,13 +777,13 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt,
op->type = OP_MEM;
switch (c->ad_bytes) {
case 2:
-   op->addr.mem = insn_fetch(u16, 2, c->eip);
+   op->addr.mem.ea = insn_fetch(u16, 2, c->eip);
break;
case 4:
-   op->addr.mem = insn_fetch(u32, 4, c->eip);
+   op->addr.mem.ea = insn_fetch(u32, 4, c->eip);
break;
case 8:
-   op->addr.mem = insn_fetch(u64, 8, c->eip);
+   op->addr.mem.ea = insn_fetch(u64, 8, c->eip);
break;
}
 done:
@@ -800,7 +802,7 @@ static void fetch_bit_operand(struct decode_cache *c)
else if (c->src.bytes == 4)
sv = (s3

[COMMIT master] KVM: x86 emulator: drop unused #ifndef __KERNEL__

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

Signed-off-by: Avi Kivity 
Signed-off-by: Marcelo Tosatti 

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 38b6e8d..ffd6e01 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -20,16 +20,9 @@
  * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
  */
 
-#ifndef __KERNEL__
-#include 
-#include 
-#include 
-#define DPRINTF(_f, _a ...) printf(_f , ## _a)
-#else
 #include 
 #include "kvm_cache_regs.h"
 #define DPRINTF(x...) do {} while (0)
-#endif
 #include 
 #include 
 
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: rename hardware_[dis|en]able() to *_nolock() and add locking wrappers

2010-11-23 Thread Avi Kivity
From: Takuya Yoshikawa 

The naming convention of the hardware_[dis|en]able family is a little bit confusing
because only hardware_[dis|en]able_all are using _nolock suffix.

Renaming current hardware_[dis|en]able() to *_nolock() and using
hardware_[dis|en]able() as wrapper functions which take kvm_lock for them
reduces extra confusion.

Signed-off-by: Takuya Yoshikawa 
Signed-off-by: Marcelo Tosatti 

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0fdd911..fb93ff9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2067,7 +2067,7 @@ static struct miscdevice kvm_dev = {
&kvm_chardev_ops,
 };
 
-static void hardware_enable(void *junk)
+static void hardware_enable_nolock(void *junk)
 {
int cpu = raw_smp_processor_id();
int r;
@@ -2087,7 +2087,14 @@ static void hardware_enable(void *junk)
}
 }
 
-static void hardware_disable(void *junk)
+static void hardware_enable(void *junk)
+{
+   spin_lock(&kvm_lock);
+   hardware_enable_nolock(junk);
+   spin_unlock(&kvm_lock);
+}
+
+static void hardware_disable_nolock(void *junk)
 {
int cpu = raw_smp_processor_id();
 
@@ -2097,13 +2104,20 @@ static void hardware_disable(void *junk)
kvm_arch_hardware_disable(NULL);
 }
 
+static void hardware_disable(void *junk)
+{
+   spin_lock(&kvm_lock);
+   hardware_disable_nolock(junk);
+   spin_unlock(&kvm_lock);
+}
+
 static void hardware_disable_all_nolock(void)
 {
BUG_ON(!kvm_usage_count);
 
kvm_usage_count--;
if (!kvm_usage_count)
-   on_each_cpu(hardware_disable, NULL, 1);
+   on_each_cpu(hardware_disable_nolock, NULL, 1);
 }
 
 static void hardware_disable_all(void)
@@ -2122,7 +2136,7 @@ static int hardware_enable_all(void)
kvm_usage_count++;
if (kvm_usage_count == 1) {
atomic_set(&hardware_enable_failed, 0);
-   on_each_cpu(hardware_enable, NULL, 1);
+   on_each_cpu(hardware_enable_nolock, NULL, 1);
 
if (atomic_read(&hardware_enable_failed)) {
hardware_disable_all_nolock();
@@ -2148,16 +2162,12 @@ static int kvm_cpu_hotplug(struct notifier_block 
*notifier, unsigned long val,
case CPU_DYING:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
   cpu);
-   spin_lock(&kvm_lock);
hardware_disable(NULL);
-   spin_unlock(&kvm_lock);
break;
case CPU_STARTING:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
   cpu);
-   spin_lock(&kvm_lock);
hardware_enable(NULL);
-   spin_unlock(&kvm_lock);
break;
}
return NOTIFY_OK;
@@ -2188,7 +2198,7 @@ static int kvm_reboot(struct notifier_block *notifier, 
unsigned long val,
 */
printk(KERN_INFO "kvm: exiting hardware virtualization\n");
kvm_rebooting = true;
-   on_each_cpu(hardware_disable, NULL, 1);
+   on_each_cpu(hardware_disable_nolock, NULL, 1);
return NOTIFY_OK;
 }
 
@@ -2358,7 +2368,7 @@ static void kvm_exit_debug(void)
 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
 {
if (kvm_usage_count)
-   hardware_disable(NULL);
+   hardware_disable_nolock(NULL);
return 0;
 }
 
@@ -2366,7 +2376,7 @@ static int kvm_resume(struct sys_device *dev)
 {
if (kvm_usage_count) {
WARN_ON(spin_is_locked(&kvm_lock));
-   hardware_enable(NULL);
+   hardware_enable_nolock(NULL);
}
return 0;
 }
@@ -2543,7 +2553,7 @@ void kvm_exit(void)
sysdev_class_unregister(&kvm_sysdev_class);
unregister_reboot_notifier(&kvm_reboot_notifier);
unregister_cpu_notifier(&kvm_cpu_notifier);
-   on_each_cpu(hardware_disable, NULL, 1);
+   on_each_cpu(hardware_disable_nolock, NULL, 1);
kvm_arch_hardware_unsetup();
kvm_arch_exit();
free_cpumask_var(cpus_hardware_enabled);
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: VMX: Inform user about INTEL_TXT dependency

2010-11-23 Thread Avi Kivity
From: Shane Wang 

Inform user to either disable TXT in the BIOS or do TXT launch
with tboot before enabling KVM since some BIOSes do not set
FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX bit when TXT is enabled.

Signed-off-by: Shane Wang 
Signed-off-by: Marcelo Tosatti 

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0badeac..a9ad174 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1305,8 +1305,11 @@ static __init int vmx_disabled_by_bios(void)
&& tboot_enabled())
return 1;
if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
-   && !tboot_enabled())
+   && !tboot_enabled()) {
+   printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
+   " activate TXT before enabling KVM\n");
return 1;
+   }
}
 
return 0;
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Document device assigment API

2010-11-23 Thread Avi Kivity
From: Jan Kiszka 

Adds API documentation for KVM_[DE]ASSIGN_PCI_DEVICE,
KVM_[DE]ASSIGN_DEV_IRQ, KVM_SET_GSI_ROUTING, KVM_ASSIGN_SET_MSIX_NR, and
KVM_ASSIGN_SET_MSIX_ENTRY.

Acked-by: Alex Williamson 
Acked-by: Michael S. Tsirkin 
Signed-off-by: Jan Kiszka 
Signed-off-by: Marcelo Tosatti 

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index b336266..e1a9297 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -1085,6 +1085,184 @@ of 4 instructions that make up a hypercall.
 If any additional field gets added to this structure later on, a bit for that
 additional piece of information will be set in the flags bitmap.
 
+4.47 KVM_ASSIGN_PCI_DEVICE
+
+Capability: KVM_CAP_DEVICE_ASSIGNMENT
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_pci_dev (in)
+Returns: 0 on success, -1 on error
+
+Assigns a host PCI device to the VM.
+
+struct kvm_assigned_pci_dev {
+   __u32 assigned_dev_id;
+   __u32 busnr;
+   __u32 devfn;
+   __u32 flags;
+   __u32 segnr;
+   union {
+   __u32 reserved[11];
+   };
+};
+
+The PCI device is specified by the triple segnr, busnr, and devfn.
+Identification in succeeding service requests is done via assigned_dev_id. The
+following flags are specified:
+
+/* Depends on KVM_CAP_IOMMU */
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
+
+4.48 KVM_DEASSIGN_PCI_DEVICE
+
+Capability: KVM_CAP_DEVICE_DEASSIGNMENT
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_pci_dev (in)
+Returns: 0 on success, -1 on error
+
+Ends PCI device assignment, releasing all associated resources.
+
+See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is
+used in kvm_assigned_pci_dev to identify the device.
+
+4.49 KVM_ASSIGN_DEV_IRQ
+
+Capability: KVM_CAP_ASSIGN_DEV_IRQ
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_irq (in)
+Returns: 0 on success, -1 on error
+
+Assigns an IRQ to a passed-through device.
+
+struct kvm_assigned_irq {
+   __u32 assigned_dev_id;
+   __u32 host_irq;
+   __u32 guest_irq;
+   __u32 flags;
+   union {
+   struct {
+   __u32 addr_lo;
+   __u32 addr_hi;
+   __u32 data;
+   } guest_msi;
+   __u32 reserved[12];
+   };
+};
+
+The following flags are defined:
+
+#define KVM_DEV_IRQ_HOST_INTX    (1 << 0)
+#define KVM_DEV_IRQ_HOST_MSI     (1 << 1)
+#define KVM_DEV_IRQ_HOST_MSIX    (1 << 2)
+
+#define KVM_DEV_IRQ_GUEST_INTX   (1 << 8)
+#define KVM_DEV_IRQ_GUEST_MSI    (1 << 9)
+#define KVM_DEV_IRQ_GUEST_MSIX   (1 << 10)
+
+It is not valid to specify multiple types per host or guest IRQ. However, the
+IRQ type of host and guest can differ or can even be null.
+
+4.50 KVM_DEASSIGN_DEV_IRQ
+
+Capability: KVM_CAP_ASSIGN_DEV_IRQ
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_irq (in)
+Returns: 0 on success, -1 on error
+
+Ends an IRQ assignment to a passed-through device.
+
+See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
+by assigned_dev_id, flags must correspond to the IRQ type specified on
+KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
+
+4.51 KVM_SET_GSI_ROUTING
+
+Capability: KVM_CAP_IRQ_ROUTING
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_irq_routing (in)
+Returns: 0 on success, -1 on error
+
+Sets the GSI routing table entries, overwriting any previously set entries.
+
+struct kvm_irq_routing {
+   __u32 nr;
+   __u32 flags;
+   struct kvm_irq_routing_entry entries[0];
+};
+
+No flags are specified so far, the corresponding field must be set to zero.
+
+struct kvm_irq_routing_entry {
+   __u32 gsi;
+   __u32 type;
+   __u32 flags;
+   __u32 pad;
+   union {
+   struct kvm_irq_routing_irqchip irqchip;
+   struct kvm_irq_routing_msi msi;
+   __u32 pad[8];
+   } u;
+};
+
+/* gsi routing entry types */
+#define KVM_IRQ_ROUTING_IRQCHIP 1
+#define KVM_IRQ_ROUTING_MSI 2
+
+No flags are specified so far, the corresponding field must be set to zero.
+
+struct kvm_irq_routing_irqchip {
+   __u32 irqchip;
+   __u32 pin;
+};
+
+struct kvm_irq_routing_msi {
+   __u32 address_lo;
+   __u32 address_hi;
+   __u32 data;
+   __u32 pad;
+};
+
+4.52 KVM_ASSIGN_SET_MSIX_NR
+
+Capability: KVM_CAP_DEVICE_MSIX
+Architectures: x86 ia64
+Type: vm ioctl
+Parameters: struct kvm_assigned_msix_nr (in)
+Returns: 0 on success, -1 on error
+
+Set the number of MSI-X interrupts for an assigned device. This service can
+only be called once in the lifetime of an assigned device.
+
+struct kvm_assigned_msix_nr {
+   __u32 assigned_dev_id;
+   __u16 entry_nr;
+   __u16 padding;
+};
+
+#define KVM_MAX_MSIX_PER_DEV   256
+
+4.53 KVM_ASSIGN_SET_MSIX_ENTRY
+
+Capability: KVM_CAP_DEVICE_MSIX
+Architectures: x86 ia64
+Type: v

[COMMIT master] KVM: MMU: don't mark spte notrap if reserved bit set

2010-11-23 Thread Avi Kivity
From: Xiao Guangrong 

If a reserved bit is set, we need to inject the #PF with PFEC.RSVD=1,
but shadow_notrap_nonpresent_pte only injects #PF with PFEC.RSVD=0.

Signed-off-by: Xiao Guangrong 
Signed-off-by: Marcelo Tosatti 

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ba00eef..590bf12 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -395,8 +395,10 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, 
struct guest_walker *gw,
 
gpte = gptep[i];
 
-   if (!is_present_gpte(gpte) ||
- is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) {
+   if (is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL))
+   continue;
+
+   if (!is_present_gpte(gpte)) {
if (!sp->unsync)
__set_spte(spte, shadow_notrap_nonpresent_pte);
continue;
@@ -760,6 +762,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *sp,
pt_element_t gpte;
gpa_t pte_gpa;
gfn_t gfn;
+   bool rsvd_bits_set;
 
if (!is_shadow_present_pte(sp->spt[i]))
continue;
@@ -771,12 +774,14 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *sp,
return -EINVAL;
 
gfn = gpte_to_gfn(gpte);
-   if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)
- || gfn != sp->gfns[i] || !is_present_gpte(gpte)
- || !(gpte & PT_ACCESSED_MASK)) {
+   rsvd_bits_set = is_rsvd_bits_set(&vcpu->arch.mmu, gpte,
+PT_PAGE_TABLE_LEVEL);
+   if (rsvd_bits_set || gfn != sp->gfns[i] ||
+ !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) {
u64 nonpresent;
 
-   if (is_present_gpte(gpte) || !clear_unsync)
+   if (rsvd_bits_set || is_present_gpte(gpte) ||
+ !clear_unsync)
nonpresent = shadow_trap_nonpresent_pte;
else
nonpresent = shadow_notrap_nonpresent_pte;
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: take kvm_lock for hardware_disable() during cpu hotplug

2010-11-23 Thread Avi Kivity
From: Takuya Yoshikawa 

In kvm_cpu_hotplug(), only CPU_STARTING case is protected by kvm_lock.
This patch adds missing protection for CPU_DYING case.

Signed-off-by: Takuya Yoshikawa 
Signed-off-by: Marcelo Tosatti 

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 339dd43..0fdd911 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2148,7 +2148,9 @@ static int kvm_cpu_hotplug(struct notifier_block 
*notifier, unsigned long val,
case CPU_DYING:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
   cpu);
+   spin_lock(&kvm_lock);
hardware_disable(NULL);
+   spin_unlock(&kvm_lock);
break;
case CPU_STARTING:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Refactor IRQ names of assigned devices

2010-11-23 Thread Avi Kivity
From: Jan Kiszka 

Cosmetic change, but it helps to correlate IRQs with PCI devices.

Acked-by: Alex Williamson 
Acked-by: Michael S. Tsirkin 
Signed-off-by: Jan Kiszka 
Signed-off-by: Marcelo Tosatti 

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9fe7fef..4bd663d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -489,6 +489,7 @@ struct kvm_assigned_dev_kernel {
struct pci_dev *dev;
struct kvm *kvm;
spinlock_t intx_lock;
+   char irq_name[32];
 };
 
 struct kvm_irq_mask_notifier {
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 1d77ce1..7623408 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -231,8 +231,7 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
 * are going to be long delays in accepting, acking, etc.
 */
if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread,
-IRQF_ONESHOT, "kvm_assigned_intx_device",
-(void *)dev))
+IRQF_ONESHOT, dev->irq_name, (void *)dev))
return -EIO;
return 0;
 }
@@ -251,7 +250,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
 
dev->host_irq = dev->dev->irq;
if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread,
-0, "kvm_assigned_msi_device", (void *)dev)) {
+0, dev->irq_name, (void *)dev)) {
pci_disable_msi(dev->dev);
return -EIO;
}
@@ -278,8 +277,7 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
for (i = 0; i < dev->entries_nr; i++) {
r = request_threaded_irq(dev->host_msix_entries[i].vector,
 NULL, kvm_assigned_dev_thread,
-0, "kvm_assigned_msix_device",
-(void *)dev);
+0, dev->irq_name, (void *)dev);
if (r)
goto err;
}
@@ -336,6 +334,9 @@ static int assign_host_irq(struct kvm *kvm,
if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
return r;
 
+   snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
+pci_name(dev->dev));
+
switch (host_irq_type) {
case KVM_DEV_IRQ_HOST_INTX:
r = assigned_device_enable_host_intx(kvm, dev);
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Save/restore state of assigned PCI device

2010-11-23 Thread Avi Kivity
From: Jan Kiszka 

The guest may change states that pci_reset_function does not touch. So
we better save/restore the assigned device across guest usage.

Acked-by: Alex Williamson 
Acked-by: Michael S. Tsirkin 
Signed-off-by: Jan Kiszka 
Signed-off-by: Marcelo Tosatti 

diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 7623408..d389207 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -197,7 +197,8 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 {
kvm_free_assigned_irq(kvm, assigned_dev);
 
-   pci_reset_function(assigned_dev->dev);
+   __pci_reset_function(assigned_dev->dev);
+   pci_restore_state(assigned_dev->dev);
 
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
@@ -514,6 +515,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
}
 
pci_reset_function(dev);
+   pci_save_state(dev);
 
match->assigned_dev_id = assigned_dev->assigned_dev_id;
match->host_segnr = assigned_dev->segnr;
@@ -544,6 +546,7 @@ out:
mutex_unlock(&kvm->lock);
return r;
 out_list_del:
+   pci_restore_state(dev);
list_del(&match->list);
pci_release_regions(dev);
 out_disable:
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Clean up kvm_vm_ioctl_assigned_device

2010-11-23 Thread Avi Kivity
From: Jan Kiszka 

Any arch not supporting device assignment will also not build
assigned-dev.c. So testing for KVM_CAP_DEVICE_DEASSIGNMENT is pointless.
KVM_CAP_ASSIGN_DEV_IRQ is unconditionally set. Moreover, add a default
case for dispatching the ioctl.

Acked-by: Alex Williamson 
Acked-by: Michael S. Tsirkin 
Signed-off-by: Jan Kiszka 
Signed-off-by: Marcelo Tosatti 

diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index d389207..ae72ae6 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -674,7 +674,7 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned 
ioctl,
  unsigned long arg)
 {
void __user *argp = (void __user *)arg;
-   int r = -ENOTTY;
+   int r;
 
switch (ioctl) {
case KVM_ASSIGN_PCI_DEVICE: {
@@ -692,7 +692,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned 
ioctl,
r = -EOPNOTSUPP;
break;
}
-#ifdef KVM_CAP_ASSIGN_DEV_IRQ
case KVM_ASSIGN_DEV_IRQ: {
struct kvm_assigned_irq assigned_irq;
 
@@ -715,8 +714,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned 
ioctl,
goto out;
break;
}
-#endif
-#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
case KVM_DEASSIGN_PCI_DEVICE: {
struct kvm_assigned_pci_dev assigned_dev;
 
@@ -728,7 +725,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned 
ioctl,
goto out;
break;
}
-#endif
 #ifdef KVM_CAP_IRQ_ROUTING
case KVM_SET_GSI_ROUTING: {
struct kvm_irq_routing routing;
@@ -781,6 +777,9 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned 
ioctl,
break;
}
 #endif
+   default:
+   r = -ENOTTY;
+   break;
}
 out:
return r;
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: VMX: Fix host userspace gsbase corruption

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

We now use load_gs_index() to load gs safely; unfortunately this also
changes MSR_KERNEL_GS_BASE, which we managed separately.  This resulted
in confusion and breakage running 32-bit host userspace on a 64-bit kernel.

Fix by
- saving guest MSR_KERNEL_GS_BASE before we reload the host's gs
- doing the host save/load unconditionally, instead of only when in guest
  long mode

Things can be cleaned up further, but this is the minimal fix for now.

Signed-off-by: Avi Kivity 
Signed-off-by: Marcelo Tosatti 

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9367abc..0badeac 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -821,10 +821,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 #endif
 
 #ifdef CONFIG_X86_64
-   if (is_long_mode(&vmx->vcpu)) {
-   rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+   rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+   if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
-   }
 #endif
for (i = 0; i < vmx->save_nmsrs; ++i)
kvm_set_shared_msr(vmx->guest_msrs[i].index,
@@ -839,11 +838,14 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 
++vmx->vcpu.stat.host_state_reload;
vmx->host_state.loaded = 0;
+#ifdef CONFIG_X86_64
+   if (is_long_mode(&vmx->vcpu))
+   rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+#endif
if (vmx->host_state.gs_ldt_reload_needed) {
kvm_load_ldt(vmx->host_state.ldt_sel);
 #ifdef CONFIG_X86_64
load_gs_index(vmx->host_state.gs_sel);
-   wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
 #else
loadsegment(gs, vmx->host_state.gs_sel);
 #endif
@@ -852,10 +854,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
loadsegment(fs, vmx->host_state.fs_sel);
reload_tss();
 #ifdef CONFIG_X86_64
-   if (is_long_mode(&vmx->vcpu)) {
-   rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
-   wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
-   }
+   wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
if (current_thread_info()->status & TS_USEDFPU)
clts();
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Clear assigned guest IRQ on release

2010-11-23 Thread Avi Kivity
From: Jan Kiszka 

When we deassign a guest IRQ, clear the potentially asserted guest line.
There might be no chance for the guest to do this, specifically if we
switch from INTx to MSI mode.

Acked-by: Alex Williamson 
Acked-by: Michael S. Tsirkin 
Signed-off-by: Jan Kiszka 
Signed-off-by: Marcelo Tosatti 

diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 7c98928..ecc4419 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -141,6 +141,9 @@ static void deassign_guest_irq(struct kvm *kvm,
kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
assigned_dev->ack_notifier.gsi = -1;
 
+   kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+   assigned_dev->guest_irq, 0);
+
if (assigned_dev->irq_source_id != -1)
kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
assigned_dev->irq_source_id = -1;
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Switch assigned device IRQ forwarding to threaded handler

2010-11-23 Thread Avi Kivity
From: Jan Kiszka 

This improves the IRQ forwarding for assigned devices: By using the
kernel's threaded IRQ scheme, we can get rid of the latency-prone work
queue and simplify the code in the same run.

Moreover, we no longer have to hold assigned_dev_lock while raising the
guest IRQ, which can be a lengthy operation as we may have to iterate
over all VCPUs. The lock is now only used for synchronizing masking vs.
unmasking of INTx-type IRQs, and is therefore renamed to intx_lock.

Acked-by: Alex Williamson 
Acked-by: Michael S. Tsirkin 
Signed-off-by: Jan Kiszka 
Signed-off-by: Marcelo Tosatti 

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2d63f2c..9fe7fef 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -470,16 +470,8 @@ struct kvm_irq_ack_notifier {
void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
-#define KVM_ASSIGNED_MSIX_PENDING  0x1
-struct kvm_guest_msix_entry {
-   u32 vector;
-   u16 entry;
-   u16 flags;
-};
-
 struct kvm_assigned_dev_kernel {
struct kvm_irq_ack_notifier ack_notifier;
-   struct work_struct interrupt_work;
struct list_head list;
int assigned_dev_id;
int host_segnr;
@@ -490,13 +482,13 @@ struct kvm_assigned_dev_kernel {
bool host_irq_disabled;
struct msix_entry *host_msix_entries;
int guest_irq;
-   struct kvm_guest_msix_entry *guest_msix_entries;
+   struct msix_entry *guest_msix_entries;
unsigned long irq_requested_type;
int irq_source_id;
int flags;
struct pci_dev *dev;
struct kvm *kvm;
-   spinlock_t assigned_dev_lock;
+   spinlock_t intx_lock;
 };
 
 struct kvm_irq_mask_notifier {
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index ecc4419..1d77ce1 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -55,58 +55,31 @@ static int find_index_from_host_irq(struct 
kvm_assigned_dev_kernel
return index;
 }
 
-static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
+static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id)
 {
-   struct kvm_assigned_dev_kernel *assigned_dev;
-   int i;
+   struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+   u32 vector;
+   int index;
 
-   assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
-   interrupt_work);
+   if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) {
+   spin_lock(&assigned_dev->intx_lock);
+   disable_irq_nosync(irq);
+   assigned_dev->host_irq_disabled = true;
+   spin_unlock(&assigned_dev->intx_lock);
+   }
 
-   spin_lock_irq(&assigned_dev->assigned_dev_lock);
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
-   struct kvm_guest_msix_entry *guest_entries =
-   assigned_dev->guest_msix_entries;
-   for (i = 0; i < assigned_dev->entries_nr; i++) {
-   if (!(guest_entries[i].flags &
-   KVM_ASSIGNED_MSIX_PENDING))
-   continue;
-   guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
+   index = find_index_from_host_irq(assigned_dev, irq);
+   if (index >= 0) {
+   vector = assigned_dev->
+   guest_msix_entries[index].vector;
kvm_set_irq(assigned_dev->kvm,
-   assigned_dev->irq_source_id,
-   guest_entries[i].vector, 1);
+   assigned_dev->irq_source_id, vector, 1);
}
} else
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
assigned_dev->guest_irq, 1);
 
-   spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-}
-
-static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
-{
-   unsigned long flags;
-   struct kvm_assigned_dev_kernel *assigned_dev =
-   (struct kvm_assigned_dev_kernel *) dev_id;
-
-   spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
-   if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
-   int index = find_index_from_host_irq(assigned_dev, irq);
-   if (index < 0)
-   goto out;
-   assigned_dev->guest_msix_entries[index].flags |=
-   KVM_ASSIGNED_MSIX_PENDING;
-   }
-
-   schedule_work(&assigned_dev->interrupt_work);
-
-   if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
-   disable_irq_nosync(irq);
-   assigned_dev->host_irq_disabled = true;
-   }
-
-out:
-   spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
return IRQ_HANDLED;
 }
 
@@ -114,7 +8

[COMMIT master] KVM: SVM: Replace svm_has() by standard Linux cpuid accessors

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

Instead of querying cpuid directly, use the Linux accessors (boot_cpu_has,
etc.).  This allows things like the clearcpuid kernel command-line option
to work (once it is fixed with respect to scattered cpuid bits).

Acked-by: Joerg Roedel 
Signed-off-by: Avi Kivity 
Signed-off-by: Marcelo Tosatti 

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1a5757a..c6a7798 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -271,11 +271,6 @@ static u32 svm_msrpm_offset(u32 msr)
 
 #define MAX_INST_SIZE 15
 
-static inline u32 svm_has(u32 feat)
-{
-   return svm_features & feat;
-}
-
 static inline void clgi(void)
 {
asm volatile (__ex(SVM_CLGI));
@@ -381,7 +376,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, 
unsigned nr,
nested_svm_check_exception(svm, nr, has_error_code, error_code))
return;
 
-   if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) {
+   if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
 
/*
@@ -677,7 +672,7 @@ static __init int svm_hardware_setup(void)
 
svm_features = cpuid_edx(SVM_CPUID_FUNC);
 
-   if (!svm_has(SVM_FEATURE_NPT))
+   if (!boot_cpu_has(X86_FEATURE_NPT))
npt_enabled = false;
 
if (npt_enabled && !npt) {
@@ -876,7 +871,7 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->nested.vmcb = 0;
svm->vcpu.arch.hflags = 0;
 
-   if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
+   if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
control->pause_filter_count = 3000;
control->intercept |= (1ULL << INTERCEPT_PAUSE);
}
@@ -2743,7 +2738,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned 
ecx, u64 data)
svm->vmcb->save.sysenter_esp = data;
break;
case MSR_IA32_DEBUGCTLMSR:
-   if (!svm_has(SVM_FEATURE_LBRV)) {
+   if (!boot_cpu_has(X86_FEATURE_LBRV)) {
pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
__func__, data);
break;
@@ -3527,7 +3522,7 @@ static void svm_set_supported_cpuid(u32 func, struct 
kvm_cpuid_entry2 *entry)
   additional features */
 
/* Support next_rip if host supports it */
-   if (svm_has(SVM_FEATURE_NRIP))
+   if (boot_cpu_has(X86_FEATURE_NRIPS))
entry->edx |= SVM_FEATURE_NRIP;
 
/* Support NPT for the guest if enabled */
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Mask KVM_GET_SUPPORTED_CPUID data with Linux cpuid info

2010-11-23 Thread Avi Kivity
From: Avi Kivity 

This allows Linux to mask cpuid bits if, for example, nx is enabled on only
some cpus.

Signed-off-by: Avi Kivity 
Signed-off-by: Marcelo Tosatti 

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 003a0ca..410d2d1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2232,6 +2232,11 @@ out:
return r;
 }
 
+static void cpuid_mask(u32 *word, int wordnum)
+{
+   *word &= boot_cpu_data.x86_capability[wordnum];
+}
+
 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
   u32 index)
 {
@@ -2306,7 +2311,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, 
u32 function,
break;
case 1:
entry->edx &= kvm_supported_word0_x86_features;
+   cpuid_mask(&entry->edx, 0);
entry->ecx &= kvm_supported_word4_x86_features;
+   cpuid_mask(&entry->ecx, 4);
/* we support x2apic emulation even if host does not support
 * it since we emulate x2apic in software */
entry->ecx |= F(X2APIC);
@@ -2397,7 +2404,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, 
u32 function,
break;
case 0x8001:
entry->edx &= kvm_supported_word1_x86_features;
+   cpuid_mask(&entry->edx, 1);
entry->ecx &= kvm_supported_word6_x86_features;
+   cpuid_mask(&entry->ecx, 6);
break;
}
 
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] device-assignment: Register as un-migratable

2010-11-23 Thread Avi Kivity
From: Alex Williamson 

Use register_device_unmigratable() to declare ourselves as
non-migratable.

Signed-off-by: Alex Williamson 
Signed-off-by: Marcelo Tosatti 

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 5f5bde1..c2a7b27 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1434,6 +1434,10 @@ static void 
assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
 dev->msix_table_page = NULL;
 }
 
+static const VMStateDescription vmstate_assigned_device = {
+.name = "pci-assign"
+};
+
 static int assigned_initfn(struct PCIDevice *pci_dev)
 {
 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
@@ -1495,6 +1499,12 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
 
 assigned_dev_load_option_rom(dev);
 QLIST_INSERT_HEAD(&devs, dev, next);
+
+/* Register a vmsd so that we can mark it unmigratable. */
+vmstate_register(&dev->dev.qdev, 0, &vmstate_assigned_device, dev);
+register_device_unmigratable(&dev->dev.qdev,
+ vmstate_assigned_device.name, dev);
+
 return 0;
 
 assigned_out:
@@ -1508,6 +1518,7 @@ static int assigned_exitfn(struct PCIDevice *pci_dev)
 {
 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
 
+vmstate_unregister(&dev->dev.qdev, &vmstate_assigned_device, dev);
 QLIST_REMOVE(dev, next);
 deassign_device(dev);
 free_assigned_device(dev);
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] make-release: fix mtime for a wider range of git versions

2010-11-23 Thread Avi Kivity
From: Bernhard Kohl 

With the latest git versions, e.g. 1.7.2.3, git still prints out
the tag info in addition to the requested format. So let's simply
fetch the first line from the output.

In addition I use the --pretty option instead of --format which
is not recognized in very old git versions, e.g. 1.5.5.6.

Tested with git versions 1.5.5.6 and 1.7.2.3.

Signed-off-by: Bernhard Kohl 
Signed-off-by: Marcelo Tosatti 

diff --git a/kvm/scripts/make-release b/kvm/scripts/make-release
index 56302c3..2d050fc 100755
--- a/kvm/scripts/make-release
+++ b/kvm/scripts/make-release
@@ -51,7 +51,7 @@ cd "$(dirname "$0")"/../..
 mkdir -p "$(dirname "$tarball")"
 git archive --prefix="$name/" --format=tar "$commit" > "$tarball"
 
-mtime=`git show --format=%ct "$commit""^{commit}" --`
+mtime=`git show --pretty=format:%ct "$commit""^{commit}" -- | head -n 1`
 tarargs="--owner=root --group=root"
 
 mkdir -p "$tmpdir/$name"
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] device-assignment: register a reset function

2010-11-23 Thread Avi Kivity
From: Bernhard Kohl 

This is necessary because during reboot of a VM the assigned devices
continue DMA transfers which causes memory corruption.

Acked-by: Alex Williamson 
Acked-by: Jan Kiszka 
Signed-off-by: Thomas Ostler 
Signed-off-by: Bernhard Kohl 
Signed-off-by: Marcelo Tosatti 

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index c2a7b27..369bff9 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1438,6 +1438,17 @@ static const VMStateDescription vmstate_assigned_device 
= {
 .name = "pci-assign"
 };
 
+static void reset_assigned_device(DeviceState *dev)
+{
+PCIDevice *d = DO_UPCAST(PCIDevice, qdev, dev);
+
+/*
+ * When a 0 is written to the command register, the device is logically
+ * disconnected from the PCI bus. This avoids further DMA transfers.
+ */
+assigned_dev_pci_write_config(d, PCI_COMMAND, 0, 2);
+}
+
 static int assigned_initfn(struct PCIDevice *pci_dev)
 {
 AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
@@ -1555,6 +1566,7 @@ static PCIDeviceInfo assign_info = {
 .qdev.name= "pci-assign",
 .qdev.desc= "pass through host pci devices to the guest",
 .qdev.size= sizeof(AssignedDevice),
+.qdev.reset   = reset_assigned_device,
 .init = assigned_initfn,
 .exit = assigned_exitfn,
 .config_read  = assigned_dev_pci_read_config,
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html