[COMMIT master] Fix broken indentation in kvm_main_loop_cpu()

2009-06-15 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 0f7adcc..eefca61 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1914,12 +1914,12 @@ static int kvm_main_loop_cpu(CPUState *env)
kvm_main_loop_wait(env, 1000);
if (env-interrupt_request  (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI))
env-halted = 0;
-if (!kvm_irqchip_in_kernel(kvm_context)) {
+if (!kvm_irqchip_in_kernel(kvm_context)) {
if (env-kvm_cpu_state.init)
update_regs_for_init(env);
if (env-kvm_cpu_state.sipi_needed)
update_regs_for_sipi(env);
-}
+}
if (!env-halted  !env-kvm_cpu_state.init)
kvm_cpu_exec(env);
env-exit_request = 0;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Return to userspace on emulation failure

2009-06-15 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

Instead of mindlessly retrying to execute the instruction, report the
failure to userspace.

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3f5dc35..7e9b29d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2649,8 +2649,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, 
u32 error_code)
++vcpu-stat.mmio_exits;
return 0;
case EMULATE_FAIL:
-   kvm_report_emulation_failure(vcpu, pagetable);
-   return 1;
+   vcpu-run-exit_reason = KVM_EXIT_INTERNAL_ERROR;
+   vcpu-run-internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+   return 0;
default:
BUG();
}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c7611ef..38ff31e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -95,6 +95,10 @@ struct kvm_pit_config {
 #define KVM_EXIT_S390_RESET   14
 #define KVM_EXIT_DCR  15
 #define KVM_EXIT_NMI  16
+#define KVM_EXIT_INTERNAL_ERROR   17
+
+/* For KVM_EXIT_INTERNAL_ERROR */
+#define KVM_INTERNAL_ERROR_EMULATION 1
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@@ -181,6 +185,9 @@ struct kvm_run {
__u32 data;
__u8  is_write;
} dcr;
+   struct {
+   __u32 suberror;
+   } internal;
/* Fix the size of the union. */
char padding[256];
};
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: MMU: introduce is_last_spte helper

2009-06-15 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Hiding some of the last largepage / level interaction (which is useful
for gbpages and for zero based levels).

Also merge the PT_PAGE_TABLE_LEVEL clearing loop in unlink_children.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7e9b29d..0178e8b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -250,6 +250,15 @@ static int is_rmap_spte(u64 pte)
return is_shadow_present_pte(pte);
 }
 
+static int is_last_spte(u64 pte, int level)
+{
+   if (level == PT_PAGE_TABLE_LEVEL)
+   return 1;
+   if (level == PT_DIRECTORY_LEVEL  is_large_pte(pte))
+   return 1;
+   return 0;
+}
+
 static pfn_t spte_to_pfn(u64 pte)
 {
return (pte  PT64_BASE_ADDR_MASK)  PAGE_SHIFT;
@@ -1293,25 +1302,17 @@ static void kvm_mmu_page_unlink_children(struct kvm 
*kvm,
 
pt = sp-spt;
 
-   if (sp-role.level == PT_PAGE_TABLE_LEVEL) {
-   for (i = 0; i  PT64_ENT_PER_PAGE; ++i) {
-   if (is_shadow_present_pte(pt[i]))
-   rmap_remove(kvm, pt[i]);
-   pt[i] = shadow_trap_nonpresent_pte;
-   }
-   return;
-   }
-
for (i = 0; i  PT64_ENT_PER_PAGE; ++i) {
ent = pt[i];
 
if (is_shadow_present_pte(ent)) {
-   if (!is_large_pte(ent)) {
+   if (!is_last_spte(ent, sp-role.level)) {
ent = PT64_BASE_ADDR_MASK;
mmu_page_remove_parent_pte(page_header(ent),
   pt[i]);
} else {
-   --kvm-stat.lpages;
+   if (is_large_pte(ent))
+   --kvm-stat.lpages;
rmap_remove(kvm, pt[i]);
}
}
@@ -2357,8 +2358,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 
pte = *spte;
if (is_shadow_present_pte(pte)) {
-   if (sp-role.level == PT_PAGE_TABLE_LEVEL ||
-   is_large_pte(pte))
+   if (is_last_spte(pte, sp-role.level))
rmap_remove(vcpu-kvm, spte);
else {
child = page_header(pte  PT64_BASE_ADDR_MASK);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: MMU audit: update audit_write_protection

2009-06-15 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

- Unsync pages contain writable sptes in the rmap.
- rmaps do not exclusively contain writable sptes anymore.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a58f8d8..1679620 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3236,20 +3236,28 @@ static void audit_write_protection(struct kvm_vcpu 
*vcpu)
struct kvm_mmu_page *sp;
struct kvm_memory_slot *slot;
unsigned long *rmapp;
+   u64 *spte;
gfn_t gfn;
 
list_for_each_entry(sp, vcpu-kvm-arch.active_mmu_pages, link) {
if (sp-role.direct)
continue;
+   if (sp-unsync)
+   continue;
 
gfn = unalias_gfn(vcpu-kvm, sp-gfn);
slot = gfn_to_memslot_unaliased(vcpu-kvm, sp-gfn);
rmapp = slot-rmap[gfn - slot-base_gfn];
-   if (*rmapp)
-   printk(KERN_ERR %s: (%s) shadow page has writable
-   mappings: gfn %lx role %x\n,
+
+   spte = rmap_next(vcpu-kvm, rmapp, NULL);
+   while (spte) {
+   if (*spte  PT_WRITABLE_MASK)
+   printk(KERN_ERR %s: (%s) shadow page has 
+   writable mappings: gfn %lx role %x\n,
   __func__, audit_msg, sp-gfn,
   sp-role.word);
+   spte = rmap_next(vcpu-kvm, rmapp, spte);
+   }
}
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: MMU audit: nontrapping ptes in nonleaf level

2009-06-15 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

It is valid to set non leaf sptes as notrap.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1679620..1c1b002 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3085,12 +3085,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, 
u64 page_pte,
 
va = canonicalize(va);
if (level  1) {
-   if (ent == shadow_notrap_nonpresent_pte)
-   printk(KERN_ERR audit: (%s) nontrapping pte
-   in nonleaf level: levels %d gva %lx
-   level %d pte %llx\n, audit_msg,
-  vcpu-arch.mmu.root_level, va, level, 
ent);
-   else
+   if (is_shadow_present_pte(ent))
audit_mappings_page(vcpu, ent, va, level - 1);
} else {
gpa_t gpa = vcpu-arch.mmu.gva_to_gpa(vcpu, va);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: MMU audit: update count_writable_mappings / count_rmaps

2009-06-15 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Under testing, count_writable_mappings returns a value that is 2 integers
larger than what count_rmaps returns.

Suspicion is that either of the two functions is counting a duplicate (either
positively or negatively).

Modifying check_writable_mappings_rmap to check for rmap existance on
all present MMU pages fails to trigger an error, which should keep Avi
happy.

Also introduce mmu_spte_walk to invoke a callback on all present sptes visible
to the current vcpu, might be useful in the future.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0178e8b..a58f8d8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3021,6 +3021,55 @@ static gva_t canonicalize(gva_t gva)
return gva;
 }
 
+
+typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp,
+u64 *sptep);
+
+static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp,
+   inspect_spte_fn fn)
+{
+   int i;
+
+   for (i = 0; i  PT64_ENT_PER_PAGE; ++i) {
+   u64 ent = sp-spt[i];
+
+   if (is_shadow_present_pte(ent)) {
+   if (sp-role.level  1  !is_large_pte(ent)) {
+   struct kvm_mmu_page *child;
+   child = page_header(ent  PT64_BASE_ADDR_MASK);
+   __mmu_spte_walk(kvm, child, fn);
+   }
+   if (sp-role.level == 1)
+   fn(kvm, sp, sp-spt[i]);
+   }
+   }
+}
+
+static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
+{
+   int i;
+   struct kvm_mmu_page *sp;
+
+   if (!VALID_PAGE(vcpu-arch.mmu.root_hpa))
+   return;
+   if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+   hpa_t root = vcpu-arch.mmu.root_hpa;
+   sp = page_header(root);
+   __mmu_spte_walk(vcpu-kvm, sp, fn);
+   return;
+   }
+   for (i = 0; i  4; ++i) {
+   hpa_t root = vcpu-arch.mmu.pae_root[i];
+
+   if (root  VALID_PAGE(root)) {
+   root = PT64_BASE_ADDR_MASK;
+   sp = page_header(root);
+   __mmu_spte_walk(vcpu-kvm, sp, fn);
+   }
+   }
+   return;
+}
+
 static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
gva_t va, int level)
 {
@@ -3113,9 +3162,47 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
return nmaps;
 }
 
-static int count_writable_mappings(struct kvm_vcpu *vcpu)
+void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 
*sptep)
+{
+   unsigned long *rmapp;
+   struct kvm_mmu_page *rev_sp;
+   gfn_t gfn;
+
+   if (*sptep  PT_WRITABLE_MASK) {
+   rev_sp = page_header(__pa(sptep));
+   gfn = rev_sp-gfns[sptep - rev_sp-spt];
+
+   if (!gfn_to_memslot(kvm, gfn)) {
+   if (!printk_ratelimit())
+   return;
+   printk(KERN_ERR %s: no memslot for gfn %ld\n,
+audit_msg, gfn);
+   printk(KERN_ERR %s: index %ld of sp (gfn=%lx)\n,
+   audit_msg, sptep - rev_sp-spt,
+   rev_sp-gfn);
+   dump_stack();
+   return;
+   }
+
+   rmapp = gfn_to_rmap(kvm, rev_sp-gfns[sptep - rev_sp-spt], 0);
+   if (!*rmapp) {
+   if (!printk_ratelimit())
+   return;
+   printk(KERN_ERR %s: no rmap for writable spte %llx\n,
+audit_msg, *sptep);
+   dump_stack();
+   }
+   }
+
+}
+
+void audit_writable_sptes_have_rmaps(struct kvm_vcpu *vcpu)
+{
+   mmu_spte_walk(vcpu, inspect_spte_has_rmap);
+}
+
+static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu)
 {
-   int nmaps = 0;
struct kvm_mmu_page *sp;
int i;
 
@@ -3132,20 +3219,16 @@ static int count_writable_mappings(struct kvm_vcpu 
*vcpu)
continue;
if (!(ent  PT_WRITABLE_MASK))
continue;
-   ++nmaps;
+   inspect_spte_has_rmap(vcpu-kvm, sp, pt[i]);
}
}
-   return nmaps;
+   return;
 }
 
 static void audit_rmap(struct kvm_vcpu *vcpu)
 {
-   int n_rmap = count_rmaps(vcpu);
-   int n_actual = count_writable_mappings(vcpu);
-
-   if (n_rmap != n_actual)
-   printk(KERN_ERR %s: (%s) rmap %d actual %d\n,
-  __func__, audit_msg, n_rmap, 

[COMMIT master] KVM: Replace pending exception by PF if it happens serially

2009-06-15 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

Replace previous exception with a new one in a hope that instruction
re-execution will regenerate lost exception.

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8dae08d..6025e5b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -181,16 +181,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, 
unsigned long addr,
++vcpu-stat.pf_guest;
 
if (vcpu-arch.exception.pending) {
-   if (vcpu-arch.exception.nr == PF_VECTOR) {
-   printk(KERN_DEBUG kvm: inject_page_fault:
-double fault 0x%lx\n, addr);
-   vcpu-arch.exception.nr = DF_VECTOR;
-   vcpu-arch.exception.error_code = 0;
-   } else if (vcpu-arch.exception.nr == DF_VECTOR) {
+   switch(vcpu-arch.exception.nr) {
+   case DF_VECTOR:
/* triple fault - shutdown */
set_bit(KVM_REQ_TRIPLE_FAULT, vcpu-requests);
+   return;
+   case PF_VECTOR:
+   vcpu-arch.exception.nr = DF_VECTOR;
+   vcpu-arch.exception.error_code = 0;
+   return;
+   default:
+   /* replace previous exception with a new one in a hope
+  that instruction re-execution will regenerate lost
+  exception */
+   vcpu-arch.exception.pending = false;
+   break;
}
-   return;
}
vcpu-arch.cr2 = addr;
kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Move performance counter MSR access interception to generic x86 path

2009-06-15 Thread Avi Kivity
From: Andre Przywara andre.przyw...@amd.com

The performance counter MSRs are different for AMD and Intel CPUs and they
are chosen mainly by the CPUID vendor string. This patch catches writes to
all addresses (regardless of VMX/SVM path) and handles them in the generic
MSR handler routine. Writing a 0 into the event select register is something
we perfectly emulate ;-), so don't print out a warning to dmesg in this
case.
This fixes booting a 64bit Windows guest with an AMD CPUID on an Intel host.

Signed-off-by: Andre Przywara andre.przyw...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 13f6f7d..cdbd4d1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2143,22 +2143,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned 
ecx, u64 data)
else
svm_disable_lbrv(svm);
break;
-   case MSR_K7_EVNTSEL0:
-   case MSR_K7_EVNTSEL1:
-   case MSR_K7_EVNTSEL2:
-   case MSR_K7_EVNTSEL3:
-   case MSR_K7_PERFCTR0:
-   case MSR_K7_PERFCTR1:
-   case MSR_K7_PERFCTR2:
-   case MSR_K7_PERFCTR3:
-   /*
-* Just discard all writes to the performance counters; this
-* should keep both older linux and windows 64-bit guests
-* happy
-*/
-   pr_unimpl(vcpu, unimplemented perfctr wrmsr: 0x%x data 
0x%llx\n, ecx, data);
-
-   break;
case MSR_VM_HSAVE_PA:
svm-hsave_msr = data;
break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b949db0..dc502b9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1025,18 +1025,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 
msr_index, u64 data)
rdtscll(host_tsc);
guest_write_tsc(data, host_tsc);
break;
-   case MSR_P6_PERFCTR0:
-   case MSR_P6_PERFCTR1:
-   case MSR_P6_EVNTSEL0:
-   case MSR_P6_EVNTSEL1:
-   /*
-* Just discard all writes to the performance counters; this
-* should keep both older linux and windows 64-bit guests
-* happy
-*/
-   pr_unimpl(vcpu, unimplemented perfctr wrmsr: 0x%x data 
0x%llx\n, msr_index, data);
-
-   break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl  VM_ENTRY_LOAD_IA32_PAT) {
vmcs_write64(GUEST_IA32_PAT, data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1b91ea7..8dae08d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -849,6 +849,36 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 
data)
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
return set_msr_mce(vcpu, msr, data);
+
+   /* Performance counters are not protected by a CPUID bit,
+* so we should check all of them in the generic path for the sake of
+* cross vendor migration.
+* Writing a zero into the event select MSRs disables them,
+* which we perfectly emulate ;-). Any other value should be at least
+* reported, some guests depend on them.
+*/
+   case MSR_P6_EVNTSEL0:
+   case MSR_P6_EVNTSEL1:
+   case MSR_K7_EVNTSEL0:
+   case MSR_K7_EVNTSEL1:
+   case MSR_K7_EVNTSEL2:
+   case MSR_K7_EVNTSEL3:
+   if (data != 0)
+   pr_unimpl(vcpu, unimplemented perfctr wrmsr: 
+   0x%x data 0x%llx\n, msr, data);
+   break;
+   /* at least RHEL 4 unconditionally writes to the perfctr registers,
+* so we ignore writes to make it happy.
+*/
+   case MSR_P6_PERFCTR0:
+   case MSR_P6_PERFCTR1:
+   case MSR_K7_PERFCTR0:
+   case MSR_K7_PERFCTR1:
+   case MSR_K7_PERFCTR2:
+   case MSR_K7_PERFCTR3:
+   pr_unimpl(vcpu, unimplemented perfctr wrmsr: 
+   0x%x data 0x%llx\n, msr, data);
+   break;
default:
pr_unimpl(vcpu, unhandled wrmsr: 0x%x data %llx\n, msr, data);
return 1;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: VMX: more MSR_IA32_VMX_EPT_VPID_CAP capability bits

2009-06-15 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Required for EPT misconfiguration handler.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index e7927a6..272514c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -352,9 +352,16 @@ enum vmcs_field {
 #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
 #define VMX_EPT_EXTENT_CONTEXT 1
 #define VMX_EPT_EXTENT_GLOBAL  2
+
+#define VMX_EPT_EXECUTE_ONLY_BIT   (1ull)
+#define VMX_EPT_PAGE_WALK_4_BIT(1ull  6)
+#define VMX_EPTP_UC_BIT(1ull  8)
+#define VMX_EPTP_WB_BIT(1ull  14)
+#define VMX_EPT_2MB_PAGE_BIT   (1ull  16)
 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT  (1ull  24)
 #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull  25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT  (1ull  26)
+
 #define VMX_EPT_DEFAULT_GAW3
 #define VMX_EPT_MAX_GAW0x4
 #define VMX_EPT_MT_EPTE_SHIFT  3
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index dc502b9..6e202dd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -270,6 +270,26 @@ static inline bool cpu_has_vmx_flexpriority(void)
cpu_has_vmx_virtualize_apic_accesses();
 }
 
+static inline bool cpu_has_vmx_ept_execute_only(void)
+{
+   return !!(vmx_capability.ept  VMX_EPT_EXECUTE_ONLY_BIT);
+}
+
+static inline bool cpu_has_vmx_eptp_uncacheable(void)
+{
+   return !!(vmx_capability.ept  VMX_EPTP_UC_BIT);
+}
+
+static inline bool cpu_has_vmx_eptp_writeback(void)
+{
+   return !!(vmx_capability.ept  VMX_EPTP_WB_BIT);
+}
+
+static inline bool cpu_has_vmx_ept_2m_page(void)
+{
+   return !!(vmx_capability.ept  VMX_EPT_2MB_PAGE_BIT);
+}
+
 static inline int cpu_has_vmx_invept_individual_addr(void)
 {
return !!(vmx_capability.ept  VMX_EPT_EXTENT_INDIVIDUAL_BIT);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: MMU: make for_each_shadow_entry aware of largepages

2009-06-15 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

This way there is no need to add explicit checks in every
for_each_shadow_entry user.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1b47a69..d440f64 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1282,6 +1282,11 @@ static bool shadow_walk_okay(struct 
kvm_shadow_walk_iterator *iterator)
 {
if (iterator-level  PT_PAGE_TABLE_LEVEL)
return false;
+
+   if (iterator-level == PT_PAGE_TABLE_LEVEL)
+   if (is_large_pte(*iterator-sptep))
+   return false;
+
iterator-index = SHADOW_PT_INDEX(iterator-addr, iterator-level);
iterator-sptep = ((u64 *)__va(iterator-shadow_addr)) + 
iterator-index;
return true;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: MMU: add kvm_mmu_get_spte_hierarchy helper

2009-06-15 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Required by EPT misconfiguration handler.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d440f64..edf3dea 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3014,6 +3014,24 @@ out:
return r;
 }
 
+int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
+{
+   struct kvm_shadow_walk_iterator iterator;
+   int nr_sptes = 0;
+
+   spin_lock(vcpu-kvm-mmu_lock);
+   for_each_shadow_entry(vcpu, addr, iterator) {
+   sptes[iterator.level-1] = *iterator.sptep;
+   nr_sptes++;
+   if (!is_shadow_present_pte(*iterator.sptep))
+   break;
+   }
+   spin_unlock(vcpu-kvm-mmu_lock);
+
+   return nr_sptes;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
+
 #ifdef AUDIT
 
 static const char *audit_msg;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 016bf71..61a1b38 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -37,6 +37,8 @@
 #define PT32_ROOT_LEVEL 2
 #define PT32E_ROOT_LEVEL 3
 
+int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
+
 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
if (unlikely(vcpu-kvm-arch.n_free_mmu_pages  KVM_MIN_FREE_MMU_PAGES))
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: VMX: conditionally disable 2M pages

2009-06-15 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Disable usage of 2M pages if VMX_EPT_2MB_PAGE_BIT (bit 16) is clear
in MSR_IA32_VMX_EPT_VPID_CAP and EPT is enabled.

[avi: s/largepages_disabled/largepages_enabled/ to avoid negative logic]

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1fe3785..6c0902e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1381,6 +1381,9 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_tpr_shadow())
kvm_x86_ops-update_cr8_intercept = NULL;
 
+   if (enable_ept  !cpu_has_vmx_ept_2m_page())
+   kvm_disable_largepages();
+
return alloc_kvm_area();
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e4e78db..1b48092 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -219,6 +219,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc);
+void kvm_disable_largepages(void);
 void kvm_arch_flush_shadow(struct kvm *kvm);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9fab08e..d604812 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -85,6 +85,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int 
ioctl,
 
 static bool kvm_rebooting;
 
+static bool largepages_enabled = true;
+
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head 
*head,
  int assigned_dev_id)
@@ -1171,9 +1173,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
ugfn = new.userspace_addr  PAGE_SHIFT;
/*
 * If the gfn and userspace address are not aligned wrt each
-* other, disable large page support for this slot
+* other, or if explicitly asked to, disable large page
+* support for this slot
 */
-   if ((base_gfn ^ ugfn)  (KVM_PAGES_PER_HPAGE - 1))
+   if ((base_gfn ^ ugfn)  (KVM_PAGES_PER_HPAGE - 1) ||
+   !largepages_enabled)
for (i = 0; i  largepages; ++i)
new.lpage_info[i].write_count = 1;
}
@@ -1286,6 +1290,12 @@ out:
return r;
 }
 
+void kvm_disable_largepages(void)
+{
+   largepages_enabled = false;
+}
+EXPORT_SYMBOL_GPL(kvm_disable_largepages);
+
 int is_error_page(struct page *page)
 {
return page == bad_page;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: MMU audit: largepage handling

2009-06-15 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Make the audit code aware of largepages.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1ed7362..1b47a69 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3034,12 +3034,11 @@ static void __mmu_spte_walk(struct kvm *kvm, struct 
kvm_mmu_page *sp,
u64 ent = sp-spt[i];
 
if (is_shadow_present_pte(ent)) {
-   if (sp-role.level  1  !is_large_pte(ent)) {
+   if (!is_last_spte(ent, sp-role.level)) {
struct kvm_mmu_page *child;
child = page_header(ent  PT64_BASE_ADDR_MASK);
__mmu_spte_walk(kvm, child, fn);
-   }
-   if (sp-role.level == 1)
+   } else
fn(kvm, sp, sp-spt[i]);
}
}
@@ -3084,10 +3083,9 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, 
u64 page_pte,
continue;
 
va = canonicalize(va);
-   if (level  1) {
-   if (is_shadow_present_pte(ent))
-   audit_mappings_page(vcpu, ent, va, level - 1);
-   } else {
+   if (is_shadow_present_pte(ent)  !is_last_spte(ent, level))
+   audit_mappings_page(vcpu, ent, va, level - 1);
+   else {
gpa_t gpa = vcpu-arch.mmu.gva_to_gpa(vcpu, va);
gfn_t gfn = gpa  PAGE_SHIFT;
pfn_t pfn = gfn_to_pfn(vcpu-kvm, gfn);
@@ -3184,7 +3182,8 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct 
kvm_mmu_page *sp, u64 *sptep)
return;
}
 
-   rmapp = gfn_to_rmap(kvm, rev_sp-gfns[sptep - rev_sp-spt], 0);
+   rmapp = gfn_to_rmap(kvm, rev_sp-gfns[sptep - rev_sp-spt],
+   is_large_pte(*sptep));
if (!*rmapp) {
if (!printk_ratelimit())
return;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Fix dirty bit tracking for slots with large pages

2009-06-15 Thread Avi Kivity
From: Izik Eidus iei...@redhat.com

When slot is already allocated and being asked to be tracked we need
to break the large pages.

This code flush the mmu when someone ask a slot to start dirty bit
tracking.

Cc: sta...@kernel.org
Signed-off-by: Izik Eidus iei...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d604812..8939ffa 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1190,6 +1190,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (!new.dirty_bitmap)
goto out_free;
memset(new.dirty_bitmap, 0, dirty_bytes);
+   if (old.npages)
+   kvm_arch_flush_shadow(kvm);
}
 #endif /* not defined CONFIG_S390 */
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Drop useless atomic test from timer function

2009-06-15 Thread Avi Kivity
From: Jan Kiszka jan.kis...@siemens.com

The current code tries to optimize the setting of
KVM_REQ_PENDING_TIMER but used atomic_inc_and_test - which always
returns true unless pending had the invalid value of -1 on entry. This
patch drops the test part preserving the original semantic but
expressing it less confusingly.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
index 1baed41..eea4043 100644
--- a/arch/x86/kvm/timer.c
+++ b/arch/x86/kvm/timer.c
@@ -15,9 +15,9 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct 
kvm_timer *ktimer)
 * case anyway.
 */
if (ktimer-reinject || !atomic_read(ktimer-pending)) {
+   atomic_inc(ktimer-pending);
/* FIXME: this code should not know anything about vcpus */
-   if (!atomic_inc_and_test(ktimer-pending))
-   set_bit(KVM_REQ_PENDING_TIMER, vcpu-requests);
+   set_bit(KVM_REQ_PENDING_TIMER, vcpu-requests);
}
 
if (waitqueue_active(q))
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Fix racy event propagation in timer

2009-06-15 Thread Avi Kivity
From: Jan Kiszka jan.kis...@siemens.com

Minor issue that likely had no practical relevance: the kvm timer
function so far incremented the pending counter and then may reset it
again to 1 in case reinjection was disabled. This opened a small racy
window with the corresponding VCPU loop that may have happened to run
on another (real) CPU and already consumed the value.

Fix it by skipping the incrementation in case pending is already  0.
This opens a different race windows, but may only rarely cause lost
events in case we do not care about them anyway (!reinject).

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
index 85cc743..1baed41 100644
--- a/arch/x86/kvm/timer.c
+++ b/arch/x86/kvm/timer.c
@@ -9,12 +9,16 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct 
kvm_timer *ktimer)
int restart_timer = 0;
wait_queue_head_t *q = vcpu-wq;
 
-   /* FIXME: this code should not know anything about vcpus */
-   if (!atomic_inc_and_test(ktimer-pending))
-   set_bit(KVM_REQ_PENDING_TIMER, vcpu-requests);
-
-   if (!ktimer-reinject)
-   atomic_set(ktimer-pending, 1);
+   /*
+* There is a race window between reading and incrementing, but we do
+* not care about potentially loosing timer events in the !reinject
+* case anyway.
+*/
+   if (ktimer-reinject || !atomic_read(ktimer-pending)) {
+   /* FIXME: this code should not know anything about vcpus */
+   if (!atomic_inc_and_test(ktimer-pending))
+   set_bit(KVM_REQ_PENDING_TIMER, vcpu-requests);
+   }
 
if (waitqueue_active(q))
wake_up_interruptible(q);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: x86: Ignore reads to K7 EVNTSEL MSRs

2009-06-15 Thread Amit Shah
In commit 7fe29e0faacb650d31b9e9f538203a157bec821d we ignored the
reads to the P6 EVNTSEL MSRs. That fixed crashes on Intel machines.

Ignore the reads to K7 EVNTSEL MSRs as well to fix this on AMD
hosts.

This fixes Kaspersky antivirus crashing Windows guests on AMD hosts.

Signed-off-by: Amit Shah amit.s...@redhat.com
---
 arch/x86/kvm/x86.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1b91ea7..c5b44c9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -957,6 +957,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 
*pdata)
case MSR_VM_HSAVE_PA:
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
+   case MSR_K7_EVNTSEL0:
data = 0;
break;
case MSR_MTRRcap:
-- 
1.6.2.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Mark McLoughlin
On Sun, 2009-06-14 at 12:34 +0300, Michael S. Tsirkin wrote:
 On Fri, Jun 12, 2009 at 04:53:27PM +0100, Mark McLoughlin wrote:
  The other obvious piece to add to it would be PCI addresses, so that
  even if you remove a device, the addresses assigned to existing devices
  don't change.
 
 Could you clarify this requirement please?

Avi clarified, but I've written it up here too:

  https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses

Cheers,
Mark.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Mark McLoughlin
On Sun, 2009-06-14 at 12:50 +0300, Michael S. Tsirkin wrote:
 On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
  However, in order to retain compat for that SCSI device (e.g. ensuring
  the PCI address doesn't change as other devices are added an removed),
  we're back to the same problem ... either:
  
1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure 
   out what address to use, libvirt would need to query qemu for what 
   address was originally allocated to device or it would do all the 
   PCI address allocation itself ... 
 
 This last option makes sense to me: in a real world the user has
 control over where he places the device on the bus, so why
 not with qemu?

Yep, most people seem to agree that it makes sense to allow this, but
some believe it should only be via a machine description file, not the
command line.

However, the first problem is that it isn't a solution to the guest ABI
problem more generally.

And the second problem is that for e.g. libvirt to use it, it would have
to be possible to query qemu for what PCI slots were assigned to the
devices - libvirt would need to be able to parse 'info pci' and match
the devices listed with the devices specified on the command line.

Again, details written up here:

  https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses

Cheers,
Mark.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 12:08 PM, Mark McLoughlin wrote:

This last option makes sense to me: in a real world the user has
control over where he places the device on the bus, so why
not with qemu?
 


Yep, most people seem to agree that it makes sense to allow this, but
some believe it should only be via a machine description file, not the
command line.
   


I don't understand this opposition.  It's clear a machine config file is 
a long way in our future.  It's also clear lack of stable PCI addresses 
hurts us now.



However, the first problem is that it isn't a solution to the guest ABI
problem more generally.
   


pci_addr was never meant to bring world peace, just stable PCI 
addresses.  The other issues should be addressed separately.



And the second problem is that for e.g. libvirt to use it, it would have
to be possible to query qemu for what PCI slots were assigned to the
devices - libvirt would need to be able to parse 'info pci' and match
the devices listed with the devices specified on the command line.
   


If all devices (including vga, ide) are set up with pci_addr, then this 
is unneeded.  You do need to export available slot numbers from qemu.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/14/2009 12:47 PM, Michael S. Tsirkin wrote:

Michael S. Tsirkin wrote:

If we want to remove a device from under a running guest, you need
hotplug. So we can't just remove several lines from the config and hope
that it'll work simply because the PCI address is stable.

   

Why not?
 


E.g. configuration cycles address a specific bus/slot.
You need cooperation from guest if you want to move
a device.
   


By remove several lines from the config I understood the guest needs 
to be restarted.  Of course if you don't restart the guest you need true 
hotplug.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:

On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
   

However, in order to retain compat for that SCSI device (e.g. ensuring
the PCI address doesn't change as other devices are added an removed),
we're back to the same problem ... either:

   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
  out what address to use, libvirt would need to query qemu for what
  address was originally allocated to device or it would do all the
  PCI address allocation itself ...
 


This last option makes sense to me: in a real world the user has
control over where he places the device on the bus, so why
not with qemu?
   


Yes, the user build the machine using the command line and monitor (or, 
in 2017, the machine configuration file), then turns on the power.  
Command line options are the parts lying around when we start.


btw, -drive needs to be separated:

  -controller type=lsi1234,pci_addr=foobar,name=blah
  -drive file=foo.img,controller=blah,index=0
  -drive file=bar.img,controller=blah,index=1

Drives to not have pci addresses.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v2 2/2] kvm: use POLLHUP to close an irqfd instead of an explicit ioctl

2009-06-15 Thread Michael S. Tsirkin
On Sun, Jun 14, 2009 at 11:39:21PM -0400, Gregory Haskins wrote:
 Michael S. Tsirkin wrote:
  On Sun, Jun 14, 2009 at 08:53:11AM -0400, Gregory Haskins wrote:

  Michael S. Tsirkin wrote:
  
  On Thu, Jun 04, 2009 at 08:48:12AM -0400, Gregory Haskins wrote:


  +static void
  +irqfd_disconnect(struct _irqfd *irqfd)
  +{
  +struct kvm *kvm;
  +
  +mutex_lock(irqfd-lock);
  +
  +kvm = rcu_dereference(irqfd-kvm);
  +rcu_assign_pointer(irqfd-kvm, NULL);
  +
  +mutex_unlock(irqfd-lock);
  +
  +if (!kvm)
  +return;
   
   mutex_lock(kvm-lock);
  -kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1);
  -kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0);
  +list_del(irqfd-list);
   mutex_unlock(kvm-lock);
  +
  +/*
  + * It is important to not drop the kvm reference until the next 
  grace
  + * period because there might be lockless references in flight 
  up
  + * until then
  + */
  +synchronize_srcu(irqfd-srcu);
  +kvm_put_kvm(kvm);
   }
  
  
  So irqfd object will persist after kvm goes away, until eventfd is closed?


  Yep, by design.  It becomes part of the eventfd and is thus associated
  with its lifetime.  Consider it as if we made our own anon-fd
  implementation for irqfd and the lifetime looks similar.  The difference
  is that we are reusing eventfd and its interface semantics.
  


   
   static int
   irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
   {
   struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
  +unsigned long flags = (unsigned long)key;
   
  -/*
  - * The wake_up is called with interrupts disabled.  Therefore 
  we need
  - * to defer the IRQ injection until later since we need to 
  acquire the
  - * kvm-lock to do so.
  - */
  -schedule_work(irqfd-work);
  +if (flags  POLLIN)
  +/*
  + * The POLLIN wake_up is called with interrupts 
  disabled.
  + * Therefore we need to defer the IRQ injection until 
  later
  + * since we need to acquire the kvm-lock to do so.
  + */
  +schedule_work(irqfd-inject);
  +
  +if (flags  POLLHUP) {
  +/*
  + * The POLLHUP is called unlocked, so it theoretically 
  should
  + * be safe to remove ourselves from the wqh using the 
  locked
  + * variant of remove_wait_queue()
  + */
  +remove_wait_queue(irqfd-wqh, irqfd-wait);
  +flush_work(irqfd-inject);
  +irqfd_disconnect(irqfd);
  +
  +cleanup_srcu_struct(irqfd-srcu);
  +kfree(irqfd);
  +}
   
   return 0;
   }
  
  
  And it is removed by this function when eventfd is closed.
  But what prevents the kvm module from going away, meanwhile?


  Well, we hold a reference to struct kvm until we call
  irqfd_disconnect().  If kvm closes first, we disconnect and disassociate
  all references to kvm leaving irqfd-kvm = NULL.  Likewise, if irqfd
  closes first, we disassociate with kvm with the above quoted logic.  In
  either case, we are holding a kvm reference up until that disconnect
  point.  Therefore kvm should not be able to disappear before that
  disconnect, and after that point we do not care.
  
 
  Yes, we do care.
 
  Here's the scenario in more detail:
 
  - kvm is closed
  - irq disconnect is called
  - kvm is put
  - kvm module is removed: all irqs are disconnected
  - eventfd closes and triggers callback into removed kvm module
  - crash

 
 [ lightbulb turns on]
 
 Ah, now I see the point you were making.  I thought you were talking
 about the .text in kvm_set_irq() (which would be protected by my
 kvm_get_kvm() reference afaict).  But you are actually talking about the
 irqfd .text itself.  Indeed, you are correct that is this currently a
 race.  Good catch!
 

  If that is not sufficient to prevent kvm.ko from going away in the
  middle, then IMO kvm_get_kvm() has a bug, not irqfd. ;) However, I
  believe everything is actually ok here.
 
  -Greg
 
  
 
 
  BTW, why can't we remove irqfds in kvm_release?

 
 Well, this would be ideal but we run into that bi-directional reference
 thing that we talked about earlier and we both agree is non-trivial to
 solve.  Solving this locking problem would incidentally also pave the
 way for restoring the DEASSIGN feature, so patches welcome!

So far the only workable approach that I see is reverting the POLLHUP
patch. I agree it looks pretty, but DEASSIGN and closing the races is
more important IMO. And locking will definitely become much simpler.

 In the meantime, I think we can close the 

Re: [PATCH 1/6] env-kvm_cpu_state.init is always zero here.

2009-06-15 Thread Avi Kivity

On 06/14/2009 01:52 PM, Gleb Natapov wrote:

Signed-off-by: Gleb Natapovg...@redhat.com
---
  qemu-kvm.c |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 2aeb17c..ec911ef 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -412,7 +412,7 @@ static int kvm_main_loop_cpu(CPUState *env)
if (env-kvm_cpu_state.sipi_needed)
update_regs_for_sipi(env);
  }
-   if (!env-halted  !env-kvm_cpu_state.init)
+   if (!env-halted)
kvm_cpu_exec(env);
env-exit_request = 0;
  env-exception_index = EXCP_INTERRUPT;
   


Are you sure?  Can't a reset reenable it?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/6] env-kvm_cpu_state.init is always zero here.

2009-06-15 Thread Gleb Natapov
On Mon, Jun 15, 2009 at 12:55:27PM +0300, Avi Kivity wrote:
 On 06/14/2009 01:52 PM, Gleb Natapov wrote:
 Signed-off-by: Gleb Natapovg...@redhat.com
 ---
   qemu-kvm.c |2 +-
   1 files changed, 1 insertions(+), 1 deletions(-)

 diff --git a/qemu-kvm.c b/qemu-kvm.c
 index 2aeb17c..ec911ef 100644
 --- a/qemu-kvm.c
 +++ b/qemu-kvm.c
 @@ -412,7 +412,7 @@ static int kvm_main_loop_cpu(CPUState *env)
  if (env-kvm_cpu_state.sipi_needed)
  update_regs_for_sipi(env);
   }
 -if (!env-halted  !env-kvm_cpu_state.init)
 +if (!env-halted)
  kvm_cpu_exec(env);
  env-exit_request = 0;
   env-exception_index = EXCP_INTERRUPT;


 Are you sure?  Can't a reset reenable it?

The thing is used only with userspace irq chip. If
env-kvm_cpu_state.init == 1 update_regs_for_init() is called
three line above and it will set kvm_cpu_state.init to zero.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] Handle vcpu init/sipi by calling a function on vcpu

2009-06-15 Thread Avi Kivity

On 06/14/2009 01:52 PM, Gleb Natapov wrote:

Instead of having special case in vcpu event loop.

   


I'm a little worried about two vcpus INITing each other simultaneously 
and deadlocking.  INIT/SIPI are async events, the initiator should not 
wait for them.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/6] env-kvm_cpu_state.init is always zero here.

2009-06-15 Thread Avi Kivity

On 06/15/2009 12:58 PM, Gleb Natapov wrote:

On Mon, Jun 15, 2009 at 12:55:27PM +0300, Avi Kivity wrote:
   

On 06/14/2009 01:52 PM, Gleb Natapov wrote:
 

Signed-off-by: Gleb Natapovg...@redhat.com
---
   qemu-kvm.c |2 +-
   1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 2aeb17c..ec911ef 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -412,7 +412,7 @@ static int kvm_main_loop_cpu(CPUState *env)
if (env-kvm_cpu_state.sipi_needed)
update_regs_for_sipi(env);
   }
-   if (!env-halted   !env-kvm_cpu_state.init)
+   if (!env-halted)
kvm_cpu_exec(env);
env-exit_request = 0;
   env-exception_index = EXCP_INTERRUPT;

   

Are you sure?  Can't a reset reenable it?

 

The thing is used only with userspace irq chip. If
env-kvm_cpu_state.init == 1 update_regs_for_init() is called
three line above and it will set kvm_cpu_state.init to zero.
   


Right, and nothing can sleep in between.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Mark McLoughlin
On Sun, 2009-06-14 at 10:58 +0300, Avi Kivity wrote:
 Mark McLoughlin wrote:
 
   
 
  I think the point is that you don't need version numbers if you have a 
  proper device tree.
  
 
  How do you add a new attribute to the device tree and, when a supplied
  device tree lacking said attribute, distinguish between a device tree
  from an old version of qemu (i.e. use the old default) and a partial
  device tree from the VM manager (i.e. use the new default) ?

 
 -baseline 0.10

That's a version number :-)

(I was responding to Anthony's you don't need a version number)

Cheers,
Mark.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] Handle vcpu init/sipi by calling a function on vcpu

2009-06-15 Thread Gleb Natapov
On Mon, Jun 15, 2009 at 01:03:39PM +0300, Avi Kivity wrote:
 On 06/14/2009 01:52 PM, Gleb Natapov wrote:
 Instead of having special case in vcpu event loop.



 I'm a little worried about two vcpus INITing each other simultaneously  
 and deadlocking.  INIT/SIPI are async events, the initiator should not  
 wait for them.

I thought to add on_vcpu_async() for that (if this case is worth warring about).

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] Handle vcpu init/sipi by calling a function on vcpu

2009-06-15 Thread Avi Kivity

On 06/15/2009 01:11 PM, Gleb Natapov wrote:

On Mon, Jun 15, 2009 at 01:03:39PM +0300, Avi Kivity wrote:
   

On 06/14/2009 01:52 PM, Gleb Natapov wrote:
 

Instead of having special case in vcpu event loop.


   

I'm a little worried about two vcpus INITing each other simultaneously
and deadlocking.  INIT/SIPI are async events, the initiator should not
wait for them.

 

I thought to add on_vcpu_async() for that (if this case is worth warring about).
   


A generic on_vcpu_async() would need to allocate, that might be expoitable.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] Handle vcpu init/sipi by calling a function on vcpu

2009-06-15 Thread Gleb Natapov
On Mon, Jun 15, 2009 at 01:14:21PM +0300, Avi Kivity wrote:
 On 06/15/2009 01:11 PM, Gleb Natapov wrote:
 On Mon, Jun 15, 2009 at 01:03:39PM +0300, Avi Kivity wrote:

 On 06/14/2009 01:52 PM, Gleb Natapov wrote:
  
 Instead of having special case in vcpu event loop.



 I'm a little worried about two vcpus INITing each other simultaneously
 and deadlocking.  INIT/SIPI are async events, the initiator should not
 wait for them.

  
 I thought to add on_vcpu_async() for that (if this case is worth warring 
 about).


 A generic on_vcpu_async() would need to allocate, that might be expoitable.

Then what about processing events while waiting in on_vcpu()?

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] Handle vcpu init/sipi by calling a function on vcpu

2009-06-15 Thread Avi Kivity

On 06/15/2009 01:16 PM, Gleb Natapov wrote:

A generic on_vcpu_async() would need to allocate, that might be expoitable.

 

Then what about processing events while waiting in on_vcpu()?

   


Could work, but prefer a simpler solution.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Michael S. Tsirkin
On Mon, Jun 15, 2009 at 12:43:48PM +0300, Avi Kivity wrote:
 On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:
 On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:

 However, in order to retain compat for that SCSI device (e.g. ensuring
 the PCI address doesn't change as other devices are added an removed),
 we're back to the same problem ... either:

1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure
   out what address to use, libvirt would need to query qemu for what
   address was originally allocated to device or it would do all the
   PCI address allocation itself ...
  

 This last option makes sense to me: in a real world the user has
 control over where he places the device on the bus, so why
 not with qemu?


 Yes, the user build the machine using the command line and monitor (or,  
 in 2017, the machine configuration file), then turns on the power.   
 Command line options are the parts lying around when we start.

 btw, -drive needs to be separated:

   -controller type=lsi1234,pci_addr=foobar,name=blah
   -drive file=foo.img,controller=blah,index=0
   -drive file=bar.img,controller=blah,index=1

 Drives to not have pci addresses.

Maybe we need a generic 'bus options' flag.



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Michael S. Tsirkin
On Mon, Jun 15, 2009 at 12:27:08PM +0300, Avi Kivity wrote:
 On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
 This last option makes sense to me: in a real world the user has
 control over where he places the device on the bus, so why
 not with qemu?
  

 Yep, most people seem to agree that it makes sense to allow this, but
 some believe it should only be via a machine description file, not the
 command line.


 I don't understand this opposition.  It's clear a machine config file is  
 a long way in our future.  It's also clear lack of stable PCI addresses  
 hurts us now.

 However, the first problem is that it isn't a solution to the guest ABI
 problem more generally.


 pci_addr was never meant to bring world peace, just stable PCI  
 addresses.  The other issues should be addressed separately.

 And the second problem is that for e.g. libvirt to use it, it would have
 to be possible to query qemu for what PCI slots were assigned to the
 devices - libvirt would need to be able to parse 'info pci' and match
 the devices listed with the devices specified on the command line.


 If all devices (including vga, ide) are set up with pci_addr, then this  
 is unneeded.

Right. I think it could be an all or nothing at all approach.

 You do need to export available slot numbers from qemu.

Why would a slot be unavailable?

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Gleb Natapov
On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
  You do need to export available slot numbers from qemu.
 
 Why would a slot be unavailable?
 
Because it does not exist?

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Michael S. Tsirkin
On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
 On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
   You do need to export available slot numbers from qemu.
  
  Why would a slot be unavailable?
  
 Because it does not exist?

We can create a slot with any number, can't we?

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Gleb Natapov
On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
 On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
  On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
You do need to export available slot numbers from qemu.
   
   Why would a slot be unavailable?
   
  Because it does not exist?
 
 We can create a slot with any number, can't we?
What do you mean? If the mobo has 4 slots you can't create fifth.
KVM describes 32 slots in the BIOS.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Michael S. Tsirkin
On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
 On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
  On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
   On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
 You do need to export available slot numbers from qemu.

Why would a slot be unavailable?

   Because it does not exist?
  
  We can create a slot with any number, can't we?
 What do you mean? If the mobo has 4 slots you can't create fifth.
 KVM describes 32 slots in the BIOS.

Do you mean the KVM kernel module here? I don't know much about the
BIOS. Can't qemu control the number of slots declared?

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Gleb Natapov
On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote:
 On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
  On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
   On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
  You do need to export available slot numbers from qemu.
 
 Why would a slot be unavailable?
 
Because it does not exist?
   
   We can create a slot with any number, can't we?
  What do you mean? If the mobo has 4 slots you can't create fifth.
  KVM describes 32 slots in the BIOS.
 
 Do you mean the KVM kernel module here? I don't know much about the
No I don't mean KVM kernel module here.

 BIOS. Can't qemu control the number of slots declared?
 
Qemu represents HW, BIOS drives this HW. They should be in sync on such
important issues like pci slot configuration. Even if QEMU can control
the number of slots declared (which it can't easily do), it will be able
to do it only on startup (before BIOS runs). The way to have dynamic
number of slots may be pci bridge emulation. Not sure what is needed
from BIOS for that.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote:

You do need to export available slot numbers from qemu.
 


Why would a slot be unavailable?
   


A slot needs to be configured in ACPI, and not be taken by onboard chips 
(piix takes slot 0, for example).


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Activate Virtualization On Demand v2

2009-06-15 Thread Alexander Graf
X86 CPUs need to have some magic happening to enable the virtualization
extensions on them. This magic can result in unpleasant results for
users, like blocking other VMMs from working (vmx) or using invalid TLB
entries (svm).

Currently KVM activates virtualization when the respective kernel module
is loaded. This blocks us from autoloading KVM modules without breaking
other VMMs.

To circumvent this problem at least a bit, this patch introduces on
demand activation of virtualization. This means, that instead
virtualization is enabled on creation of the first virtual machine
and disabled on destruction of the last one.

So using this, KVM can be easily autoloaded, while keeping other
hypervisors usable.

---

v2 uses kvm_lock and traces failures atomically

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/ia64/kvm/kvm-ia64.c|8 ++-
 arch/powerpc/kvm/powerpc.c  |2 +-
 arch/s390/kvm/kvm-s390.c|2 +-
 arch/x86/include/asm/kvm_host.h |2 +-
 arch/x86/kvm/svm.c  |   13 --
 arch/x86/kvm/vmx.c  |7 +++-
 arch/x86/kvm/x86.c  |4 +-
 include/linux/kvm_host.h|2 +-
 virt/kvm/kvm_main.c |   82 +--
 9 files changed, 96 insertions(+), 26 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 906d597..3141a92 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 
*opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
long  status;
long  tmp_base;
@@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage)
slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
local_irq_restore(saved_psr);
if (slot  0)
-   return;
+   return -EINVAL;
 
spin_lock(vp_lock);
status = ia64_pal_vp_init_env(kvm_vsa_base ?
@@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage)
__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, tmp_base);
if (status != 0) {
printk(KERN_WARNINGkvm: Failed to Enable VT Support\n);
-   return ;
+   return -EINVAL;
}
 
if (!kvm_vsa_base) {
@@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage)
}
spin_unlock(vp_lock);
ia64_ptr_entry(0x3, slot);
+
+   return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9057335..6558ab7 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -80,7 +80,8 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu 
*vcpu)
return r;
 }
 
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
+   return 0;
 }
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index cbfe91e..a14e676 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -70,7 +70,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 
 /* Section: not file related */
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
 {
/* every s390 is virtualization enabled ;-) */
+   return 0;
 }
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4627627..72d5075 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -463,7 +463,7 @@ struct descriptor_table {
 struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void);  /* __init */
int (*disabled_by_bios)(void); /* __init */
-   void (*hardware_enable)(void *dummy);  /* __init */
+   int (*hardware_enable)(void *dummy);
void (*hardware_disable)(void *dummy);
void (*check_processor_compatibility)(void *rtn);
int (*hardware_setup)(void);   /* __init */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 04ee964..47a8b94 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -245,7 +245,7 @@ static void svm_hardware_disable(void *garbage)
cpu_svm_disable();
 }
 
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
 {
 
struct svm_cpu_data *svm_data;
@@ -254,16 +254,20 @@ static void svm_hardware_enable(void *garbage)
struct desc_struct *gdt;
int me = raw_smp_processor_id();
 
+   rdmsrl(MSR_EFER, efer);
+   if (efer  EFER_SVME)
+   return -EBUSY;
+
if (!has_svm()) {
printk(KERN_ERR svm_cpu_init: err EOPNOTSUPP on %d\n, me);
-   return;
+   return -EINVAL;
}
svm_data = per_cpu(svm_data, me);
 
if (!svm_data) {
printk(KERN_ERR svm_cpu_init: svm_data 

Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 12:09 PM, Mark McLoughlin wrote:

I think the point is that you don't need version numbers if you have a
proper device tree.

 

How do you add a new attribute to the device tree and, when a supplied
device tree lacking said attribute, distinguish between a device tree
from an old version of qemu (i.e. use the old default) and a partial
device tree from the VM manager (i.e. use the new default) ?

   

-baseline 0.10
 


That's a version number :-)

(I was responding to Anthony's you don't need a version number)
   


If you want to prevent incompatibilities, you need to make everything 
new (potentially including bugfixes) non-default.  Eventually the 
default configuration becomes increasingly unusable and you need a new 
baseline.  You must still be able to fall back to the old baseline for 
older guests.  I don't think games with configuration files can hide that.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Michael S. Tsirkin
On Mon, Jun 15, 2009 at 02:14:15PM +0300, Gleb Natapov wrote:
 On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote:
  On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote:
   On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote:
On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote:
 On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote:
   You do need to export available slot numbers from qemu.
  
  Why would a slot be unavailable?
  
 Because it does not exist?

We can create a slot with any number, can't we?
   What do you mean? If the mobo has 4 slots you can't create fifth.
   KVM describes 32 slots in the BIOS.
  
  Do you mean the KVM kernel module here? I don't know much about the
 No I don't mean KVM kernel module here.
 
  BIOS. Can't qemu control the number of slots declared?
  
 Qemu represents HW, BIOS drives this HW. They should be in sync on such
 important issues like pci slot configuration.

As a simple solution, let's stick to 32 slots per bus. That's the
maximum that the PCI spec allows, anyway.

 Even if QEMU can control the number of slots declared (which it can't
 easily do), it will be able to do it only on startup (before BIOS
 runs).

That's OK - this is when the machine description is read.

 The way to have dynamic
 number of slots may be pci bridge emulation. Not sure what is needed
 from BIOS for that.

Since bridge can be hot-plugged, probably nothing?
But we don't necessarily need dynamic number of slots IMO.

 
 --
   Gleb.

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints

2009-06-15 Thread Markus Armbruster
Avi Kivity a...@redhat.com writes:

 On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
 This last option makes sense to me: in a real world the user has
 control over where he places the device on the bus, so why
 not with qemu?
  

 Yep, most people seem to agree that it makes sense to allow this, but
 some believe it should only be via a machine description file, not the
 command line.


 I don't understand this opposition.  It's clear a machine config file
 is a long way in our future.  It's also clear lack of stable PCI
 addresses hurts us now.

Correct.

 However, the first problem is that it isn't a solution to the guest ABI
 problem more generally.


 pci_addr was never meant to bring world peace, just stable PCI
 addresses.  The other issues should be addressed separately.

 And the second problem is that for e.g. libvirt to use it, it would have
 to be possible to query qemu for what PCI slots were assigned to the
 devices - libvirt would need to be able to parse 'info pci' and match
 the devices listed with the devices specified on the command line.


 If all devices (including vga, ide) are set up with pci_addr, then
 this is unneeded.  You do need to export available slot numbers from
 qemu.

Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
other slots are available.

The real problem is devices that get implicitly added, like the SCSI
controller.  Those devices get their slots auto-assigned, which can
interfere with slot numbers chosen by the user.  We need a way to avoid
that, as you suggested elsewhere in this thread.


[*] There's an exception or two for oddball targets.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/4] Implement Hyper-V MSRs v2

2009-06-15 Thread Alexander Graf


On 19.05.2009, at 14:52, Avi Kivity wrote:


Alexander Graf wrote:
Hyper-V uses some MSRs, some of which are actually reserved for  
BIOS usage.


But let's be nice today and have it its way, because otherwise it  
fails

terribly.

v2 changes:
 - remove the 0x4081 MSR definition
 - add pr_unimpl() on unimplemented writes

Signed-off-by: Alexander Graf ag...@suse.de
---
arch/x86/kvm/svm.c |5 +
1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ef43a18..4b4eadd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2034,6 +2034,11 @@ static int svm_set_msr(struct kvm_vcpu  
*vcpu, unsigned ecx, u64 data)

case MSR_VM_HSAVE_PA:
svm-hsave_msr = data;
break;
+   case MSR_VM_CR:
+   case MSR_VM_IGNNE:
+   case MSR_K8_HWCR:
+		pr_unimpl(vcpu, unimplemented wrmsr: 0x%x data 0x%llx\n, ecx,  
data);

+   break;



We can be nicer, if the write doesn't set bits which we don't  
implement, we can let it proceed silently.  See for example  
MSR_IA32_DEBUGCTLMSR.  Most likely the values written are already  
correctly implemented (by doing nothing).


Actually we implement very little of the bits. See http://support.amd.com/us/Processor_TechDocs/31116-Public-GH-BKDG_3-28_5-28-09.pdf 
 for what we're missing ;-).


So it might make sense to always warn for now, see what OSs use and  
only filter out those bits they actually try to access (and maybe  
implement them).


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints

2009-06-15 Thread Avi Kivity

(adding cc)

On 06/15/2009 02:35 PM, Markus Armbruster wrote:

Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
other slots are available.
   


qemu needs to export these two bits of information: the first free slot 
and the number of slots.


More generally, which slots are open.  We can assume 1:31, but that's 
unlovely.



The real problem is devices that get implicitly added, like the SCSI
controller.  Those devices get their slots auto-assigned, which can
interfere with slot numbers chosen by the user.  We need a way to avoid
that, as you suggested elsewhere in this thread.
   


Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=, and 
-drive pci_addr= (and later, -disk-controller)?  Stalling while waiting 
for the ultimate config file is only generating pain and out-of-tree 
patches.


(I'd be quite happy constructing the entire machine config on the 
command line, but I realize it's just me)


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] Nested SVM: Improve interrupt injection v2

2009-06-15 Thread Alexander Graf


On 19.05.2009, at 15:22, Gleb Natapov wrote:


On Tue, May 19, 2009 at 12:54:03PM +0200, Alexander Graf wrote:
While trying to get Hyper-V running, I realized that the interrupt  
injection

mechanisms that are in place right now are not 100% correct.

This patch makes nested SVM's interrupt injection behave more like  
on a

real machine.

v2 calls BUG_ON when svm_set_irq is called with GIF=0

Signed-off-by: Alexander Graf ag...@suse.de
---
arch/x86/kvm/svm.c |   39 ---
1 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fa2a710..5b14c9d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1517,7 +1517,8 @@ static int nested_svm_vmexit_real(struct  
vcpu_svm *svm, void *arg1,

/* Kill any pending exceptions */
if (svm-vcpu.arch.exception.pending == true)
nsvm_printk(WARNING: Pending Exception\n);
-   svm-vcpu.arch.exception.pending = false;
+   kvm_clear_exception_queue(svm-vcpu);
+   kvm_clear_interrupt_queue(svm-vcpu);


What about pending NMI here?


NMI injected to the guest? That should have triggered by now and  
caused an #NMI exit, no?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Michael S. Tsirkin
On Mon, Jun 15, 2009 at 02:27:14PM +0300, Avi Kivity wrote:
 On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote:
 You do need to export available slot numbers from qemu.
  

 Why would a slot be unavailable?


 A slot needs to be configured in ACPI,

Can we configure all possible 32 slots?

 and not be taken by onboard chips  
 (piix takes slot 0, for example).

piix is the root complex, isn't it? Are there other examples? If not,
we could teach management about the root complex being special ...


 -- 
 error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] Nested SVM: Improve interrupt injection v2

2009-06-15 Thread Gleb Natapov
On Mon, Jun 15, 2009 at 01:47:08PM +0200, Alexander Graf wrote:

 On 19.05.2009, at 15:22, Gleb Natapov wrote:

 On Tue, May 19, 2009 at 12:54:03PM +0200, Alexander Graf wrote:
 While trying to get Hyper-V running, I realized that the interrupt  
 injection
 mechanisms that are in place right now are not 100% correct.

 This patch makes nested SVM's interrupt injection behave more like  
 on a
 real machine.

 v2 calls BUG_ON when svm_set_irq is called with GIF=0

 Signed-off-by: Alexander Graf ag...@suse.de
 ---
 arch/x86/kvm/svm.c |   39 ---
 1 files changed, 24 insertions(+), 15 deletions(-)

 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
 index fa2a710..5b14c9d 100644
 --- a/arch/x86/kvm/svm.c
 +++ b/arch/x86/kvm/svm.c
 @@ -1517,7 +1517,8 @@ static int nested_svm_vmexit_real(struct  
 vcpu_svm *svm, void *arg1,
 /* Kill any pending exceptions */
 if (svm-vcpu.arch.exception.pending == true)
 nsvm_printk(WARNING: Pending Exception\n);
 -   svm-vcpu.arch.exception.pending = false;
 +   kvm_clear_exception_queue(svm-vcpu);
 +   kvm_clear_interrupt_queue(svm-vcpu);

 What about pending NMI here?

 NMI injected to the guest? That should have triggered by now and caused 
 an #NMI exit, no?

I don't really understand what this code is doing, but there are three
types of events exception/interrupt/nmi you clear only two of them. If
you are sure this is correct then OK.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote:

A slot needs to be configured in ACPI,
 


Can we configure all possible 32 slots?
   


That's what we do.  But one is always taken.  In the future, perhaps more.


and not be taken by onboard chips
(piix takes slot 0, for example).
 


piix is the root complex, isn't it? Are there other examples? If not,
we could teach management about the root complex being special ...
   


We should just tell the user which slots are open.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Configuration vs. compat hints

2009-06-15 Thread Stefano Stabellini
Avi Kivity wrote:

 (I'd be quite happy constructing the entire machine config on the 
 command line, but I realize it's just me)
 


It is not just you.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v2 2/2] kvm: use POLLHUP to close an irqfd instead of an explicit ioctl

2009-06-15 Thread Gregory Haskins
Michael S. Tsirkin wrote:
 On Sun, Jun 14, 2009 at 11:39:21PM -0400, Gregory Haskins wrote:
   
 Michael S. Tsirkin wrote:
 
 On Sun, Jun 14, 2009 at 08:53:11AM -0400, Gregory Haskins wrote:
   
   
 Michael S. Tsirkin wrote:
 
 
 On Thu, Jun 04, 2009 at 08:48:12AM -0400, Gregory Haskins wrote:
   
   
   
 +static void
 +irqfd_disconnect(struct _irqfd *irqfd)
 +{
 +struct kvm *kvm;
 +
 +mutex_lock(irqfd-lock);
 +
 +kvm = rcu_dereference(irqfd-kvm);
 +rcu_assign_pointer(irqfd-kvm, NULL);
 +
 +mutex_unlock(irqfd-lock);
 +
 +if (!kvm)
 +return;
  
  mutex_lock(kvm-lock);
 -kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1);
 -kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0);
 +list_del(irqfd-list);
  mutex_unlock(kvm-lock);
 +
 +/*
 + * It is important to not drop the kvm reference until the next 
 grace
 + * period because there might be lockless references in flight 
 up
 + * until then
 + */
 +synchronize_srcu(irqfd-srcu);
 +kvm_put_kvm(kvm);
  }
 
 
 
 So irqfd object will persist after kvm goes away, until eventfd is closed?
   
   
   
 Yep, by design.  It becomes part of the eventfd and is thus associated
 with its lifetime.  Consider it as if we made our own anon-fd
 implementation for irqfd and the lifetime looks similar.  The difference
 is that we are reusing eventfd and its interface semantics.
 
 
   
   
   
  
  static int
  irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
  {
  struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
 +unsigned long flags = (unsigned long)key;
  
 -/*
 - * The wake_up is called with interrupts disabled.  Therefore 
 we need
 - * to defer the IRQ injection until later since we need to 
 acquire the
 - * kvm-lock to do so.
 - */
 -schedule_work(irqfd-work);
 +if (flags  POLLIN)
 +/*
 + * The POLLIN wake_up is called with interrupts 
 disabled.
 + * Therefore we need to defer the IRQ injection until 
 later
 + * since we need to acquire the kvm-lock to do so.
 + */
 +schedule_work(irqfd-inject);
 +
 +if (flags  POLLHUP) {
 +/*
 + * The POLLHUP is called unlocked, so it theoretically 
 should
 + * be safe to remove ourselves from the wqh using the 
 locked
 + * variant of remove_wait_queue()
 + */
 +remove_wait_queue(irqfd-wqh, irqfd-wait);
 +flush_work(irqfd-inject);
 +irqfd_disconnect(irqfd);
 +
 +cleanup_srcu_struct(irqfd-srcu);
 +kfree(irqfd);
 +}
  
  return 0;
  }
 
 
 
 And it is removed by this function when eventfd is closed.
 But what prevents the kvm module from going away, meanwhile?
   
   
   
 Well, we hold a reference to struct kvm until we call
 irqfd_disconnect().  If kvm closes first, we disconnect and disassociate
 all references to kvm leaving irqfd-kvm = NULL.  Likewise, if irqfd
 closes first, we disassociate with kvm with the above quoted logic.  In
 either case, we are holding a kvm reference up until that disconnect
 point.  Therefore kvm should not be able to disappear before that
 disconnect, and after that point we do not care.
 
 
 Yes, we do care.

 Here's the scenario in more detail:

 - kvm is closed
 - irq disconnect is called
 - kvm is put
 - kvm module is removed: all irqs are disconnected
 - eventfd closes and triggers callback into removed kvm module
 - crash
   
   
 [ lightbulb turns on]

 Ah, now I see the point you were making.  I thought you were talking
 about the .text in kvm_set_irq() (which would be protected by my
 kvm_get_kvm() reference afaict).  But you are actually talking about the
 irqfd .text itself.  Indeed, you are correct that is this currently a
 race.  Good catch!

 
   
   
 If that is not sufficient to prevent kvm.ko from going away in the
 middle, then IMO kvm_get_kvm() has a bug, not irqfd. ;) However, I
 believe everything is actually ok here.

 -Greg

 
 
 BTW, why can't we remove irqfds in kvm_release?
   
   
 Well, this would be ideal but we run into that bi-directional reference
 thing that we talked about earlier and we both agree is non-trivial to
 solve.  Solving this locking problem would incidentally also pave the
 way for restoring the DEASSIGN feature, so patches welcome!
 

 So far the only workable approach that I see is reverting the POLLHUP
 patch. I agree it looks pretty, but DEASSIGN and closing the races is
 more important IMO. And locking will definitely become 

[KVM-AUTOTEST PATCH 1/5] Add new module kvm_subprocess

2009-06-15 Thread Michael Goldish
This module is intended to be used for controlling all child processes in KVM
tests: both QEMU processes and SSH/SCP/Telnet processes. Processes started with
this module keep running and can be interacted with even after the parent
process exits.

The current run_bg() utility tracks a child process as long as the parent
process is running. When the parent process exits, the tracking thread
terminates and cannot resume when needed.

Currently SSH/SCP/Telnet communication is handled by kvm_utils.kvm_spawn, which
does not allow the child process to run after the parent process exits. Thus,
open SSH/SCP/Telnet sessions cannot be reused by tests following the one in
which they are opened.

The new module provides a solution to these two problems, and also saves some
code by reusing common code required both for QEMU processes and SSH/SCP/Telnet
processes.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_subprocess.py |  870 
 1 files changed, 870 insertions(+), 0 deletions(-)
 create mode 100644 client/tests/kvm/kvm_subprocess.py

diff --git a/client/tests/kvm/kvm_subprocess.py 
b/client/tests/kvm/kvm_subprocess.py
new file mode 100644
index 000..a6090ad
--- /dev/null
+++ b/client/tests/kvm/kvm_subprocess.py
@@ -0,0 +1,870 @@
+#!/usr/bin/python
+import sys, subprocess, pty, select, os, time, signal, re, termios, fcntl
+import threading, logging
+import common, kvm_utils
+
+
+A class and functions used for running and controlling child processes.
+
+...@copyright: 2008-2009 Red Hat Inc.
+
+
+
+def _lock(filename):
+if not os.path.exists(filename):
+open(filename, w).close()
+fd = os.open(filename, os.O_RDWR)
+fcntl.lockf(fd, fcntl.LOCK_EX)
+return fd
+
+
+def _unlock(fd):
+fcntl.lockf(fd, fcntl.LOCK_UN)
+os.close(fd)
+
+
+def _locked(filename):
+try:
+fd = os.open(filename, os.O_RDWR)
+except:
+return False
+try:
+fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+except:
+os.close(fd)
+return True
+fcntl.lockf(fd, fcntl.LOCK_UN)
+os.close(fd)
+return False
+
+
+def _wait(filename):
+fd = _lock(filename)
+_unlock(fd)
+
+
+def _get_filenames(base_dir, id):
+return map(lambda s: os.path.join(base_dir, s + - + id),
+   (pid, status, output, outpipe1, outpipe2, inpipe,
+lock-server-running, lock-client-starting))
+
+
+class kvm_spawn:
+
+This class is used for spawning and controlling a child process.
+
+A new instance of this class can either run a new server (a small Python
+program that reads output from the child process and reports it to the
+client and to a text file) or attach to an already running server.
+When a server is started it runs the child process.
+The server reports output from the child's STDOUT and STDERR via 3
+channels: two named pipes and a text file.
+The text file can be accessed at any time using get_output().
+The first named pipe is used by _tail(), a function that runs in the
+background and reports new output from the child as it is produced.
+The second named pipe is used by a set of functions that read and parse
+output as requested by the user in an interactive manner, similar to
+pexpect.
+The server also receives input from the client and sends it to the child
+process.
+An instance of this class can be pickled. When unpickled it automatically
+resumes _tail() if needed.
+
+
+def __init__(self, command=None, id=None, termination_func=None,
+ output_func=None, output_prefix=, linesep=\n,
+ prompt=r[\#\$]\s*$, status_test_command=echo $?):
+
+Initialize the class and run command as a child process.
+
+@param command: Command that will be run, or None if accessing an
+already running process.
+@param id: id of an already running instance, if accessing a running
+instance, or None if starting a new one.
+@param termination_func: Function to call when the process exits. The
+function must accept a single exit status parameter.
+@param output_func: Function to call whenever a line of output is
+available from the STDOUT or STDERR streams of the process.
+The function must accept a single string parameter. The string
+does not include the final newline.
+@param output_prefix: String to prepend to lines sent to output_func.
+@param linesep: Line separator to be appended to strings sent to the
+child process using sendline().
+@param prompt: Regular expression to be used with read_up_to_prompt().
+@param status_test_command: Command to be used for getting the last
+exit status of commands run inside a shell (used by
+get_command_status_output() and 

[KVM-AUTOTEST PATCH 2/5] Modify kvm_vm and kvm_preprocessing to use the new kvm_subprocess module

2009-06-15 Thread Michael Goldish
Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_preprocessing.py |   17 +-
 client/tests/kvm/kvm_vm.py|  103 +
 2 files changed, 44 insertions(+), 76 deletions(-)

diff --git a/client/tests/kvm/kvm_preprocessing.py 
b/client/tests/kvm/kvm_preprocessing.py
index f60cfe8..7fac46a 100644
--- a/client/tests/kvm/kvm_preprocessing.py
+++ b/client/tests/kvm/kvm_preprocessing.py
@@ -1,7 +1,7 @@
 import sys, os, time, commands, re, logging
 from autotest_lib.client.bin import test
 from autotest_lib.client.common_lib import error
-import kvm_vm, kvm_utils
+import kvm_vm, kvm_utils, kvm_subprocess
 
 
 def preprocess_image(test, params):
@@ -167,17 +167,6 @@ def preprocess(test, params, env):
 @param params: A dict containing all VM and image parameters.
 @param env: The environment (a dict-like object).
 
-# Verify the identities of all living VMs
-for vm in env.values():
-if not kvm_utils.is_vm(vm):
-continue
-if vm.is_dead():
-continue
-if not vm.verify_process_identity():
-logging.debug(VM '%s' seems to have been replaced by another
-   process % vm.name)
-vm.pid = None
-
 # Destroy and remove VMs that are no longer needed in the environment
 requested_vms = kvm_utils.get_sub_dict_names(params, vms)
 for key in env.keys():
@@ -239,8 +228,8 @@ def postprocess(test, params, env):
 # Remove them all
 logging.debug('keep_ppm_files' not specified; removing all PPM files
from results dir...)
-kvm_utils.run_bg(rm -vf %s % os.path.join(test.debugdir, *.ppm),
-  None, logging.debug, (rm) , timeout=5.0)
+rm_cmd = rm -vf %s % os.path.join(test.debugdir, *.ppm)
+kvm_subprocess.run_fg(rm_cmd, logging.debug, (rm) , timeout=5.0)
 
 
 def postprocess_on_error(test, params, env):
diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py
index 5028161..9aea3fb 100644
--- a/client/tests/kvm/kvm_vm.py
+++ b/client/tests/kvm/kvm_vm.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 import time, socket, os, logging, fcntl
-import kvm_utils
+import kvm_utils, kvm_subprocess
 
 
 Utility classes and functions to handle Virtual Machine creation using qemu.
@@ -54,17 +54,22 @@ def create_image(params, qemu_img_path, image_dir):
 qemu_img_cmd +=  %s % size
 
 logging.debug(Running qemu-img command:\n%s % qemu_img_cmd)
-(status, pid, output) = kvm_utils.run_bg(qemu_img_cmd, None,
- logging.debug, (qemu-img) ,
- timeout=30)
+(status, output) = kvm_subprocess.run_fg(qemu_img_cmd, logging.debug,
+ (qemu-img) , timeout=30)
 
-if status:
-logging.debug(qemu-img exited with status %d % status)
-logging.error(Could not create image %s % image_filename)
+if status is None:
+logging.error(Timeout elapsed while waiting for qemu-img command 
+  to complete:\n%s % qemu_img_cmd)
+return None
+elif status != 0:
+logging.error(Could not create image; 
+  qemu-img command failed:\n%s % qemu_img_cmd)
+logging.error(Status: %s % status)
+logging.error(Output: + kvm_utils.format_str_for_message(output))
 return None
 if not os.path.exists(image_filename):
-logging.debug(Image file does not exist for some reason)
-logging.error(Could not create image %s % image_filename)
+logging.error(Image could not be created for some reason; 
+  qemu-img command:\n%s % qemu_img_cmd)
 return None
 
 logging.info(Image created in %s % image_filename)
@@ -106,7 +111,7 @@ class VM:
 @param image_dir: The directory where images reside
 @param iso_dir: The directory where ISOs reside
 
-self.pid = None
+self.process = None
 
 self.name = name
 self.params = params
@@ -153,28 +158,6 @@ class VM:
 return VM(name, params, qemu_path, image_dir, iso_dir)
 
 
-def verify_process_identity(self):
-
-Make sure .pid really points to the original qemu process. If .pid
-points to the same process that was created with the create method,
-or to a dead process, return True. Otherwise return False.
-
-if self.is_dead():
-return True
-filename = /proc/%d/cmdline % self.pid
-if not os.path.exists(filename):
-logging.debug(Filename %s does not exist % filename)
-return False
-file = open(filename)
-cmdline = file.read()
-file.close()
-if not self.qemu_path in cmdline:
-return False
-if not self.monitor_file_name in cmdline:
-return False
-return True
-
-
 

[KVM-AUTOTEST PATCH 3/5] Modify remote_login and remote_scp in kvm_utils to use kvm_subprocess

2009-06-15 Thread Michael Goldish
Also remove a reference to kvm_log that was left behind.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_utils.py |   13 -
 1 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py
index 4bc8dc7..09af62d 100644
--- a/client/tests/kvm/kvm_utils.py
+++ b/client/tests/kvm/kvm_utils.py
@@ -2,6 +2,7 @@ import md5, thread, subprocess, time, string, random, socket, 
os, signal, pty
 import select, re, logging
 from autotest_lib.client.bin import utils
 from autotest_lib.client.common_lib import error
+import kvm_subprocess
 
 
 KVM test utility functions.
@@ -635,7 +636,8 @@ def remote_login(command, password, prompt, linesep=\n, 
timeout=10):
 
 @return Return the kvm_spawn object on success and None on failure.
 
-sub = kvm_spawn(command, linesep)
+sub = kvm_subprocess.kvm_spawn(command)
+sub.set_linesep(linesep)
 sub.set_prompt(prompt)
 
 password_prompt_count = 0
@@ -670,7 +672,7 @@ def remote_login(command, password, prompt, linesep=\n, 
timeout=10):
 sub.close()
 return None
 elif match == 4:  # Connection refused
-kvm_log.debug(Got 'Connection refused')
+logging.debug(Got 'Connection refused')
 sub.close()
 return None
 elif match == 5:  # prompt
@@ -702,7 +704,7 @@ def remote_scp(command, password, timeout=300, 
login_timeout=10):
 
 @return: True if the transfer succeeds and False on failure.
 
-sub = kvm_spawn(command)
+sub = kvm_subprocess.kvm_spawn(command)
 
 password_prompt_count = 0
 _timeout = login_timeout
@@ -733,9 +735,10 @@ def remote_scp(command, password, timeout=300, 
login_timeout=10):
 sub.close()
 return False
 else:  # match == None
-logging.debug(Timeout or process terminated)
+logging.debug(Timeout elapsed or process terminated)
+status = sub.get_status()
 sub.close()
-return sub.poll() == 0
+return status == 0
 
 
 def scp_to_remote(host, port, username, password, local_path, remote_path,
-- 
1.5.4.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM-AUTOTEST PATCH 4/5] Modify run_autotest() in kvm_tests.py to use the new kvm_subprocess module.

2009-06-15 Thread Michael Goldish
Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_tests.py |9 +
 1 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/client/tests/kvm/kvm_tests.py b/client/tests/kvm/kvm_tests.py
index 54d2a7a..ffe9116 100644
--- a/client/tests/kvm/kvm_tests.py
+++ b/client/tests/kvm/kvm_tests.py
@@ -1,6 +1,6 @@
 import time, os, logging
 from autotest_lib.client.common_lib import utils, error
-import kvm_utils, ppm_utils, scan_results
+import kvm_utils, kvm_subprocess, ppm_utils, scan_results
 
 
 KVM test definitions.
@@ -237,15 +237,16 @@ def run_autotest(test, params, env):
 cmd +=  --exclude=*.pyc
 cmd +=  --exclude=*.svn
 cmd +=  --exclude=*.git
-kvm_utils.run_bg(cmd % (test.bindir, tarred_autotest_path), timeout=30)
+kvm_subprocess.run_fg(cmd % (test.bindir, tarred_autotest_path), 
timeout=30)
 
 # tar the contents of bindir/autotest/tests/test_name
 cmd = cd %s; tar cvjf %s %s/*
 cmd +=  --exclude=*.pyc
 cmd +=  --exclude=*.svn
 cmd +=  --exclude=*.git
-kvm_utils.run_bg(cmd % (os.path.join(test.bindir, autotest, tests),
-tarred_test_path, test_name), timeout=30)
+kvm_subprocess.run_fg(cmd %
+  (os.path.join(test.bindir, autotest, tests),
+   tarred_test_path, test_name), timeout=30)
 
 # Check if we need to copy autotest.tar.bz2
 copy = False
-- 
1.5.4.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM-AUTOTEST PATCH 5/5] Remove kvm_spawn and run_bg() from kvm_utils.py.

2009-06-15 Thread Michael Goldish
These are now provided by kvm_subprocess.py.

Signed-off-by: Michael Goldish mgold...@redhat.com
---
 client/tests/kvm/kvm_utils.py |  477 +
 1 files changed, 2 insertions(+), 475 deletions(-)

diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py
index 09af62d..0ddb7fc 100644
--- a/client/tests/kvm/kvm_utils.py
+++ b/client/tests/kvm/kvm_utils.py
@@ -231,390 +231,8 @@ def check_kvm_source_dir(source_dir):
 raise error.TestError(Unknown source dir layout, cannot proceed.)
 
 
-# The following are a class and functions used for SSH, SCP and Telnet
-# communication with guests.
-
-class kvm_spawn:
-
-This class is used for spawning and controlling a child process.
-
-
-def __init__(self, command, linesep=\n):
-
-Initialize the class and run command as a child process.
-
-@param command: Command that will be run.
-@param linesep: Line separator for the given platform.
-
-self.exitstatus = None
-self.linesep = linesep
-(pid, fd) = pty.fork()
-if pid == 0:
-os.execv(/bin/sh, [/bin/sh, -c, command])
-else:
-self.pid = pid
-self.fd = fd
-
-
-def set_linesep(self, linesep):
-
-Sets the line separator string (usually \\n).
-
-@param linesep: Line separator character.
-
-self.linesep = linesep
-
-
-def is_responsive(self, timeout=5.0):
-
-Return True if the session is responsive.
-
-Send a newline to the child process (e.g. SSH or Telnet) and read some
-output using read_nonblocking.
-If all is OK, some output should be available (e.g. the shell prompt).
-In that case return True. Otherwise return False.
-
-@param timeout: Timeout that will happen before we consider the
-process unresponsive
-
-self.read_nonblocking(timeout=0.1)
-self.sendline()
-output = self.read_nonblocking(timeout=timeout)
-if output.strip():
-return True
-return False
-
-
-def poll(self):
-
-If the process exited, return its exit status. Otherwise return None.
-The exit status is stored for use in subsequent calls.
-
-if self.exitstatus != None:
-return self.exitstatus
-pid, status = os.waitpid(self.pid, os.WNOHANG)
-if pid:
-self.exitstatus = os.WEXITSTATUS(status)
-return self.exitstatus
-else:
-return None
-
-
-def close(self):
-
-Close the session (close the process filedescriptors and kills the
-process ID), and return the exit status.
-
-try:
-os.close(self.fd)
-os.kill(self.pid, signal.SIGTERM)
-except OSError:
-pass
-return self.poll()
-
-
-def sendline(self, str=):
-
-Sends a string followed by a line separator to the child process.
-
-@param str: String that will be sent to the child process.
-
-try:
-os.write(self.fd, str + self.linesep)
-except OSError:
-pass
-
-
-def read_nonblocking(self, timeout=1.0):
-
-Read from child until there is nothing to read for timeout seconds.
-
-@param timeout: Time (seconds) of wait before we give up reading from
-the child process.
-
-data = 
-while True:
-r, w, x = select.select([self.fd], [], [], timeout)
-if self.fd in r:
-try:
-data += os.read(self.fd, 1024)
-except OSError:
-return data
-else:
-return data
-
-
-def match_patterns(self, str, patterns):
-
-Match str against a list of patterns.
-
-Return the index of the first pattern that matches a substring of str.
-None and empty strings in patterns are ignored.
-If no match is found, return None.
-
-@param patterns: List of strings (regular expression patterns).
-
-for i in range(len(patterns)):
-if not patterns[i]:
-continue
-exp = re.compile(patterns[i])
-if exp.search(str):
-return i
-
-
-def read_until_output_matches(self, patterns, filter=lambda(x):x,
-  timeout=30.0, internal_timeout=1.0,
-  print_func=None):
-
-Read using read_nonblocking until a match is found using 
match_patterns,
-or until timeout expires. Before attempting to search for a match, the
-data is filtered using the filter function provided.
-
-@brief: Read from child using read_nonblocking until a pattern
-matches.
-@param patterns: List of strings (regular 

Re: [PATCH] Activate Virtualization On Demand v2

2009-06-15 Thread Christoph Hellwig
On Mon, Jun 15, 2009 at 01:30:05PM +0200, Alexander Graf wrote:
 X86 CPUs need to have some magic happening to enable the virtualization
 extensions on them. This magic can result in unpleasant results for
 users, like blocking other VMMs from working (vmx) or using invalid TLB
 entries (svm).
 
 Currently KVM activates virtualization when the respective kernel module
 is loaded. This blocks us from autoloading KVM modules without breaking
 other VMMs.

That will only become interesting if we every have such a thing in
mainline.  So NACK, lots of complication for no good reason.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Activate Virtualization On Demand v2

2009-06-15 Thread Alexander Graf


On 15.06.2009, at 14:17, Christoph Hellwig wrote:


On Mon, Jun 15, 2009 at 01:30:05PM +0200, Alexander Graf wrote:
X86 CPUs need to have some magic happening to enable the  
virtualization

extensions on them. This magic can result in unpleasant results for
users, like blocking other VMMs from working (vmx) or using invalid  
TLB

entries (svm).

Currently KVM activates virtualization when the respective kernel  
module
is loaded. This blocks us from autoloading KVM modules without  
breaking

other VMMs.


That will only become interesting if we every have such a thing in
mainline.  So NACK, lots of complication for no good reason.



I don't want to fight political battles here. Seriously - we're out of  
kindergarden.
There are users out there who want to have VBox/VMware and kvm  
installed in parallel and can't have both kernel modules loaded at the  
same time. We're only hurting _our_ users, not the others if we keep  
people from having kvm*.ko loaded.


Sigh.

Alex
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Activate Virtualization On Demand v2

2009-06-15 Thread Christoph Hellwig
On Mon, Jun 15, 2009 at 02:25:01PM +0200, Alexander Graf wrote:
 I don't want to fight political battles here.


So stop that crap.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: Ignore reads to K7 EVNTSEL MSRs

2009-06-15 Thread Avi Kivity

On 06/15/2009 10:55 AM, Amit Shah wrote:

In commit 7fe29e0faacb650d31b9e9f538203a157bec821d we ignored the
reads to the P6 EVNTSEL MSRs. That fixed crashes on Intel machines.

Ignore the reads to K7 EVNTSEL MSRs as well to fix this on AMD
hosts.

This fixes Kaspersky antivirus crashing Windows guests on AMD hosts.
   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Anthony Liguori

Avi Kivity wrote:

On 06/15/2009 12:08 PM, Mark McLoughlin wrote:

This last option makes sense to me: in a real world the user has
control over where he places the device on the bus, so why
not with qemu?
 


Yep, most people seem to agree that it makes sense to allow this, but
some believe it should only be via a machine description file, not the
command line.
   


I don't understand this opposition.  It's clear a machine config file 
is a long way in our future.  It's also clear lack of stable PCI 
addresses hurts us now.


Is there opposition?  I don't ever recall seeing a patch...

I think it's a perfectly fine idea.

Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2803638 ] kvm-86: winxp guest + kvmnet.sys virtio unstable

2009-06-15 Thread SourceForge.net
Bugs item #2803638, was opened at 2009-06-09 19:45
Message generated for change (Comment added) made by thekozmo
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detailatid=893831aid=2803638group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Geoffrey Brimhall (vinyvat)
Assigned to: Nobody/Anonymous (nobody)
Summary: kvm-86: winxp guest + kvmnet.sys virtio unstable

Initial Comment:
* cpu model: Core i7 920
* kvm version: kvm 86
* host kernel version: 2.6.29.4 debian lenny
* host kernel arch: x86_64
* guest: winxp 32bit sp3
* qemu command line: qemu-system-x86_64 -boot c -hda ./winxp.qcow2 -cdrom 
winxp-sp3.iso -smp 2 -m 1024 -vga std -net 
nic,model=virtio,vlan=0,macaddr=00:15:60:51:09:BD -net 
tap,vlan=0,ifname=tap0,script=/etc/kvm/kvm-ifup -soundhw es1370 -localtime -k 
en-us -name  -usb

When using kvmnet.sys in guest winxp32, guest crashes ( blue-screen with 
IRQ_NOT_LESS_OR_EQUAL most common ) on complex web2.0 browser pages ( lots of 
ajax and form fields ). Switching back to default rtl8139 driver ( ie remove 
model=virtio from command line options ) gets system back to being very stable.

Reproduced crash using all permutations of kvm-85 and 2.6.26 kernel also.

--

Comment By: Dor Laor (thekozmo)
Date: 2009-06-15 15:43

Message:
Once we'll get authorization to release new windows drivers it will get
solved.


--

Comment By: Geoffrey Brimhall (vinyvat)
Date: 2009-06-09 20:32

Message:
Also have 6GB ram on system.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detailatid=893831aid=2803638group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Configuration vs. compat hints

2009-06-15 Thread Markus Armbruster
Avi Kivity a...@redhat.com writes:

 (adding cc)

 On 06/15/2009 02:35 PM, Markus Armbruster wrote:
 Not really.  QEMU gives just the host bridge a fixed slot[*].  All the
 other slots are available.


 qemu needs to export these two bits of information: the first free
 slot and the number of slots.

 More generally, which slots are open.  We can assume 1:31, but that's
 unlovely.

Point.

 The real problem is devices that get implicitly added, like the SCSI
 controller.  Those devices get their slots auto-assigned, which can
 interfere with slot numbers chosen by the user.  We need a way to avoid
 that, as you suggested elsewhere in this thread.


 Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=,
 and -drive pci_addr= (and later, -disk-controller)?  Stalling while
 waiting for the ultimate config file is only generating pain and
 out-of-tree patches.

Yup.

I got bit-rotten patches for pci_addr=, and I can unrot them if they're
wanted.

 (I'd be quite happy constructing the entire machine config on the
 command line, but I realize it's just me)

Ha, .bash_history as config file...
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Michael S. Tsirkin
On Mon, Jun 15, 2009 at 02:56:42PM +0300, Avi Kivity wrote:
 On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote:
 A slot needs to be configured in ACPI,
  

 Can we configure all possible 32 slots?


 That's what we do.  But one is always taken.  In the future, perhaps more.

 and not be taken by onboard chips
 (piix takes slot 0, for example).
  

 piix is the root complex, isn't it? Are there other examples? If not,
 we could teach management about the root complex being special ...


 We should just tell the user which slots are open.

This might be tricky if the config is passed in with the command line
flags.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Anthony Liguori

Avi Kivity wrote:

On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote:

On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote:
  

However, in order to retain compat for that SCSI device (e.g. ensuring
the PCI address doesn't change as other devices are added an removed),
we're back to the same problem ... either:

   1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to 
figure
  out what address to use, libvirt would need to query qemu for 
what

  address was originally allocated to device or it would do all the
  PCI address allocation itself ...
 


This last option makes sense to me: in a real world the user has
control over where he places the device on the bus, so why
not with qemu?
   


Yes, the user build the machine using the command line and monitor 
(or, in 2017, the machine configuration file),


Considering pbrook just posted a machine config for arm, I think it 
would be rather sad if pc wasn't converted to it by 2017...


then turns on the power.  Command line options are the parts lying 
around when we start.


btw, -drive needs to be separated:

  -controller type=lsi1234,pci_addr=foobar,name=blah
  -drive file=foo.img,controller=blah,index=0
  -drive file=bar.img,controller=blah,index=1

Drives to not have pci addresses.


Drivers don't have indexes and buses but we specify it on the -drive 
line.  -drive is convenient syntax.  It stops being convenient when you 
force it to be two options.


Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Anthony Liguori

Avi Kivity wrote:

On 06/15/2009 12:09 PM, Mark McLoughlin wrote:
I think the point is that you don't need version numbers if you 
have a

proper device tree.

 

How do you add a new attribute to the device tree and, when a supplied
device tree lacking said attribute, distinguish between a device tree
from an old version of qemu (i.e. use the old default) and a partial
device tree from the VM manager (i.e. use the new default) ?

   

-baseline 0.10
 


That's a version number :-)

(I was responding to Anthony's you don't need a version number)
   


If you want to prevent incompatibilities, you need to make everything 
new (potentially including bugfixes) non-default.  Eventually the 
default configuration becomes increasingly unusable and you need a new 
baseline.  You must still be able to fall back to the old baseline for 
older guests.  I don't think games with configuration files can hide 
that.


-M pc1
-M pc2

etc.

This is pretty easy to maintain with config files.

Regards,

Anthony Liguori


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Configuration vs. compat hints

2009-06-15 Thread Anthony Liguori

Markus Armbruster wrote:

Avi Kivity a...@redhat.com writes:

  

Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=,
and -drive pci_addr= (and later, -disk-controller)?  Stalling while
waiting for the ultimate config file is only generating pain and
out-of-tree patches.



Yup.

I got bit-rotten patches for pci_addr=, and I can unrot them if they're
wanted.
  


Yes, would be good to have patches on the list to discuss.  In 
principle, I have no objection to this.


Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Anthony Liguori

Avi Kivity wrote:

On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:

We should just tell the user which slots are open.
  
This might be tricky if the config is passed in with the command line

flags.
   


qemu -show-available-pci-slots


Why does the user care?

Let QEMU allocate the PCI slot, then query it to see what slot it 
assigned and remember that.


It's not a good idea to have management applications attempt to do PCI 
slot allocation.  For instance, one day we may decide to make virtio 
devices multi-function.


Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:

We should just tell the user which slots are open.
   


This might be tricky if the config is passed in with the command line
flags.
   


qemu -show-available-pci-slots

(the qemu equivalent to KVM_CHECK_EXTENSION)

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v2 2/2] kvm: use POLLHUP to close an irqfd instead of an explicit ioctl

2009-06-15 Thread Michael S. Tsirkin
On Mon, Jun 15, 2009 at 08:08:18AM -0400, Gregory Haskins wrote:
  @@ -123,6 +124,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int
  sync, void
   *key)
   
  cleanup_srcu_struct(irqfd-srcu);
  kfree(irqfd);
  +   module_put(THIS_MODULE);
  }
   
  return 0;
  
 
  module_put(THIS_MODULE) is always a bug unless you know that someone has
  a reference to the current module: the module could go away between this
  call and returning from function.

 
 Hmm.  I understand what you are saying conceptually (i.e. the .text
 could get yanked before we hit the next line of code, in this case the
 return 0).  However, holding a reference when you _know_ someone else
 holds a reference to me says that one of the references is redundant. 
 In addition, there is certainly plenty of precedence for
 module_put(THIS_MODULE) all throughout the kernel (including
 module_put_and_exit()).  Are those broken as well?

Maybe not, but I don't know why. It works fine as long as you don't
unload any modules though :) Rusty, could you enlighten us please?


-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 03:41 PM, Anthony Liguori wrote:

Yep, most people seem to agree that it makes sense to allow this, but
some believe it should only be via a machine description file, not the
command line.


I don't understand this opposition.  It's clear a machine config file 
is a long way in our future.  It's also clear lack of stable PCI 
addresses hurts us now.



Is there opposition?  I don't ever recall seeing a patch...


Izik Eidus posted a patch (using a different syntax) in November 2007.



I think it's a perfectly fine idea.


Good.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 03:45 PM, Anthony Liguori wrote:

This last option makes sense to me: in a real world the user has
control over where he places the device on the bus, so why
not with qemu?


Yes, the user build the machine using the command line and monitor 
(or, in 2017, the machine configuration file),



Considering pbrook just posted a machine config for arm, I think it 
would be rather sad if pc wasn't converted to it by 2017...


I'd be sad too, but not surprised.

then turns on the power.  Command line options are the parts lying 
around when we start.


btw, -drive needs to be separated:

  -controller type=lsi1234,pci_addr=foobar,name=blah
  -drive file=foo.img,controller=blah,index=0
  -drive file=bar.img,controller=blah,index=1

Drives to not have pci addresses.


Drivers don't have indexes and buses but we specify it on the -drive 
line. 


Drives do have indexes.  On old parallel scsi drives you set the index 
by clicking a button on the back of the drive to cycle through scsi 
addresses 0-7.  An IDE drive's index is determined by the cable 
(master/slave).  A SATA drive's index is determined by which header on 
the motherboard the drive connects to.


If by bus you mean the if= parameter, then drives certainly do have 
buses.  Just try connecting the scsi drive from the previous paragraph 
to a USB port.


-drive is convenient syntax.  It stops being convenient when you force 
it to be two options.


controller= defaults to some builtin thing which autoinstantiates when 
necessary, so the old -drive syntax works.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints

2009-06-15 Thread Markus Armbruster
Anthony Liguori anth...@codemonkey.ws writes:

 Avi Kivity wrote:
 On 06/15/2009 12:08 PM, Mark McLoughlin wrote:
 This last option makes sense to me: in a real world the user has
 control over where he places the device on the bus, so why
 not with qemu?
  

 Yep, most people seem to agree that it makes sense to allow this, but
 some believe it should only be via a machine description file, not the
 command line.


 I don't understand this opposition.  It's clear a machine config
 file is a long way in our future.  It's also clear lack of stable
 PCI addresses hurts us now.

 Is there opposition?  I don't ever recall seeing a patch...

http://www.archivum.info/qemu-de...@nongnu.org/2009-01/msg01458.html

 I think it's a perfectly fine idea.

Off to dust off my patch series.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 03:52 PM, Anthony Liguori wrote:

Avi Kivity wrote:

On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:

We should just tell the user which slots are open.
  This might be tricky if the config is passed in with the command line
flags.


qemu -show-available-pci-slots


Why does the user care?

Let QEMU allocate the PCI slot, then query it to see what slot it 
assigned and remember that.


It's a roundabout way of doing things.

As an example, if you try to fit too many devices into the machine, you 
have to try to add all devices and watch for a qemu error.  If you know 
in advance how many slots you have, you never enter into that situation 
(and you need to show the limit to the user anyway).




It's not a good idea to have management applications attempt to do PCI 
slot allocation.  For instance, one day we may decide to make virtio 
devices multi-function.


Non-virtio, as well.  But we can't make that the default, so the user 
will have to specify this anyway.


Given that you can't hotunplug individual functions, the user will have 
to specify exactly how functions are aggregated into devices.  My 
recommendation would be for a GUI to allow the user to select a 'quad 
port virtio NIC' or 'dual port virtio scsi controller' rather than 
trying to do anything automatic.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 03:48 PM, Anthony Liguori wrote:

device tree lacking said attribute, distinguish between a device tree
from an old version of qemu (i.e. use the old default) and a partial
device tree from the VM manager (i.e. use the new default) ?


-baseline 0.10


That's a version number :-)

(I was responding to Anthony's you don't need a version number)


If you want to prevent incompatibilities, you need to make everything 
new (potentially including bugfixes) non-default.  Eventually the 
default configuration becomes increasingly unusable and you need a 
new baseline.  You must still be able to fall back to the old 
baseline for older guests.  I don't think games with configuration 
files can hide that.

How do you add a new attribute to the device tree and, when a supplied

-M pc1
-M pc2


Certainly preferable to -baseline.


This is pretty easy to maintain with config files.


Let's not tie the two together.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Gerd Hoffmann

  Hi,


Yes, the user build the machine using the command line and monitor
(or, in 2017, the machine configuration file),


Considering pbrook just posted a machine config for arm, I think it
would be rather sad if pc wasn't converted to it by 2017...


It shouldn't last until 2017, but the process isn't that trivial.
Some qemu code / control flow needs serious restruction until it is in a
state that creating the devices from a fdt can actually work.


Drivers don't have indexes and buses but we specify it on the -drive
line. -drive is convenient syntax. It stops being convenient when you
force it to be two options.


One more issue:  -drive also mixes host and guest configuration.  These 
must be separated too.


cheers,
  Gerd

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4] Add rudimentary Hyper-V guest support v3

2009-06-15 Thread Alexander Graf
Now that we have nested SVM in place, let's make use of it and virtualize
something non-kvm.
The first interesting target that came to my mind here was Hyper-V.

This patchset makes Windows Server 2008 boot with Hyper-V, which runs
the dom0 in virtualized mode already. It hangs somewhere in IDE code when
booted, so I haven't been able to run a second VM within for now yet.

Please keep in mind that Hyper-V won't work unless you apply the userspace
patches too and the PAT bit patch

---

v2 changes:
  - remove reserved PAT check patch (Avi will do this)
  - remove #PF inject on emulated_read
  - take comments from v1 into account (listed individually)

v3 changes:
  - forward-port to current git

Alexander Graf (4):
  Add definition for IGNNE MSR
  Implement Hyper-V MSRs v2
  Nested SVM: Implement INVLPGA v2
  Nested SVM: Improve interrupt injection v2

 arch/x86/include/asm/msr-index.h |1 +
 arch/x86/kvm/svm.c   |   59 +++--
 2 files changed, 44 insertions(+), 16 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] Add definition for IGNNE MSR

2009-06-15 Thread Alexander Graf
Hyper-V tried to access MSR_IGNNE, so let's at least have a definition
for it in our headers.

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/x86/include/asm/msr-index.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec41fc1..e273549 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -372,6 +372,7 @@
 /* AMD-V MSRs */
 
 #define MSR_VM_CR   0xc0010114
+#define MSR_VM_IGNNE0xc0010115
 #define MSR_VM_HSAVE_PA 0xc0010117
 
 #endif /* _ASM_X86_MSR_INDEX_H */
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Anthony Liguori

Avi Kivity wrote:

On 06/15/2009 03:45 PM, Anthony Liguori wrote:

This last option makes sense to me: in a real world the user has
control over where he places the device on the bus, so why
not with qemu?


Yes, the user build the machine using the command line and monitor 
(or, in 2017, the machine configuration file),



Considering pbrook just posted a machine config for arm, I think it 
would be rather sad if pc wasn't converted to it by 2017...


I'd be sad too, but not surprised.

then turns on the power.  Command line options are the parts lying 
around when we start.


btw, -drive needs to be separated:

  -controller type=lsi1234,pci_addr=foobar,name=blah
  -drive file=foo.img,controller=blah,index=0
  -drive file=bar.img,controller=blah,index=1

Drives to not have pci addresses.


Drivers don't have indexes and buses but we specify it on the -drive 
line. 


Drives do have indexes.  On old parallel scsi drives you set the index 
by clicking a button on the back of the drive to cycle through scsi 
addresses 0-7.  An IDE drive's index is determined by the cable 
(master/slave).  A SATA drive's index is determined by which header on 
the motherboard the drive connects to.


It's not at all that simple.   SCSI has a hierarchical address mechanism 
with 0-7 targets but then potentially multiple LUNs per target.  Today, 
we always emulate a single LUN per target but if we ever wanted to 
support more than 7 disks on a SCSI controller, we would have to add 
multiple LUN support too.  So the current linear unit= parameter is 
actually pretty broken for SCSI.


For IDE, it's a combination of bus, slot, and master/slave.  For virtio, 
it's just a PCI address.  What we really need is something that is more 
opaque and controller specific.  For instance, if we were going to do 
controllers...


-controller type=lsi1234,pci_addr=foobar,name=blah
-controller-disk controller=blah,target=0,lun=1,name=sda

-controller type=ide,pci_addr=barfoo,name=ide
-controller-disk controller=ide,slot=secondary,cable=slave,name=hdd

-drive file=foo.img,controller-disk=sda
-drive file=bar.img,controller-disk=hdd

And having -hdd file=foo.img be short-hand for -drive 
file=%s,controller-disk=%s.





If by bus you mean the if= parameter, then drives certainly do have 
buses.  Just try connecting the scsi drive from the previous paragraph 
to a USB port.


No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious what 
it should do to you that's because it isn't at all obvious :-)  It ends 
up skipping a predefined number of locations in the drive table.  This 
is pretty broken fundamentally because it assumes controllers always 
support a fixed number of devices.  Nothing really respects bus_id 
though so in practice, I assume it's almost universally broken.


Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] Implement Hyper-V MSRs

2009-06-15 Thread Alexander Graf
Hyper-V uses some MSRs, some of which are actually reserved for BIOS usage.

But let's be nice today and have it its way, because otherwise it fails
terribly.

v2 changes:
  - remove the 0x4081 MSR definition
  - add pr_unimpl() on unimplemented writes

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/x86/kvm/svm.c |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ef43a18..4b4eadd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2034,6 +2034,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned 
ecx, u64 data)
case MSR_VM_HSAVE_PA:
svm-hsave_msr = data;
break;
+   case MSR_VM_CR:
+   case MSR_VM_IGNNE:
+   case MSR_K8_HWCR:
+   pr_unimpl(vcpu, unimplemented wrmsr: 0x%x data 0x%llx\n, ecx, 
data);
+   break;
default:
return kvm_set_msr_common(vcpu, ecx, data);
}
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] Nested SVM: Implement INVLPGA

2009-06-15 Thread Alexander Graf
SVM adds another way to do INVLPG by ASID which Hyper-V makes use of,
so let's implement it!

For now we just do the same thing invlpg does, as asid switching
means we flush the mmu anyways. That might change one day though.

v2 makes invlpga do the same as invlpg, not flush the whole mmu

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/x86/kvm/svm.c |   15 ++-
 1 files changed, 14 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4b4eadd..fa2a710 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1785,6 +1785,19 @@ static int clgi_interception(struct vcpu_svm *svm, 
struct kvm_run *kvm_run)
return 1;
 }
 
+static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+   struct kvm_vcpu *vcpu = svm-vcpu;
+   nsvm_printk(INVLPGA\n);
+
+   /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
+   kvm_mmu_invlpg(vcpu, vcpu-arch.regs[VCPU_REGS_RAX]);
+
+   svm-next_rip = kvm_rip_read(svm-vcpu) + 3;
+   skip_emulated_instruction(svm-vcpu);
+   return 1;
+}
+
 static int invalid_op_interception(struct vcpu_svm *svm,
   struct kvm_run *kvm_run)
 {
@@ -2130,7 +2143,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_INVD] = emulate_on_interception,
[SVM_EXIT_HLT]  = halt_interception,
[SVM_EXIT_INVLPG]   = invlpg_interception,
-   [SVM_EXIT_INVLPGA]  = invalid_op_interception,
+   [SVM_EXIT_INVLPGA]  = invlpga_interception,
[SVM_EXIT_IOIO] = io_interception,
[SVM_EXIT_MSR]  = msr_interception,
[SVM_EXIT_TASK_SWITCH]  = task_switch_interception,
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] Nested SVM: Improve interrupt injection

2009-06-15 Thread Alexander Graf
While trying to get Hyper-V running, I realized that the interrupt injection
mechanisms that are in place right now are not 100% correct.

This patch makes nested SVM's interrupt injection behave more like on a
real machine.

v2 calls BUG_ON when svm_set_irq is called with GIF=0

Signed-off-by: Alexander Graf ag...@suse.de
---
 arch/x86/kvm/svm.c |   39 ---
 1 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fa2a710..5b14c9d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1517,7 +1517,8 @@ static int nested_svm_vmexit_real(struct vcpu_svm *svm, 
void *arg1,
/* Kill any pending exceptions */
if (svm-vcpu.arch.exception.pending == true)
nsvm_printk(WARNING: Pending Exception\n);
-   svm-vcpu.arch.exception.pending = false;
+   kvm_clear_exception_queue(svm-vcpu);
+   kvm_clear_interrupt_queue(svm-vcpu);
 
/* Restore selected save entries */
svm-vmcb-save.es = hsave-save.es;
@@ -1585,7 +1586,8 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void 
*arg1,
svm-nested_vmcb = svm-vmcb-save.rax;
 
/* Clear internal status */
-   svm-vcpu.arch.exception.pending = false;
+   kvm_clear_exception_queue(svm-vcpu);
+   kvm_clear_interrupt_queue(svm-vcpu);
 
/* Save the old vmcb, so we don't need to pick what we save, but
   can restore everything when a VMEXIT occurs */
@@ -2277,21 +2279,14 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, 
int irq)
((/*control-int_vector  4*/ 0xf)  V_INTR_PRIO_SHIFT);
 }
 
-static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
-{
-   struct vcpu_svm *svm = to_svm(vcpu);
-
-   svm-vmcb-control.event_inj = nr |
-   SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
-}
-
 static void svm_set_irq(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
 
-   nested_svm_intr(svm);
+   BUG_ON(!(svm-vcpu.arch.hflags  HF_GIF_MASK));
 
-   svm_queue_irq(vcpu, vcpu-arch.interrupt.nr);
+   svm-vmcb-control.event_inj = vcpu-arch.interrupt.nr |
+   SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
@@ -2319,13 +2314,25 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
struct vmcb *vmcb = svm-vmcb;
return (vmcb-save.rflags  X86_EFLAGS_IF) 
!(vmcb-control.int_state  SVM_INTERRUPT_SHADOW_MASK) 
-   (svm-vcpu.arch.hflags  HF_GIF_MASK);
+   (svm-vcpu.arch.hflags  HF_GIF_MASK) 
+   !is_nested(svm);
 }
 
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
-   svm_set_vintr(to_svm(vcpu));
-   svm_inject_irq(to_svm(vcpu), 0x0);
+   struct vcpu_svm *svm = to_svm(vcpu);
+   nsvm_printk(Trying to open IRQ window\n);
+
+   nested_svm_intr(svm);
+
+   /* In case GIF=0 we can't rely on the CPU to tell us when
+* GIF becomes 1, because that's a separate STGI/VMRUN intercept.
+* The next time we get that intercept, this function will be
+* called again though and we'll get the vintr intercept. */
+   if (svm-vcpu.arch.hflags  HF_GIF_MASK) {
+   svm_set_vintr(svm);
+   svm_inject_irq(svm, 0x0);
+   }
 }
 
 static void enable_nmi_window(struct kvm_vcpu *vcpu)
@@ -2393,6 +2400,8 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
case SVM_EXITINTINFO_TYPE_EXEPT:
/* In case of software exception do not reinject an exception
   vector, but re-execute and instruction instead */
+   if (is_nested(svm))
+   break;
if (kvm_exception_is_soft(vector))
break;
if (exitintinfo  SVM_EXITINTINFO_VALID_ERR) {
-- 
1.6.0.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/8] Small cpu loop cleanups

2009-06-15 Thread Gleb Natapov
Also fix info cpus to show correct halt status with in kernel irq chip.

I removed patch that uses on_vcpu() for init/sipi handling for now.

Gleb Natapov (8):
  env-kvm_cpu_state.init is always zero here.
  Do not use env-halted to decide where halted state should be
handled.
  Call kvm_arch_load_regs() instead of kvm_load_registers()
  Rename kvm_(load|save)_mpstate to kvm_arch_(load|save)_mpstate
  Retrieve mp state info in cpu_synchronize_state()
  env-exception_index is not used by kvm code.
  s-cpu_env cannot be zero here.
  env-exit_request is not used by kvm.

 hw/apic.c |6 ++
 qemu-kvm-ia64.c   |6 ++
 qemu-kvm-x86.c|9 +++--
 qemu-kvm.c|   39 +++
 qemu-kvm.h|4 
 target-i386/machine.c |6 +++---
 target-ia64/machine.c |4 ++--
 vl.c  |1 -
 8 files changed, 47 insertions(+), 28 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/8] Rename kvm_(load|save)_mpstate to kvm_arch_(load|save)_mpstate

2009-06-15 Thread Gleb Natapov
To be consistent with other function naming.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 qemu-kvm-ia64.c   |4 ++--
 qemu-kvm-x86.c|4 ++--
 qemu-kvm.h|2 ++
 target-i386/machine.c |6 +++---
 target-ia64/machine.c |4 ++--
 5 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c
index d33c1c3..234602c 100644
--- a/qemu-kvm-ia64.c
+++ b/qemu-kvm-ia64.c
@@ -98,7 +98,7 @@ void kvm_arch_update_guest_debug(CPUState *env, struct 
kvm_guest_debug *dbg)
 {
 }
 
-void kvm_save_mpstate(CPUState *env)
+void kvm_arch_save_mpstate(CPUState *env)
 {
 #ifdef KVM_CAP_MP_STATE
 int r;
@@ -112,7 +112,7 @@ void kvm_save_mpstate(CPUState *env)
 #endif
 }
 
-void kvm_load_mpstate(CPUState *env)
+void kvm_arch_load_mpstate(CPUState *env)
 {
 #ifdef KVM_CAP_MP_STATE
 struct kvm_mp_state mp_state = { .mp_state = env-mp_state };
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 729d600..8e6fb75 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -295,7 +295,7 @@ void kvm_load_tsc(CPUState *env)
 perror(kvm_set_tsc FAILED.\n);
 }
 
-void kvm_save_mpstate(CPUState *env)
+void kvm_arch_save_mpstate(CPUState *env)
 {
 #ifdef KVM_CAP_MP_STATE
 int r;
@@ -309,7 +309,7 @@ void kvm_save_mpstate(CPUState *env)
 #endif
 }
 
-void kvm_load_mpstate(CPUState *env)
+void kvm_arch_load_mpstate(CPUState *env)
 {
 #ifdef KVM_CAP_MP_STATE
 struct kvm_mp_state mp_state = { .mp_state = env-mp_state };
diff --git a/qemu-kvm.h b/qemu-kvm.h
index fa40542..3b73fe9 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -69,6 +69,8 @@ int kvm_arch_qemu_create_context(void);
 
 void kvm_arch_save_regs(CPUState *env);
 void kvm_arch_load_regs(CPUState *env);
+void kvm_arch_load_mpstate(CPUState *env);
+void kvm_arch_save_mpstate(CPUState *env);
 int kvm_arch_qemu_init_env(CPUState *cenv);
 void kvm_arch_pre_kvm_run(void *opaque, CPUState *env);
 void kvm_arch_post_kvm_run(void *opaque, CPUState *env);
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 07df1e1..14942c0 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -34,7 +34,7 @@ void cpu_save(QEMUFile *f, void *opaque)
 
 if (kvm_enabled()) {
 kvm_save_registers(env);
-kvm_save_mpstate(env);
+kvm_arch_save_mpstate(env);
 }
 
 for(i = 0; i  CPU_NB_REGS; i++)
@@ -369,12 +369,12 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
 kvm_load_tsc(env);
 if (version_id = 5) {
 qemu_get_be32s(f, env-mp_state);
-kvm_load_mpstate(env);
+kvm_arch_load_mpstate(env);
 }
 } else {
 kvm_load_registers(env);
 kvm_load_tsc(env);
-kvm_load_mpstate(env);
+kvm_arch_load_mpstate(env);
 }
 }
 return 0;
diff --git a/target-ia64/machine.c b/target-ia64/machine.c
index dd205c5..70ef379 100644
--- a/target-ia64/machine.c
+++ b/target-ia64/machine.c
@@ -10,7 +10,7 @@ void cpu_save(QEMUFile *f, void *opaque)
 
 if (kvm_enabled()) {
 kvm_save_registers(env);
-kvm_save_mpstate(env);
+kvm_arch_save_mpstate(env);
 }
 }
 
@@ -20,7 +20,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
 
 if (kvm_enabled()) {
 kvm_load_registers(env);
-kvm_load_mpstate(env);
+kvm_arch_load_mpstate(env);
 }
 return 0;
 }
-- 
1.6.2.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/8] Call kvm_arch_load_regs() instead of kvm_load_registers()

2009-06-15 Thread Gleb Natapov
The call is done from vcpu thread.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 qemu-kvm.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 7676e02..5fa7154 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -399,7 +399,7 @@ static int kvm_main_loop_cpu(CPUState *env)
 #endif
 
 cpu_single_env = env;
-kvm_load_registers(env);
+kvm_arch_load_regs(env);
 
 while (1) {
while (!has_work(env))
-- 
1.6.2.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/8] env-kvm_cpu_state.init is always zero here.

2009-06-15 Thread Gleb Natapov

Signed-off-by: Gleb Natapov g...@redhat.com
---
 qemu-kvm.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 2aeb17c..ec911ef 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -412,7 +412,7 @@ static int kvm_main_loop_cpu(CPUState *env)
if (env-kvm_cpu_state.sipi_needed)
update_regs_for_sipi(env);
 }
-   if (!env-halted  !env-kvm_cpu_state.init)
+   if (!env-halted)
kvm_cpu_exec(env);
env-exit_request = 0;
 env-exception_index = EXCP_INTERRUPT;
-- 
1.6.2.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/8] env-exit_request is not used by kvm.

2009-06-15 Thread Gleb Natapov
Remove its use from kvm code.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 qemu-kvm-x86.c |3 +--
 qemu-kvm.c |3 ---
 vl.c   |1 -
 3 files changed, 1 insertions(+), 6 deletions(-)

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 6865385..5460136 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -634,8 +634,7 @@ void kvm_arch_post_kvm_run(void *opaque, CPUState *env)
 
 int kvm_arch_has_work(CPUState *env)
 {
-if (env-exit_request ||
-((env-interrupt_request  CPU_INTERRUPT_HARD) 
+if (((env-interrupt_request  CPU_INTERRUPT_HARD) 
 (env-eflags  IF_MASK)) ||
(env-interrupt_request  CPU_INTERRUPT_NMI))
return 1;
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 2930a1d..bbdb03a 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -168,8 +168,6 @@ static int kvm_pre_run(void *opaque, void *data)
 
 kvm_arch_pre_kvm_run(opaque, env);
 
-if (env-exit_request)
-   return 1;
 pthread_mutex_unlock(qemu_mutex);
 return 0;
 }
@@ -441,7 +439,6 @@ static int kvm_main_loop_cpu(CPUState *env)
 }
if (!env-halted || kvm_irqchip_in_kernel(kvm_context))
kvm_cpu_exec(env);
-   env-exit_request = 0;
kvm_main_loop_wait(env, 0);
 }
 pthread_mutex_unlock(qemu_mutex);
diff --git a/vl.c b/vl.c
index 845ed54..c08299c 100644
--- a/vl.c
+++ b/vl.c
@@ -3724,7 +3724,6 @@ void qemu_system_reset_request(void)
 }
 if (cpu_single_env) {
 qemu_kvm_cpu_stop(cpu_single_env);
-cpu_exit(cpu_single_env);
 }
 qemu_notify_event();
 }
-- 
1.6.2.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] Retrieve mp state info in cpu_synchronize_state()

2009-06-15 Thread Gleb Natapov
And set env-halted based on the value to show accurate vcpu state in
QEMU monitor.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 qemu-kvm.c |   27 +++
 qemu-kvm.h |2 ++
 2 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 5fa7154..3ae4b45 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -200,6 +200,33 @@ void kvm_save_registers(CPUState *env)
 on_vcpu(env, kvm_do_save_registers, env);
 }
 
+static void kvm_do_load_mpstate(void *_env)
+{
+CPUState *env = _env;
+
+kvm_arch_load_mpstate(env);
+}
+
+void kvm_load_mpstate(CPUState *env)
+{
+if (kvm_enabled()  qemu_system_ready)
+on_vcpu(env, kvm_do_load_mpstate, env);
+}
+
+static void kvm_do_save_mpstate(void *_env)
+{
+CPUState *env = _env;
+
+kvm_arch_save_mpstate(env);
+env-halted = (env-mp_state == KVM_MP_STATE_HALTED);
+}
+
+void kvm_save_mpstate(CPUState *env)
+{
+if (kvm_enabled())
+on_vcpu(env, kvm_do_save_mpstate, env);
+}
+
 int kvm_cpu_exec(CPUState *env)
 {
 int r;
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 3b73fe9..22452e9 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -221,11 +221,13 @@ static inline int kvm_sync_vcpus(void) { return 0; }
 static inline void kvm_arch_get_registers(CPUState *env)
 {
 kvm_save_registers(env);
+kvm_save_mpstate(env);
 }
 
 static inline void kvm_arch_put_registers(CPUState *env)
 {
 kvm_load_registers(env);
+kvm_load_mpstate(env);
 }
 
 static inline void cpu_synchronize_state(CPUState *env, int modified)
-- 
1.6.2.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/8] env-exception_index is not used by kvm code.

2009-06-15 Thread Gleb Natapov

Signed-off-by: Gleb Natapov g...@redhat.com
---
 qemu-kvm-ia64.c |2 --
 qemu-kvm-x86.c  |2 --
 qemu-kvm.c  |1 -
 3 files changed, 0 insertions(+), 5 deletions(-)

diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c
index 234602c..477d24c 100644
--- a/qemu-kvm-ia64.c
+++ b/qemu-kvm-ia64.c
@@ -35,7 +35,6 @@ int kvm_arch_halt(void *opaque, kvm_vcpu_context_t vcpu)
 {
 CPUState *env = cpu_single_env;
 env-hflags |= HF_HALTED_MASK;
-env-exception_index = EXCP_HLT;
 return 1;
 }
 
@@ -135,7 +134,6 @@ void kvm_arch_cpu_reset(CPUState *env)
 } else {
env-interrupt_request = ~CPU_INTERRUPT_HARD;
env-halted = 1;
-   env-exception_index = EXCP_HLT;
 }
 }
 
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 8e6fb75..6865385 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -611,7 +611,6 @@ int kvm_arch_halt(void *opaque, kvm_vcpu_context_t vcpu)
  (env-eflags  IF_MASK)) 
!(env-interrupt_request  CPU_INTERRUPT_NMI)) {
 env-halted = 1;
-   env-exception_index = EXCP_HLT;
 }
 return 1;
 }
@@ -707,7 +706,6 @@ void kvm_arch_cpu_reset(CPUState *env)
} else {
env-interrupt_request = ~CPU_INTERRUPT_HARD;
env-halted = 1;
-   env-exception_index = EXCP_HLT;
}
 }
 }
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 3ae4b45..2930a1d 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -442,7 +442,6 @@ static int kvm_main_loop_cpu(CPUState *env)
if (!env-halted || kvm_irqchip_in_kernel(kvm_context))
kvm_cpu_exec(env);
env-exit_request = 0;
-env-exception_index = EXCP_INTERRUPT;
kvm_main_loop_wait(env, 0);
 }
 pthread_mutex_unlock(qemu_mutex);
-- 
1.6.2.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/8] s-cpu_env cannot be zero here.

2009-06-15 Thread Gleb Natapov
Remove redundant check.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 hw/apic.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index f186202..eac54fd 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -471,8 +471,7 @@ static void apic_init_ipi(APICState *s)
 s-cpu_env-halted = 1;
 
 if (kvm_enabled()  !qemu_kvm_irqchip_in_kernel())
-   if (s-cpu_env)
-   kvm_apic_init(s-cpu_env);
+kvm_apic_init(s-cpu_env);
 }
 
 /* send a SIPI message to the CPU to start it */
-- 
1.6.2.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Anthony Liguori

Avi Kivity wrote:

On 06/15/2009 03:52 PM, Anthony Liguori wrote:

Avi Kivity wrote:

On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:

We should just tell the user which slots are open.
  This might be tricky if the config is passed in with the command 
line

flags.


qemu -show-available-pci-slots


Why does the user care?

Let QEMU allocate the PCI slot, then query it to see what slot it 
assigned and remember that.


It's a roundabout way of doing things.


Having libvirt do PCI slot allocation scares me.  It assumes we can 
return a whitelist of available slots, and then let libvirt just 
randomly assign things.  There's knowledge though in slot assignment 
that's board-specific.  For instance, depending on how many LNK lines 
you have, you may want to put things in slots in such a way to optimize 
interrupt balancing or something like that.


Some platforms have quirks about expecting a particular slot to have a 
particular device.  It's still an optimal device but it has to be in 
that slot.  You can't really express that via an available slot list.


Non-virtio, as well.  But we can't make that the default, so the user 
will have to specify this anyway.


Given that you can't hotunplug individual functions, the user will 
have to specify exactly how functions are aggregated into devices.  My 
recommendation would be for a GUI to allow the user to select a 'quad 
port virtio NIC' or 'dual port virtio scsi controller' rather than 
trying to do anything automatic.


Yeah, I haven't thought much about that.

Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Anthony Liguori

Avi Kivity wrote:


Certainly preferable to -baseline.


This is pretty easy to maintain with config files.


Let's not tie the two together.


I mentioned it because it suggests a good transition.  We at least have 
to think through how things map to the post-config file world regardless 
of whether that's a few months from now or a decade :-)


Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Autotest] [KVM-AUTOTEST PATCH 1/4] Make all programs on kvm test use /usr/bin/python

2009-06-15 Thread Alexey Eromenko

- Martin Bligh mbl...@google.com wrote:

 On Wed, Jun 10, 2009 at 4:01 AM, Alexey Eromenkoaerom...@redhat.com
 wrote:
 
  Even better would be to use /usr/bin/python2.
 
 That doesn't seem to exist, on Ubuntu at least.
 

Red Hat systems have it. /usr/bin/python2 is a symlink to /usr/bin/python 
(which is python2 executable)

Is there any Ubuntu-compatible way of achieving this?

-Alexey
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 04:20 PM, Anthony Liguori wrote:


then turns on the power.  Command line options are the parts lying 
around when we start.


btw, -drive needs to be separated:

  -controller type=lsi1234,pci_addr=foobar,name=blah
  -drive file=foo.img,controller=blah,index=0
  -drive file=bar.img,controller=blah,index=1

Drives to not have pci addresses.


Drivers don't have indexes and buses but we specify it on the -drive 
line. 


Drives do have indexes.  On old parallel scsi drives you set the 
index by clicking a button on the back of the drive to cycle through 
scsi addresses 0-7.  An IDE drive's index is determined by the cable 
(master/slave).  A SATA drive's index is determined by which header 
on the motherboard the drive connects to.



It's not at all that simple.   SCSI has a hierarchical address 
mechanism with 0-7 targets but then potentially multiple LUNs per 
target.  Today, we always emulate a single LUN per target but if we 
ever wanted to support more than 7 disks on a SCSI controller, we 
would have to add multiple LUN support too.  So the current linear 
unit= parameter is actually pretty broken for SCSI.


Well, another level in the hierarchy, but I don't think it materially 
changes things.




For IDE, it's a combination of bus, slot, and master/slave.  For 
virtio, it's just a PCI address.  What we really need is something 
that is more opaque and controller specific. 


virtio also has a bus (did you mean the pci bus for IDE?), master/slave 
is the index.  virtio doesn't have index, but IMO that was a mistake and 
we should have designed it as a disk controller in the first place.



For instance, if we were going to do controllers...

-controller type=lsi1234,pci_addr=foobar,name=blah
-controller-disk controller=blah,target=0,lun=1,name=sda

-controller type=ide,pci_addr=barfoo,name=ide
-controller-disk controller=ide,slot=secondary,cable=slave,name=hdd

-drive file=foo.img,controller-disk=sda
-drive file=bar.img,controller-disk=hdd

And having -hdd file=foo.img be short-hand for -drive 
file=%s,controller-disk=%s.


Yeah.




If by bus you mean the if= parameter, then drives certainly do have 
buses.  Just try connecting the scsi drive from the previous 
paragraph to a USB port.


No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
what it should do to you that's because it isn't at all obvious :-)  
It ends up skipping a predefined number of locations in the drive 
table.  This is pretty broken fundamentally because it assumes 
controllers always support a fixed number of devices.  Nothing really 
respects bus_id though so in practice, I assume it's almost 
universally broken.


Isn't the drive table something totally internal?  And how does bus= 
relate to it?


Confused.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Anthony Liguori

Avi Kivity wrote:

On 06/15/2009 04:20 PM, Anthony Liguori wrote:
It's not at all that simple.   SCSI has a hierarchical address 
mechanism with 0-7 targets but then potentially multiple LUNs per 
target.  Today, we always emulate a single LUN per target but if we 
ever wanted to support more than 7 disks on a SCSI controller, we 
would have to add multiple LUN support too.  So the current linear 
unit= parameter is actually pretty broken for SCSI.


Well, another level in the hierarchy, but I don't think it materially 
changes things.


Depends on whether you expect to say index=0,lun=3 or index=3.  If you 
mean the later, then it's quite conceivable that each target supports 
less than the maximum number of LUNs.  This makes things pretty 
confusing to the user because they have to know that in the current 
implementation, index=0 is valid, index=1 isn't, but index=8 is.


No, I meant drive file=foo.img,bus=3.  If that doesn't seem obvious 
what it should do to you that's because it isn't at all obvious :-)  
It ends up skipping a predefined number of locations in the drive 
table.  This is pretty broken fundamentally because it assumes 
controllers always support a fixed number of devices.  Nothing really 
respects bus_id though so in practice, I assume it's almost 
universally broken.


Isn't the drive table something totally internal?  And how does bus= 
relate to it?


The reality of unit=X,bus=Y,if=Z is that they expand to:

drive_table_index=Y*max_devs[Z] + X

Whereas max_devs = {ide:4, scsi: 7, *:0}

How drive_table_index is interpreted is if specific.  For if=scsi, 
each lsi device gets a base drive table index that starts at bus_index * 
7.  For virtio, the first empty spot in drive_table results in no more 
drives being created.


It's broken by design.

Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 04:23 PM, Anthony Liguori wrote:

Avi Kivity wrote:

On 06/15/2009 03:52 PM, Anthony Liguori wrote:

Avi Kivity wrote:

On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote:

We should just tell the user which slots are open.
  This might be tricky if the config is passed in with the command 
line

flags.


qemu -show-available-pci-slots


Why does the user care?

Let QEMU allocate the PCI slot, then query it to see what slot it 
assigned and remember that.


It's a roundabout way of doing things.


Having libvirt do PCI slot allocation scares me.  It assumes we can 
return a whitelist of available slots, and then let libvirt just 
randomly assign things.  There's knowledge though in slot assignment 
that's board-specific.  For instance, depending on how many LNK lines 
you have, you may want to put things in slots in such a way to 
optimize interrupt balancing or something like that.


How would qemu know which slots to optimize for?

In practice, I don't see that as a real problem.  We should (a) add an 
ioapic and four more pci links (b) recommend that slots be assigned in 
ascending order, and everything works.


I don't see your concern about libvirt allocating slots.  If a human can 
plug a card into a slot, so can libvirt.  Doing an interactive 
back-and-forth (equivalent to plugging a card while blindfolded, then 
looking to see which slot we hit) is certainly more difficult.


Some platforms have quirks about expecting a particular slot to have a 
particular device.  It's still an optimal device but it has to be in 
that slot.  You can't really express that via an available slot list.


I'll be surprised if we ever measure different dma speeds on different 
slots in the qemu virtual pci bus.  If we do, we'll find a way to 
express them:


  $ qemu -print-pci
  slot 0:01: available 33MHz
  slot 0:02: available 33MHz
  slot 0:03: available 66MHz

I feel a little silly typing this.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]

2009-06-15 Thread Avi Kivity

On 06/15/2009 04:24 PM, Anthony Liguori wrote:

Avi Kivity wrote:


Certainly preferable to -baseline.


This is pretty easy to maintain with config files.


Let's not tie the two together.


I mentioned it because it suggests a good transition.  We at least 
have to think through how things map to the post-config file world 
regardless of whether that's a few months from now or a decade :-)


Sure, it's good both from the transitional point of view and in its own 
right.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] kvm-s390: streamline memslot handling - rebased v2

2009-06-15 Thread ehrhardt
From: Christian Ehrhardt ehrha...@linux.vnet.ibm.com

As requested this is a rebased patch on top of the already applied v3
of the patch series.

*updates to applied version*
- remove dependency to KVM_REQ_MMU_RELOAD in generic code
- remove explicit barrier after test_and_clear_bit as it is implied
- ensure the wait_on_bit waiter is notified
- ensure dropping vcpu all requests while freeing a vcpu
- kickout only scheduled vcpus (its superfluous and wait might hang forever on
  not running vcpus)
- kvm_arch_set_memory_region waits until the bit is consumed by the vcpu

This patch relocates the variables kvm-s390 uses to track guest mem addr/size.
As discussed dropping the variables at struct kvm_arch level allows to use the
common vcpu-request based mechanism to reload guest memory if e.g. changes
via set_memory_region.
The kick mechanism introduced in this series is used to ensure running vcpus
leave guest state to catch the update.

Signed-off-by: Christian Ehrhardt ehrha...@linux.vnet.ibm.com
---

[diffstat]
 arch/s390/kvm/kvm-s390.c |   27 ---
 arch/s390/kvm/kvm-s390.h |6 ++
 virt/kvm/kvm_main.c  |6 ++
 3 files changed, 32 insertions(+), 7 deletions(-)

Index: kvm/arch/s390/kvm/kvm-s390.c
===
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -674,6 +674,12 @@ long kvm_arch_vcpu_ioctl(struct file *fi
return -EINVAL;
 }
 
+static int wait_bit_schedule(void *word)
+{
+   schedule();
+   return 0;
+}
+
 /* Section: memory related */
 int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
@@ -681,6 +687,7 @@ int kvm_arch_set_memory_region(struct kv
int user_alloc)
 {
int i;
+   struct kvm_vcpu *vcpu;
 
/* A few sanity checks. We can have exactly one memory slot which has
   to start at guest virtual zero and which has to be located at a
@@ -706,13 +713,19 @@ int kvm_arch_set_memory_region(struct kv
 
/* request update of sie control block for all available vcpus */
for (i = 0; i  KVM_MAX_VCPUS; ++i) {
-   if (kvm-vcpus[i]) {
-   if (test_and_set_bit(KVM_REQ_MMU_RELOAD,
-   kvm-vcpus[i]-requests))
-   continue;
-   kvm_s390_inject_sigp_stop(kvm-vcpus[i],
- ACTION_VCPUREQUEST_ON_STOP);
-   }
+   vcpu = kvm-vcpus[i];
+   if (!vcpu)
+   continue;
+
+   if (!test_and_set_bit(KVM_REQ_MMU_RELOAD, vcpu-requests))
+   continue;
+
+   if (vcpu-cpu == -1)
+   continue;
+
+   kvm_s390_inject_sigp_stop(vcpu, ACTION_VCPUREQUEST_ON_STOP);
+   wait_on_bit(vcpu-requests, KVM_REQ_MMU_RELOAD,
+   wait_bit_schedule, TASK_UNINTERRUPTIBLE);
}
 
return 0;
Index: kvm/arch/s390/kvm/kvm-s390.h
===
--- kvm.orig/arch/s390/kvm/kvm-s390.h
+++ kvm/arch/s390/kvm/kvm-s390.h
@@ -92,6 +92,12 @@ static inline unsigned long kvm_s390_han
if (!vcpu-requests)
return 0;
 
+   /* requests that can be handled at all levels */
+   if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, vcpu-requests)) {
+   wake_up_bit(vcpu-requests, KVM_REQ_MMU_RELOAD);
+   kvm_s390_vcpu_set_mem(vcpu);
+   }
+
return vcpu-requests;
 }
 
Index: kvm/virt/kvm/kvm_main.c
===
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -1681,6 +1681,12 @@ static int kvm_vcpu_mmap(struct file *fi
 static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 {
struct kvm_vcpu *vcpu = filp-private_data;
+   int i;
+
+   vcpu-requests = 0;
+   smp_mb();
+   for (i = 0; i  sizeof(vcpu-requests); i++)
+   wake_up_bit(vcpu-requests, i);
 
kvm_put_kvm(vcpu-kvm);
return 0;
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] kvm-s390: update vcpu-cpu - rebased

2009-06-15 Thread ehrhardt
From: Christian Ehrhardt ehrha...@linux.vnet.ibm.com

As requested this is a rebased patch on top of the already applied v3
of the patch series.

kvm on s390 formerly ignored vcpu-cpu.
This patch adds set/unset vcpu-cpu in kvm_arch_vcpu_load/put to allow
further architecture unification e.g. let generic code not find -1 on
currently scheduled vcpus.

Signed-off-by: Christian Ehrhardt ehrha...@linux.vnet.ibm.com
---

[diffstat]
 kvm-s390.c |2 ++
 1 file changed, 2 insertions(+)

[diff]
Index: kvm/arch/s390/kvm/kvm-s390.c
===
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -244,6 +244,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcp
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+   vcpu-cpu = cpu;
save_fp_regs(vcpu-arch.host_fpregs);
save_access_regs(vcpu-arch.host_acrs);
vcpu-arch.guest_fpregs.fpc = FPC_VALID_MASK;
@@ -253,6 +254,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu 
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+   vcpu-cpu = -1;
save_fp_regs(vcpu-arch.guest_fpregs);
save_access_regs(vcpu-arch.guest_acrs);
restore_fp_regs(vcpu-arch.host_fpregs);
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >