[COMMIT master] Fix broken indentation in kvm_main_loop_cpu()
From: Avi Kivity a...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index 0f7adcc..eefca61 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -1914,12 +1914,12 @@ static int kvm_main_loop_cpu(CPUState *env) kvm_main_loop_wait(env, 1000); if (env-interrupt_request (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI)) env-halted = 0; -if (!kvm_irqchip_in_kernel(kvm_context)) { +if (!kvm_irqchip_in_kernel(kvm_context)) { if (env-kvm_cpu_state.init) update_regs_for_init(env); if (env-kvm_cpu_state.sipi_needed) update_regs_for_sipi(env); -} +} if (!env-halted !env-kvm_cpu_state.init) kvm_cpu_exec(env); env-exit_request = 0; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Return to userspace on emulation failure
From: Avi Kivity a...@redhat.com Instead of mindlessly retrying to execute the instruction, report the failure to userspace. Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3f5dc35..7e9b29d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2649,8 +2649,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) ++vcpu-stat.mmio_exits; return 0; case EMULATE_FAIL: - kvm_report_emulation_failure(vcpu, pagetable); - return 1; + vcpu-run-exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu-run-internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + return 0; default: BUG(); } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index c7611ef..38ff31e 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -95,6 +95,10 @@ struct kvm_pit_config { #define KVM_EXIT_S390_RESET 14 #define KVM_EXIT_DCR 15 #define KVM_EXIT_NMI 16 +#define KVM_EXIT_INTERNAL_ERROR 17 + +/* For KVM_EXIT_INTERNAL_ERROR */ +#define KVM_INTERNAL_ERROR_EMULATION 1 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { @@ -181,6 +185,9 @@ struct kvm_run { __u32 data; __u8 is_write; } dcr; + struct { + __u32 suberror; + } internal; /* Fix the size of the union. */ char padding[256]; }; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: introduce is_last_spte helper
From: Marcelo Tosatti mtosa...@redhat.com Hiding some of the last largepage / level interaction (which is useful for gbpages and for zero based levels). Also merge the PT_PAGE_TABLE_LEVEL clearing loop in unlink_children. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7e9b29d..0178e8b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -250,6 +250,15 @@ static int is_rmap_spte(u64 pte) return is_shadow_present_pte(pte); } +static int is_last_spte(u64 pte, int level) +{ + if (level == PT_PAGE_TABLE_LEVEL) + return 1; + if (level == PT_DIRECTORY_LEVEL is_large_pte(pte)) + return 1; + return 0; +} + static pfn_t spte_to_pfn(u64 pte) { return (pte PT64_BASE_ADDR_MASK) PAGE_SHIFT; @@ -1293,25 +1302,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, pt = sp-spt; - if (sp-role.level == PT_PAGE_TABLE_LEVEL) { - for (i = 0; i PT64_ENT_PER_PAGE; ++i) { - if (is_shadow_present_pte(pt[i])) - rmap_remove(kvm, pt[i]); - pt[i] = shadow_trap_nonpresent_pte; - } - return; - } - for (i = 0; i PT64_ENT_PER_PAGE; ++i) { ent = pt[i]; if (is_shadow_present_pte(ent)) { - if (!is_large_pte(ent)) { + if (!is_last_spte(ent, sp-role.level)) { ent = PT64_BASE_ADDR_MASK; mmu_page_remove_parent_pte(page_header(ent), pt[i]); } else { - --kvm-stat.lpages; + if (is_large_pte(ent)) + --kvm-stat.lpages; rmap_remove(kvm, pt[i]); } } @@ -2357,8 +2358,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, pte = *spte; if (is_shadow_present_pte(pte)) { - if (sp-role.level == PT_PAGE_TABLE_LEVEL || - is_large_pte(pte)) + if (is_last_spte(pte, sp-role.level)) rmap_remove(vcpu-kvm, spte); else { child = page_header(pte PT64_BASE_ADDR_MASK); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU audit: update audit_write_protection
From: Marcelo Tosatti mtosa...@redhat.com - Unsync pages contain writable sptes in the rmap. - rmaps do not exclusively contain writable sptes anymore. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a58f8d8..1679620 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3236,20 +3236,28 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) struct kvm_mmu_page *sp; struct kvm_memory_slot *slot; unsigned long *rmapp; + u64 *spte; gfn_t gfn; list_for_each_entry(sp, vcpu-kvm-arch.active_mmu_pages, link) { if (sp-role.direct) continue; + if (sp-unsync) + continue; gfn = unalias_gfn(vcpu-kvm, sp-gfn); slot = gfn_to_memslot_unaliased(vcpu-kvm, sp-gfn); rmapp = slot-rmap[gfn - slot-base_gfn]; - if (*rmapp) - printk(KERN_ERR %s: (%s) shadow page has writable - mappings: gfn %lx role %x\n, + + spte = rmap_next(vcpu-kvm, rmapp, NULL); + while (spte) { + if (*spte PT_WRITABLE_MASK) + printk(KERN_ERR %s: (%s) shadow page has + writable mappings: gfn %lx role %x\n, __func__, audit_msg, sp-gfn, sp-role.word); + spte = rmap_next(vcpu-kvm, rmapp, spte); + } } } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU audit: nontrapping ptes in nonleaf level
From: Marcelo Tosatti mtosa...@redhat.com It is valid to set non leaf sptes as notrap. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1679620..1c1b002 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3085,12 +3085,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, va = canonicalize(va); if (level 1) { - if (ent == shadow_notrap_nonpresent_pte) - printk(KERN_ERR audit: (%s) nontrapping pte - in nonleaf level: levels %d gva %lx - level %d pte %llx\n, audit_msg, - vcpu-arch.mmu.root_level, va, level, ent); - else + if (is_shadow_present_pte(ent)) audit_mappings_page(vcpu, ent, va, level - 1); } else { gpa_t gpa = vcpu-arch.mmu.gva_to_gpa(vcpu, va); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU audit: update count_writable_mappings / count_rmaps
From: Marcelo Tosatti mtosa...@redhat.com Under testing, count_writable_mappings returns a value that is 2 integers larger than what count_rmaps returns. Suspicion is that either of the two functions is counting a duplicate (either positively or negatively). Modifying check_writable_mappings_rmap to check for rmap existance on all present MMU pages fails to trigger an error, which should keep Avi happy. Also introduce mmu_spte_walk to invoke a callback on all present sptes visible to the current vcpu, might be useful in the future. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 0178e8b..a58f8d8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3021,6 +3021,55 @@ static gva_t canonicalize(gva_t gva) return gva; } + +typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, +u64 *sptep); + +static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, + inspect_spte_fn fn) +{ + int i; + + for (i = 0; i PT64_ENT_PER_PAGE; ++i) { + u64 ent = sp-spt[i]; + + if (is_shadow_present_pte(ent)) { + if (sp-role.level 1 !is_large_pte(ent)) { + struct kvm_mmu_page *child; + child = page_header(ent PT64_BASE_ADDR_MASK); + __mmu_spte_walk(kvm, child, fn); + } + if (sp-role.level == 1) + fn(kvm, sp, sp-spt[i]); + } + } +} + +static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) +{ + int i; + struct kvm_mmu_page *sp; + + if (!VALID_PAGE(vcpu-arch.mmu.root_hpa)) + return; + if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + hpa_t root = vcpu-arch.mmu.root_hpa; + sp = page_header(root); + __mmu_spte_walk(vcpu-kvm, sp, fn); + return; + } + for (i = 0; i 4; ++i) { + hpa_t root = vcpu-arch.mmu.pae_root[i]; + + if (root VALID_PAGE(root)) { + root = PT64_BASE_ADDR_MASK; + sp = page_header(root); + __mmu_spte_walk(vcpu-kvm, sp, fn); + } + } + return; +} + static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, gva_t va, int level) { @@ -3113,9 +3162,47 @@ static int count_rmaps(struct kvm_vcpu *vcpu) return nmaps; } -static int count_writable_mappings(struct kvm_vcpu *vcpu) +void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) +{ + unsigned long *rmapp; + struct kvm_mmu_page *rev_sp; + gfn_t gfn; + + if (*sptep PT_WRITABLE_MASK) { + rev_sp = page_header(__pa(sptep)); + gfn = rev_sp-gfns[sptep - rev_sp-spt]; + + if (!gfn_to_memslot(kvm, gfn)) { + if (!printk_ratelimit()) + return; + printk(KERN_ERR %s: no memslot for gfn %ld\n, +audit_msg, gfn); + printk(KERN_ERR %s: index %ld of sp (gfn=%lx)\n, + audit_msg, sptep - rev_sp-spt, + rev_sp-gfn); + dump_stack(); + return; + } + + rmapp = gfn_to_rmap(kvm, rev_sp-gfns[sptep - rev_sp-spt], 0); + if (!*rmapp) { + if (!printk_ratelimit()) + return; + printk(KERN_ERR %s: no rmap for writable spte %llx\n, +audit_msg, *sptep); + dump_stack(); + } + } + +} + +void audit_writable_sptes_have_rmaps(struct kvm_vcpu *vcpu) +{ + mmu_spte_walk(vcpu, inspect_spte_has_rmap); +} + +static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) { - int nmaps = 0; struct kvm_mmu_page *sp; int i; @@ -3132,20 +3219,16 @@ static int count_writable_mappings(struct kvm_vcpu *vcpu) continue; if (!(ent PT_WRITABLE_MASK)) continue; - ++nmaps; + inspect_spte_has_rmap(vcpu-kvm, sp, pt[i]); } } - return nmaps; + return; } static void audit_rmap(struct kvm_vcpu *vcpu) { - int n_rmap = count_rmaps(vcpu); - int n_actual = count_writable_mappings(vcpu); - - if (n_rmap != n_actual) - printk(KERN_ERR %s: (%s) rmap %d actual %d\n, - __func__, audit_msg, n_rmap,
[COMMIT master] KVM: Replace pending exception by PF if it happens serially
From: Gleb Natapov g...@redhat.com Replace previous exception with a new one in a hope that instruction re-execution will regenerate lost exception. Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8dae08d..6025e5b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -181,16 +181,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, ++vcpu-stat.pf_guest; if (vcpu-arch.exception.pending) { - if (vcpu-arch.exception.nr == PF_VECTOR) { - printk(KERN_DEBUG kvm: inject_page_fault: -double fault 0x%lx\n, addr); - vcpu-arch.exception.nr = DF_VECTOR; - vcpu-arch.exception.error_code = 0; - } else if (vcpu-arch.exception.nr == DF_VECTOR) { + switch(vcpu-arch.exception.nr) { + case DF_VECTOR: /* triple fault - shutdown */ set_bit(KVM_REQ_TRIPLE_FAULT, vcpu-requests); + return; + case PF_VECTOR: + vcpu-arch.exception.nr = DF_VECTOR; + vcpu-arch.exception.error_code = 0; + return; + default: + /* replace previous exception with a new one in a hope + that instruction re-execution will regenerate lost + exception */ + vcpu-arch.exception.pending = false; + break; } - return; } vcpu-arch.cr2 = addr; kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Move performance counter MSR access interception to generic x86 path
From: Andre Przywara andre.przyw...@amd.com The performance counter MSRs are different for AMD and Intel CPUs and they are chosen mainly by the CPUID vendor string. This patch catches writes to all addresses (regardless of VMX/SVM path) and handles them in the generic MSR handler routine. Writing a 0 into the event select register is something we perfectly emulate ;-), so don't print out a warning to dmesg in this case. This fixes booting a 64bit Windows guest with an AMD CPUID on an Intel host. Signed-off-by: Andre Przywara andre.przyw...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 13f6f7d..cdbd4d1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2143,22 +2143,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) else svm_disable_lbrv(svm); break; - case MSR_K7_EVNTSEL0: - case MSR_K7_EVNTSEL1: - case MSR_K7_EVNTSEL2: - case MSR_K7_EVNTSEL3: - case MSR_K7_PERFCTR0: - case MSR_K7_PERFCTR1: - case MSR_K7_PERFCTR2: - case MSR_K7_PERFCTR3: - /* -* Just discard all writes to the performance counters; this -* should keep both older linux and windows 64-bit guests -* happy -*/ - pr_unimpl(vcpu, unimplemented perfctr wrmsr: 0x%x data 0x%llx\n, ecx, data); - - break; case MSR_VM_HSAVE_PA: svm-hsave_msr = data; break; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b949db0..dc502b9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1025,18 +1025,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) rdtscll(host_tsc); guest_write_tsc(data, host_tsc); break; - case MSR_P6_PERFCTR0: - case MSR_P6_PERFCTR1: - case MSR_P6_EVNTSEL0: - case MSR_P6_EVNTSEL1: - /* -* Just discard all writes to the performance counters; this -* should keep both older linux and windows 64-bit guests -* happy -*/ - pr_unimpl(vcpu, unimplemented perfctr wrmsr: 0x%x data 0x%llx\n, msr_index, data); - - break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl VM_ENTRY_LOAD_IA32_PAT) { vmcs_write64(GUEST_IA32_PAT, data); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1b91ea7..8dae08d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -849,6 +849,36 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: return set_msr_mce(vcpu, msr, data); + + /* Performance counters are not protected by a CPUID bit, +* so we should check all of them in the generic path for the sake of +* cross vendor migration. +* Writing a zero into the event select MSRs disables them, +* which we perfectly emulate ;-). Any other value should be at least +* reported, some guests depend on them. +*/ + case MSR_P6_EVNTSEL0: + case MSR_P6_EVNTSEL1: + case MSR_K7_EVNTSEL0: + case MSR_K7_EVNTSEL1: + case MSR_K7_EVNTSEL2: + case MSR_K7_EVNTSEL3: + if (data != 0) + pr_unimpl(vcpu, unimplemented perfctr wrmsr: + 0x%x data 0x%llx\n, msr, data); + break; + /* at least RHEL 4 unconditionally writes to the perfctr registers, +* so we ignore writes to make it happy. +*/ + case MSR_P6_PERFCTR0: + case MSR_P6_PERFCTR1: + case MSR_K7_PERFCTR0: + case MSR_K7_PERFCTR1: + case MSR_K7_PERFCTR2: + case MSR_K7_PERFCTR3: + pr_unimpl(vcpu, unimplemented perfctr wrmsr: + 0x%x data 0x%llx\n, msr, data); + break; default: pr_unimpl(vcpu, unhandled wrmsr: 0x%x data %llx\n, msr, data); return 1; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: VMX: more MSR_IA32_VMX_EPT_VPID_CAP capability bits
From: Marcelo Tosatti mtosa...@redhat.com Required for EPT misconfiguration handler. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index e7927a6..272514c 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -352,9 +352,16 @@ enum vmcs_field { #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 #define VMX_EPT_EXTENT_CONTEXT 1 #define VMX_EPT_EXTENT_GLOBAL 2 + +#define VMX_EPT_EXECUTE_ONLY_BIT (1ull) +#define VMX_EPT_PAGE_WALK_4_BIT(1ull 6) +#define VMX_EPTP_UC_BIT(1ull 8) +#define VMX_EPTP_WB_BIT(1ull 14) +#define VMX_EPT_2MB_PAGE_BIT (1ull 16) #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull 24) #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull 26) + #define VMX_EPT_DEFAULT_GAW3 #define VMX_EPT_MAX_GAW0x4 #define VMX_EPT_MT_EPTE_SHIFT 3 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index dc502b9..6e202dd 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -270,6 +270,26 @@ static inline bool cpu_has_vmx_flexpriority(void) cpu_has_vmx_virtualize_apic_accesses(); } +static inline bool cpu_has_vmx_ept_execute_only(void) +{ + return !!(vmx_capability.ept VMX_EPT_EXECUTE_ONLY_BIT); +} + +static inline bool cpu_has_vmx_eptp_uncacheable(void) +{ + return !!(vmx_capability.ept VMX_EPTP_UC_BIT); +} + +static inline bool cpu_has_vmx_eptp_writeback(void) +{ + return !!(vmx_capability.ept VMX_EPTP_WB_BIT); +} + +static inline bool cpu_has_vmx_ept_2m_page(void) +{ + return !!(vmx_capability.ept VMX_EPT_2MB_PAGE_BIT); +} + static inline int cpu_has_vmx_invept_individual_addr(void) { return !!(vmx_capability.ept VMX_EPT_EXTENT_INDIVIDUAL_BIT); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: make for_each_shadow_entry aware of largepages
From: Marcelo Tosatti mtosa...@redhat.com This way there is no need to add explicit checks in every for_each_shadow_entry user. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1b47a69..d440f64 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1282,6 +1282,11 @@ static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) { if (iterator-level PT_PAGE_TABLE_LEVEL) return false; + + if (iterator-level == PT_PAGE_TABLE_LEVEL) + if (is_large_pte(*iterator-sptep)) + return false; + iterator-index = SHADOW_PT_INDEX(iterator-addr, iterator-level); iterator-sptep = ((u64 *)__va(iterator-shadow_addr)) + iterator-index; return true; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: add kvm_mmu_get_spte_hierarchy helper
From: Marcelo Tosatti mtosa...@redhat.com Required by EPT misconfiguration handler. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d440f64..edf3dea 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3014,6 +3014,24 @@ out: return r; } +int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) +{ + struct kvm_shadow_walk_iterator iterator; + int nr_sptes = 0; + + spin_lock(vcpu-kvm-mmu_lock); + for_each_shadow_entry(vcpu, addr, iterator) { + sptes[iterator.level-1] = *iterator.sptep; + nr_sptes++; + if (!is_shadow_present_pte(*iterator.sptep)) + break; + } + spin_unlock(vcpu-kvm-mmu_lock); + + return nr_sptes; +} +EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); + #ifdef AUDIT static const char *audit_msg; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 016bf71..61a1b38 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -37,6 +37,8 @@ #define PT32_ROOT_LEVEL 2 #define PT32E_ROOT_LEVEL 3 +int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); + static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) { if (unlikely(vcpu-kvm-arch.n_free_mmu_pages KVM_MIN_FREE_MMU_PAGES)) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: VMX: conditionally disable 2M pages
From: Marcelo Tosatti mtosa...@redhat.com Disable usage of 2M pages if VMX_EPT_2MB_PAGE_BIT (bit 16) is clear in MSR_IA32_VMX_EPT_VPID_CAP and EPT is enabled. [avi: s/largepages_disabled/largepages_enabled/ to avoid negative logic] Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1fe3785..6c0902e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1381,6 +1381,9 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_tpr_shadow()) kvm_x86_ops-update_cr8_intercept = NULL; + if (enable_ept !cpu_has_vmx_ept_2m_page()) + kvm_disable_largepages(); + return alloc_kvm_area(); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e4e78db..1b48092 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -219,6 +219,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); +void kvm_disable_largepages(void); void kvm_arch_flush_shadow(struct kvm *kvm); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9fab08e..d604812 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -85,6 +85,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, static bool kvm_rebooting; +static bool largepages_enabled = true; + #ifdef KVM_CAP_DEVICE_ASSIGNMENT static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, int assigned_dev_id) @@ -1171,9 +1173,11 @@ int __kvm_set_memory_region(struct kvm *kvm, ugfn = new.userspace_addr PAGE_SHIFT; /* * If the gfn and userspace address are not aligned wrt each -* other, disable large page support for this slot +* other, or if explicitly asked to, disable large page +* support for this slot */ - if ((base_gfn ^ ugfn) (KVM_PAGES_PER_HPAGE - 1)) + if ((base_gfn ^ ugfn) (KVM_PAGES_PER_HPAGE - 1) || + !largepages_enabled) for (i = 0; i largepages; ++i) new.lpage_info[i].write_count = 1; } @@ -1286,6 +1290,12 @@ out: return r; } +void kvm_disable_largepages(void) +{ + largepages_enabled = false; +} +EXPORT_SYMBOL_GPL(kvm_disable_largepages); + int is_error_page(struct page *page) { return page == bad_page; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU audit: largepage handling
From: Marcelo Tosatti mtosa...@redhat.com Make the audit code aware of largepages. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1ed7362..1b47a69 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3034,12 +3034,11 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, u64 ent = sp-spt[i]; if (is_shadow_present_pte(ent)) { - if (sp-role.level 1 !is_large_pte(ent)) { + if (!is_last_spte(ent, sp-role.level)) { struct kvm_mmu_page *child; child = page_header(ent PT64_BASE_ADDR_MASK); __mmu_spte_walk(kvm, child, fn); - } - if (sp-role.level == 1) + } else fn(kvm, sp, sp-spt[i]); } } @@ -3084,10 +3083,9 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, continue; va = canonicalize(va); - if (level 1) { - if (is_shadow_present_pte(ent)) - audit_mappings_page(vcpu, ent, va, level - 1); - } else { + if (is_shadow_present_pte(ent) !is_last_spte(ent, level)) + audit_mappings_page(vcpu, ent, va, level - 1); + else { gpa_t gpa = vcpu-arch.mmu.gva_to_gpa(vcpu, va); gfn_t gfn = gpa PAGE_SHIFT; pfn_t pfn = gfn_to_pfn(vcpu-kvm, gfn); @@ -3184,7 +3182,8 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) return; } - rmapp = gfn_to_rmap(kvm, rev_sp-gfns[sptep - rev_sp-spt], 0); + rmapp = gfn_to_rmap(kvm, rev_sp-gfns[sptep - rev_sp-spt], + is_large_pte(*sptep)); if (!*rmapp) { if (!printk_ratelimit()) return; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Fix dirty bit tracking for slots with large pages
From: Izik Eidus iei...@redhat.com When slot is already allocated and being asked to be tracked we need to break the large pages. This code flush the mmu when someone ask a slot to start dirty bit tracking. Cc: sta...@kernel.org Signed-off-by: Izik Eidus iei...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d604812..8939ffa 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1190,6 +1190,8 @@ int __kvm_set_memory_region(struct kvm *kvm, if (!new.dirty_bitmap) goto out_free; memset(new.dirty_bitmap, 0, dirty_bytes); + if (old.npages) + kvm_arch_flush_shadow(kvm); } #endif /* not defined CONFIG_S390 */ -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Drop useless atomic test from timer function
From: Jan Kiszka jan.kis...@siemens.com The current code tries to optimize the setting of KVM_REQ_PENDING_TIMER but used atomic_inc_and_test - which always returns true unless pending had the invalid value of -1 on entry. This patch drops the test part preserving the original semantic but expressing it less confusingly. Signed-off-by: Jan Kiszka jan.kis...@siemens.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index 1baed41..eea4043 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c @@ -15,9 +15,9 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) * case anyway. */ if (ktimer-reinject || !atomic_read(ktimer-pending)) { + atomic_inc(ktimer-pending); /* FIXME: this code should not know anything about vcpus */ - if (!atomic_inc_and_test(ktimer-pending)) - set_bit(KVM_REQ_PENDING_TIMER, vcpu-requests); + set_bit(KVM_REQ_PENDING_TIMER, vcpu-requests); } if (waitqueue_active(q)) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Fix racy event propagation in timer
From: Jan Kiszka jan.kis...@siemens.com Minor issue that likely had no practical relevance: the kvm timer function so far incremented the pending counter and then may reset it again to 1 in case reinjection was disabled. This opened a small racy window with the corresponding VCPU loop that may have happened to run on another (real) CPU and already consumed the value. Fix it by skipping the incrementation in case pending is already 0. This opens a different race windows, but may only rarely cause lost events in case we do not care about them anyway (!reinject). Signed-off-by: Jan Kiszka jan.kis...@siemens.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index 85cc743..1baed41 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c @@ -9,12 +9,16 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) int restart_timer = 0; wait_queue_head_t *q = vcpu-wq; - /* FIXME: this code should not know anything about vcpus */ - if (!atomic_inc_and_test(ktimer-pending)) - set_bit(KVM_REQ_PENDING_TIMER, vcpu-requests); - - if (!ktimer-reinject) - atomic_set(ktimer-pending, 1); + /* +* There is a race window between reading and incrementing, but we do +* not care about potentially loosing timer events in the !reinject +* case anyway. +*/ + if (ktimer-reinject || !atomic_read(ktimer-pending)) { + /* FIXME: this code should not know anything about vcpus */ + if (!atomic_inc_and_test(ktimer-pending)) + set_bit(KVM_REQ_PENDING_TIMER, vcpu-requests); + } if (waitqueue_active(q)) wake_up_interruptible(q); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: x86: Ignore reads to K7 EVNTSEL MSRs
In commit 7fe29e0faacb650d31b9e9f538203a157bec821d we ignored the reads to the P6 EVNTSEL MSRs. That fixed crashes on Intel machines. Ignore the reads to K7 EVNTSEL MSRs as well to fix this on AMD hosts. This fixes Kaspersky antivirus crashing Windows guests on AMD hosts. Signed-off-by: Amit Shah amit.s...@redhat.com --- arch/x86/kvm/x86.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1b91ea7..c5b44c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -957,6 +957,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_VM_HSAVE_PA: case MSR_P6_EVNTSEL0: case MSR_P6_EVNTSEL1: + case MSR_K7_EVNTSEL0: data = 0; break; case MSR_MTRRcap: -- 1.6.2.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Sun, 2009-06-14 at 12:34 +0300, Michael S. Tsirkin wrote: On Fri, Jun 12, 2009 at 04:53:27PM +0100, Mark McLoughlin wrote: The other obvious piece to add to it would be PCI addresses, so that even if you remove a device, the addresses assigned to existing devices don't change. Could you clarify this requirement please? Avi clarified, but I've written it up here too: https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses Cheers, Mark. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Sun, 2009-06-14 at 12:50 +0300, Michael S. Tsirkin wrote: On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote: However, in order to retain compat for that SCSI device (e.g. ensuring the PCI address doesn't change as other devices are added an removed), we're back to the same problem ... either: 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure out what address to use, libvirt would need to query qemu for what address was originally allocated to device or it would do all the PCI address allocation itself ... This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yep, most people seem to agree that it makes sense to allow this, but some believe it should only be via a machine description file, not the command line. However, the first problem is that it isn't a solution to the guest ABI problem more generally. And the second problem is that for e.g. libvirt to use it, it would have to be possible to query qemu for what PCI slots were assigned to the devices - libvirt would need to be able to parse 'info pci' and match the devices listed with the devices specified on the command line. Again, details written up here: https://fedoraproject.org/wiki/Features/KVM_Stable_PCI_Addresses Cheers, Mark. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 12:08 PM, Mark McLoughlin wrote: This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yep, most people seem to agree that it makes sense to allow this, but some believe it should only be via a machine description file, not the command line. I don't understand this opposition. It's clear a machine config file is a long way in our future. It's also clear lack of stable PCI addresses hurts us now. However, the first problem is that it isn't a solution to the guest ABI problem more generally. pci_addr was never meant to bring world peace, just stable PCI addresses. The other issues should be addressed separately. And the second problem is that for e.g. libvirt to use it, it would have to be possible to query qemu for what PCI slots were assigned to the devices - libvirt would need to be able to parse 'info pci' and match the devices listed with the devices specified on the command line. If all devices (including vga, ide) are set up with pci_addr, then this is unneeded. You do need to export available slot numbers from qemu. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/14/2009 12:47 PM, Michael S. Tsirkin wrote: Michael S. Tsirkin wrote: If we want to remove a device from under a running guest, you need hotplug. So we can't just remove several lines from the config and hope that it'll work simply because the PCI address is stable. Why not? E.g. configuration cycles address a specific bus/slot. You need cooperation from guest if you want to move a device. By remove several lines from the config I understood the guest needs to be restarted. Of course if you don't restart the guest you need true hotplug. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote: On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote: However, in order to retain compat for that SCSI device (e.g. ensuring the PCI address doesn't change as other devices are added an removed), we're back to the same problem ... either: 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure out what address to use, libvirt would need to query qemu for what address was originally allocated to device or it would do all the PCI address allocation itself ... This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yes, the user build the machine using the command line and monitor (or, in 2017, the machine configuration file), then turns on the power. Command line options are the parts lying around when we start. btw, -drive needs to be separated: -controller type=lsi1234,pci_addr=foobar,name=blah -drive file=foo.img,controller=blah,index=0 -drive file=bar.img,controller=blah,index=1 Drives to not have pci addresses. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM PATCH v2 2/2] kvm: use POLLHUP to close an irqfd instead of an explicit ioctl
On Sun, Jun 14, 2009 at 11:39:21PM -0400, Gregory Haskins wrote: Michael S. Tsirkin wrote: On Sun, Jun 14, 2009 at 08:53:11AM -0400, Gregory Haskins wrote: Michael S. Tsirkin wrote: On Thu, Jun 04, 2009 at 08:48:12AM -0400, Gregory Haskins wrote: +static void +irqfd_disconnect(struct _irqfd *irqfd) +{ +struct kvm *kvm; + +mutex_lock(irqfd-lock); + +kvm = rcu_dereference(irqfd-kvm); +rcu_assign_pointer(irqfd-kvm, NULL); + +mutex_unlock(irqfd-lock); + +if (!kvm) +return; mutex_lock(kvm-lock); -kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1); -kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0); +list_del(irqfd-list); mutex_unlock(kvm-lock); + +/* + * It is important to not drop the kvm reference until the next grace + * period because there might be lockless references in flight up + * until then + */ +synchronize_srcu(irqfd-srcu); +kvm_put_kvm(kvm); } So irqfd object will persist after kvm goes away, until eventfd is closed? Yep, by design. It becomes part of the eventfd and is thus associated with its lifetime. Consider it as if we made our own anon-fd implementation for irqfd and the lifetime looks similar. The difference is that we are reusing eventfd and its interface semantics. static int irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); +unsigned long flags = (unsigned long)key; -/* - * The wake_up is called with interrupts disabled. Therefore we need - * to defer the IRQ injection until later since we need to acquire the - * kvm-lock to do so. - */ -schedule_work(irqfd-work); +if (flags POLLIN) +/* + * The POLLIN wake_up is called with interrupts disabled. + * Therefore we need to defer the IRQ injection until later + * since we need to acquire the kvm-lock to do so. + */ +schedule_work(irqfd-inject); + +if (flags POLLHUP) { +/* + * The POLLHUP is called unlocked, so it theoretically should + * be safe to remove ourselves from the wqh using the locked + * variant of remove_wait_queue() + */ +remove_wait_queue(irqfd-wqh, irqfd-wait); +flush_work(irqfd-inject); +irqfd_disconnect(irqfd); + +cleanup_srcu_struct(irqfd-srcu); +kfree(irqfd); +} return 0; } And it is removed by this function when eventfd is closed. But what prevents the kvm module from going away, meanwhile? Well, we hold a reference to struct kvm until we call irqfd_disconnect(). If kvm closes first, we disconnect and disassociate all references to kvm leaving irqfd-kvm = NULL. Likewise, if irqfd closes first, we disassociate with kvm with the above quoted logic. In either case, we are holding a kvm reference up until that disconnect point. Therefore kvm should not be able to disappear before that disconnect, and after that point we do not care. Yes, we do care. Here's the scenario in more detail: - kvm is closed - irq disconnect is called - kvm is put - kvm module is removed: all irqs are disconnected - eventfd closes and triggers callback into removed kvm module - crash [ lightbulb turns on] Ah, now I see the point you were making. I thought you were talking about the .text in kvm_set_irq() (which would be protected by my kvm_get_kvm() reference afaict). But you are actually talking about the irqfd .text itself. Indeed, you are correct that is this currently a race. Good catch! If that is not sufficient to prevent kvm.ko from going away in the middle, then IMO kvm_get_kvm() has a bug, not irqfd. ;) However, I believe everything is actually ok here. -Greg BTW, why can't we remove irqfds in kvm_release? Well, this would be ideal but we run into that bi-directional reference thing that we talked about earlier and we both agree is non-trivial to solve. Solving this locking problem would incidentally also pave the way for restoring the DEASSIGN feature, so patches welcome! So far the only workable approach that I see is reverting the POLLHUP patch. I agree it looks pretty, but DEASSIGN and closing the races is more important IMO. And locking will definitely become much simpler. In the meantime, I think we can close the
Re: [PATCH 1/6] env-kvm_cpu_state.init is always zero here.
On 06/14/2009 01:52 PM, Gleb Natapov wrote: Signed-off-by: Gleb Natapovg...@redhat.com --- qemu-kvm.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 2aeb17c..ec911ef 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -412,7 +412,7 @@ static int kvm_main_loop_cpu(CPUState *env) if (env-kvm_cpu_state.sipi_needed) update_regs_for_sipi(env); } - if (!env-halted !env-kvm_cpu_state.init) + if (!env-halted) kvm_cpu_exec(env); env-exit_request = 0; env-exception_index = EXCP_INTERRUPT; Are you sure? Can't a reset reenable it? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/6] env-kvm_cpu_state.init is always zero here.
On Mon, Jun 15, 2009 at 12:55:27PM +0300, Avi Kivity wrote: On 06/14/2009 01:52 PM, Gleb Natapov wrote: Signed-off-by: Gleb Natapovg...@redhat.com --- qemu-kvm.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 2aeb17c..ec911ef 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -412,7 +412,7 @@ static int kvm_main_loop_cpu(CPUState *env) if (env-kvm_cpu_state.sipi_needed) update_regs_for_sipi(env); } -if (!env-halted !env-kvm_cpu_state.init) +if (!env-halted) kvm_cpu_exec(env); env-exit_request = 0; env-exception_index = EXCP_INTERRUPT; Are you sure? Can't a reset reenable it? The thing is used only with userspace irq chip. If env-kvm_cpu_state.init == 1 update_regs_for_init() is called three line above and it will set kvm_cpu_state.init to zero. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] Handle vcpu init/sipi by calling a function on vcpu
On 06/14/2009 01:52 PM, Gleb Natapov wrote: Instead of having special case in vcpu event loop. I'm a little worried about two vcpus INITing each other simultaneously and deadlocking. INIT/SIPI are async events, the initiator should not wait for them. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/6] env-kvm_cpu_state.init is always zero here.
On 06/15/2009 12:58 PM, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 12:55:27PM +0300, Avi Kivity wrote: On 06/14/2009 01:52 PM, Gleb Natapov wrote: Signed-off-by: Gleb Natapovg...@redhat.com --- qemu-kvm.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 2aeb17c..ec911ef 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -412,7 +412,7 @@ static int kvm_main_loop_cpu(CPUState *env) if (env-kvm_cpu_state.sipi_needed) update_regs_for_sipi(env); } - if (!env-halted !env-kvm_cpu_state.init) + if (!env-halted) kvm_cpu_exec(env); env-exit_request = 0; env-exception_index = EXCP_INTERRUPT; Are you sure? Can't a reset reenable it? The thing is used only with userspace irq chip. If env-kvm_cpu_state.init == 1 update_regs_for_init() is called three line above and it will set kvm_cpu_state.init to zero. Right, and nothing can sleep in between. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Sun, 2009-06-14 at 10:58 +0300, Avi Kivity wrote: Mark McLoughlin wrote: I think the point is that you don't need version numbers if you have a proper device tree. How do you add a new attribute to the device tree and, when a supplied device tree lacking said attribute, distinguish between a device tree from an old version of qemu (i.e. use the old default) and a partial device tree from the VM manager (i.e. use the new default) ? -baseline 0.10 That's a version number :-) (I was responding to Anthony's you don't need a version number) Cheers, Mark. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] Handle vcpu init/sipi by calling a function on vcpu
On Mon, Jun 15, 2009 at 01:03:39PM +0300, Avi Kivity wrote: On 06/14/2009 01:52 PM, Gleb Natapov wrote: Instead of having special case in vcpu event loop. I'm a little worried about two vcpus INITing each other simultaneously and deadlocking. INIT/SIPI are async events, the initiator should not wait for them. I thought to add on_vcpu_async() for that (if this case is worth warring about). -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] Handle vcpu init/sipi by calling a function on vcpu
On 06/15/2009 01:11 PM, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 01:03:39PM +0300, Avi Kivity wrote: On 06/14/2009 01:52 PM, Gleb Natapov wrote: Instead of having special case in vcpu event loop. I'm a little worried about two vcpus INITing each other simultaneously and deadlocking. INIT/SIPI are async events, the initiator should not wait for them. I thought to add on_vcpu_async() for that (if this case is worth warring about). A generic on_vcpu_async() would need to allocate, that might be expoitable. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] Handle vcpu init/sipi by calling a function on vcpu
On Mon, Jun 15, 2009 at 01:14:21PM +0300, Avi Kivity wrote: On 06/15/2009 01:11 PM, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 01:03:39PM +0300, Avi Kivity wrote: On 06/14/2009 01:52 PM, Gleb Natapov wrote: Instead of having special case in vcpu event loop. I'm a little worried about two vcpus INITing each other simultaneously and deadlocking. INIT/SIPI are async events, the initiator should not wait for them. I thought to add on_vcpu_async() for that (if this case is worth warring about). A generic on_vcpu_async() would need to allocate, that might be expoitable. Then what about processing events while waiting in on_vcpu()? -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] Handle vcpu init/sipi by calling a function on vcpu
On 06/15/2009 01:16 PM, Gleb Natapov wrote: A generic on_vcpu_async() would need to allocate, that might be expoitable. Then what about processing events while waiting in on_vcpu()? Could work, but prefer a simpler solution. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Mon, Jun 15, 2009 at 12:43:48PM +0300, Avi Kivity wrote: On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote: On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote: However, in order to retain compat for that SCSI device (e.g. ensuring the PCI address doesn't change as other devices are added an removed), we're back to the same problem ... either: 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure out what address to use, libvirt would need to query qemu for what address was originally allocated to device or it would do all the PCI address allocation itself ... This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yes, the user build the machine using the command line and monitor (or, in 2017, the machine configuration file), then turns on the power. Command line options are the parts lying around when we start. btw, -drive needs to be separated: -controller type=lsi1234,pci_addr=foobar,name=blah -drive file=foo.img,controller=blah,index=0 -drive file=bar.img,controller=blah,index=1 Drives to not have pci addresses. Maybe we need a generic 'bus options' flag. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Mon, Jun 15, 2009 at 12:27:08PM +0300, Avi Kivity wrote: On 06/15/2009 12:08 PM, Mark McLoughlin wrote: This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yep, most people seem to agree that it makes sense to allow this, but some believe it should only be via a machine description file, not the command line. I don't understand this opposition. It's clear a machine config file is a long way in our future. It's also clear lack of stable PCI addresses hurts us now. However, the first problem is that it isn't a solution to the guest ABI problem more generally. pci_addr was never meant to bring world peace, just stable PCI addresses. The other issues should be addressed separately. And the second problem is that for e.g. libvirt to use it, it would have to be possible to query qemu for what PCI slots were assigned to the devices - libvirt would need to be able to parse 'info pci' and match the devices listed with the devices specified on the command line. If all devices (including vga, ide) are set up with pci_addr, then this is unneeded. Right. I think it could be an all or nothing at all approach. You do need to export available slot numbers from qemu. Why would a slot be unavailable? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote: You do need to export available slot numbers from qemu. Why would a slot be unavailable? Because it does not exist? -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote: You do need to export available slot numbers from qemu. Why would a slot be unavailable? Because it does not exist? We can create a slot with any number, can't we? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote: On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote: You do need to export available slot numbers from qemu. Why would a slot be unavailable? Because it does not exist? We can create a slot with any number, can't we? What do you mean? If the mobo has 4 slots you can't create fifth. KVM describes 32 slots in the BIOS. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote: On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote: You do need to export available slot numbers from qemu. Why would a slot be unavailable? Because it does not exist? We can create a slot with any number, can't we? What do you mean? If the mobo has 4 slots you can't create fifth. KVM describes 32 slots in the BIOS. Do you mean the KVM kernel module here? I don't know much about the BIOS. Can't qemu control the number of slots declared? -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote: On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote: On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote: You do need to export available slot numbers from qemu. Why would a slot be unavailable? Because it does not exist? We can create a slot with any number, can't we? What do you mean? If the mobo has 4 slots you can't create fifth. KVM describes 32 slots in the BIOS. Do you mean the KVM kernel module here? I don't know much about the No I don't mean KVM kernel module here. BIOS. Can't qemu control the number of slots declared? Qemu represents HW, BIOS drives this HW. They should be in sync on such important issues like pci slot configuration. Even if QEMU can control the number of slots declared (which it can't easily do), it will be able to do it only on startup (before BIOS runs). The way to have dynamic number of slots may be pci bridge emulation. Not sure what is needed from BIOS for that. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote: You do need to export available slot numbers from qemu. Why would a slot be unavailable? A slot needs to be configured in ACPI, and not be taken by onboard chips (piix takes slot 0, for example). -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Activate Virtualization On Demand v2
X86 CPUs need to have some magic happening to enable the virtualization extensions on them. This magic can result in unpleasant results for users, like blocking other VMMs from working (vmx) or using invalid TLB entries (svm). Currently KVM activates virtualization when the respective kernel module is loaded. This blocks us from autoloading KVM modules without breaking other VMMs. To circumvent this problem at least a bit, this patch introduces on demand activation of virtualization. This means, that instead virtualization is enabled on creation of the first virtual machine and disabled on destruction of the last one. So using this, KVM can be easily autoloaded, while keeping other hypervisors usable. --- v2 uses kvm_lock and traces failures atomically Signed-off-by: Alexander Graf ag...@suse.de --- arch/ia64/kvm/kvm-ia64.c|8 ++- arch/powerpc/kvm/powerpc.c |2 +- arch/s390/kvm/kvm-s390.c|2 +- arch/x86/include/asm/kvm_host.h |2 +- arch/x86/kvm/svm.c | 13 -- arch/x86/kvm/vmx.c |7 +++- arch/x86/kvm/x86.c |4 +- include/linux/kvm_host.h|2 +- virt/kvm/kvm_main.c | 82 +-- 9 files changed, 96 insertions(+), 26 deletions(-) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 906d597..3141a92 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { long status; long tmp_base; @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage) slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); local_irq_restore(saved_psr); if (slot 0) - return; + return -EINVAL; spin_lock(vp_lock); status = ia64_pal_vp_init_env(kvm_vsa_base ? @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage) __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, tmp_base); if (status != 0) { printk(KERN_WARNINGkvm: Failed to Enable VT Support\n); - return ; + return -EINVAL; } if (!kvm_vsa_base) { @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage) } spin_unlock(vp_lock); ia64_ptr_entry(0x3, slot); + + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9057335..6558ab7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -80,7 +80,8 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { + return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index cbfe91e..a14e676 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -70,7 +70,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { /* Section: not file related */ -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { /* every s390 is virtualization enabled ;-) */ + return 0; } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4627627..72d5075 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -463,7 +463,7 @@ struct descriptor_table { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - void (*hardware_enable)(void *dummy); /* __init */ + int (*hardware_enable)(void *dummy); void (*hardware_disable)(void *dummy); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 04ee964..47a8b94 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -245,7 +245,7 @@ static void svm_hardware_disable(void *garbage) cpu_svm_disable(); } -static void svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void *garbage) { struct svm_cpu_data *svm_data; @@ -254,16 +254,20 @@ static void svm_hardware_enable(void *garbage) struct desc_struct *gdt; int me = raw_smp_processor_id(); + rdmsrl(MSR_EFER, efer); + if (efer EFER_SVME) + return -EBUSY; + if (!has_svm()) { printk(KERN_ERR svm_cpu_init: err EOPNOTSUPP on %d\n, me); - return; + return -EINVAL; } svm_data = per_cpu(svm_data, me); if (!svm_data) { printk(KERN_ERR svm_cpu_init: svm_data
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 12:09 PM, Mark McLoughlin wrote: I think the point is that you don't need version numbers if you have a proper device tree. How do you add a new attribute to the device tree and, when a supplied device tree lacking said attribute, distinguish between a device tree from an old version of qemu (i.e. use the old default) and a partial device tree from the VM manager (i.e. use the new default) ? -baseline 0.10 That's a version number :-) (I was responding to Anthony's you don't need a version number) If you want to prevent incompatibilities, you need to make everything new (potentially including bugfixes) non-default. Eventually the default configuration becomes increasingly unusable and you need a new baseline. You must still be able to fall back to the old baseline for older guests. I don't think games with configuration files can hide that. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Mon, Jun 15, 2009 at 02:14:15PM +0300, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 02:07:53PM +0300, Michael S. Tsirkin wrote: On Mon, Jun 15, 2009 at 01:52:13PM +0300, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 01:46:53PM +0300, Michael S. Tsirkin wrote: On Mon, Jun 15, 2009 at 01:44:56PM +0300, Gleb Natapov wrote: On Mon, Jun 15, 2009 at 01:32:49PM +0300, Michael S. Tsirkin wrote: You do need to export available slot numbers from qemu. Why would a slot be unavailable? Because it does not exist? We can create a slot with any number, can't we? What do you mean? If the mobo has 4 slots you can't create fifth. KVM describes 32 slots in the BIOS. Do you mean the KVM kernel module here? I don't know much about the No I don't mean KVM kernel module here. BIOS. Can't qemu control the number of slots declared? Qemu represents HW, BIOS drives this HW. They should be in sync on such important issues like pci slot configuration. As a simple solution, let's stick to 32 slots per bus. That's the maximum that the PCI spec allows, anyway. Even if QEMU can control the number of slots declared (which it can't easily do), it will be able to do it only on startup (before BIOS runs). That's OK - this is when the machine description is read. The way to have dynamic number of slots may be pci bridge emulation. Not sure what is needed from BIOS for that. Since bridge can be hot-plugged, probably nothing? But we don't necessarily need dynamic number of slots IMO. -- Gleb. -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints
Avi Kivity a...@redhat.com writes: On 06/15/2009 12:08 PM, Mark McLoughlin wrote: This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yep, most people seem to agree that it makes sense to allow this, but some believe it should only be via a machine description file, not the command line. I don't understand this opposition. It's clear a machine config file is a long way in our future. It's also clear lack of stable PCI addresses hurts us now. Correct. However, the first problem is that it isn't a solution to the guest ABI problem more generally. pci_addr was never meant to bring world peace, just stable PCI addresses. The other issues should be addressed separately. And the second problem is that for e.g. libvirt to use it, it would have to be possible to query qemu for what PCI slots were assigned to the devices - libvirt would need to be able to parse 'info pci' and match the devices listed with the devices specified on the command line. If all devices (including vga, ide) are set up with pci_addr, then this is unneeded. You do need to export available slot numbers from qemu. Not really. QEMU gives just the host bridge a fixed slot[*]. All the other slots are available. The real problem is devices that get implicitly added, like the SCSI controller. Those devices get their slots auto-assigned, which can interfere with slot numbers chosen by the user. We need a way to avoid that, as you suggested elsewhere in this thread. [*] There's an exception or two for oddball targets. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/4] Implement Hyper-V MSRs v2
On 19.05.2009, at 14:52, Avi Kivity wrote: Alexander Graf wrote: Hyper-V uses some MSRs, some of which are actually reserved for BIOS usage. But let's be nice today and have it its way, because otherwise it fails terribly. v2 changes: - remove the 0x4081 MSR definition - add pr_unimpl() on unimplemented writes Signed-off-by: Alexander Graf ag...@suse.de --- arch/x86/kvm/svm.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ef43a18..4b4eadd 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2034,6 +2034,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) case MSR_VM_HSAVE_PA: svm-hsave_msr = data; break; + case MSR_VM_CR: + case MSR_VM_IGNNE: + case MSR_K8_HWCR: + pr_unimpl(vcpu, unimplemented wrmsr: 0x%x data 0x%llx\n, ecx, data); + break; We can be nicer, if the write doesn't set bits which we don't implement, we can let it proceed silently. See for example MSR_IA32_DEBUGCTLMSR. Most likely the values written are already correctly implemented (by doing nothing). Actually we implement very little of the bits. See http://support.amd.com/us/Processor_TechDocs/31116-Public-GH-BKDG_3-28_5-28-09.pdf for what we're missing ;-). So it might make sense to always warn for now, see what OSs use and only filter out those bits they actually try to access (and maybe implement them). Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints
(adding cc) On 06/15/2009 02:35 PM, Markus Armbruster wrote: Not really. QEMU gives just the host bridge a fixed slot[*]. All the other slots are available. qemu needs to export these two bits of information: the first free slot and the number of slots. More generally, which slots are open. We can assume 1:31, but that's unlovely. The real problem is devices that get implicitly added, like the SCSI controller. Those devices get their slots auto-assigned, which can interfere with slot numbers chosen by the user. We need a way to avoid that, as you suggested elsewhere in this thread. Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=, and -drive pci_addr= (and later, -disk-controller)? Stalling while waiting for the ultimate config file is only generating pain and out-of-tree patches. (I'd be quite happy constructing the entire machine config on the command line, but I realize it's just me) -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] Nested SVM: Improve interrupt injection v2
On 19.05.2009, at 15:22, Gleb Natapov wrote: On Tue, May 19, 2009 at 12:54:03PM +0200, Alexander Graf wrote: While trying to get Hyper-V running, I realized that the interrupt injection mechanisms that are in place right now are not 100% correct. This patch makes nested SVM's interrupt injection behave more like on a real machine. v2 calls BUG_ON when svm_set_irq is called with GIF=0 Signed-off-by: Alexander Graf ag...@suse.de --- arch/x86/kvm/svm.c | 39 --- 1 files changed, 24 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fa2a710..5b14c9d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1517,7 +1517,8 @@ static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1, /* Kill any pending exceptions */ if (svm-vcpu.arch.exception.pending == true) nsvm_printk(WARNING: Pending Exception\n); - svm-vcpu.arch.exception.pending = false; + kvm_clear_exception_queue(svm-vcpu); + kvm_clear_interrupt_queue(svm-vcpu); What about pending NMI here? NMI injected to the guest? That should have triggered by now and caused an #NMI exit, no? Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Mon, Jun 15, 2009 at 02:27:14PM +0300, Avi Kivity wrote: On 06/15/2009 01:32 PM, Michael S. Tsirkin wrote: You do need to export available slot numbers from qemu. Why would a slot be unavailable? A slot needs to be configured in ACPI, Can we configure all possible 32 slots? and not be taken by onboard chips (piix takes slot 0, for example). piix is the root complex, isn't it? Are there other examples? If not, we could teach management about the root complex being special ... -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] Nested SVM: Improve interrupt injection v2
On Mon, Jun 15, 2009 at 01:47:08PM +0200, Alexander Graf wrote: On 19.05.2009, at 15:22, Gleb Natapov wrote: On Tue, May 19, 2009 at 12:54:03PM +0200, Alexander Graf wrote: While trying to get Hyper-V running, I realized that the interrupt injection mechanisms that are in place right now are not 100% correct. This patch makes nested SVM's interrupt injection behave more like on a real machine. v2 calls BUG_ON when svm_set_irq is called with GIF=0 Signed-off-by: Alexander Graf ag...@suse.de --- arch/x86/kvm/svm.c | 39 --- 1 files changed, 24 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fa2a710..5b14c9d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1517,7 +1517,8 @@ static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1, /* Kill any pending exceptions */ if (svm-vcpu.arch.exception.pending == true) nsvm_printk(WARNING: Pending Exception\n); - svm-vcpu.arch.exception.pending = false; + kvm_clear_exception_queue(svm-vcpu); + kvm_clear_interrupt_queue(svm-vcpu); What about pending NMI here? NMI injected to the guest? That should have triggered by now and caused an #NMI exit, no? I don't really understand what this code is doing, but there are three types of events exception/interrupt/nmi you clear only two of them. If you are sure this is correct then OK. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote: A slot needs to be configured in ACPI, Can we configure all possible 32 slots? That's what we do. But one is always taken. In the future, perhaps more. and not be taken by onboard chips (piix takes slot 0, for example). piix is the root complex, isn't it? Are there other examples? If not, we could teach management about the root complex being special ... We should just tell the user which slots are open. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Re: Configuration vs. compat hints
Avi Kivity wrote: (I'd be quite happy constructing the entire machine config on the command line, but I realize it's just me) It is not just you. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM PATCH v2 2/2] kvm: use POLLHUP to close an irqfd instead of an explicit ioctl
Michael S. Tsirkin wrote: On Sun, Jun 14, 2009 at 11:39:21PM -0400, Gregory Haskins wrote: Michael S. Tsirkin wrote: On Sun, Jun 14, 2009 at 08:53:11AM -0400, Gregory Haskins wrote: Michael S. Tsirkin wrote: On Thu, Jun 04, 2009 at 08:48:12AM -0400, Gregory Haskins wrote: +static void +irqfd_disconnect(struct _irqfd *irqfd) +{ +struct kvm *kvm; + +mutex_lock(irqfd-lock); + +kvm = rcu_dereference(irqfd-kvm); +rcu_assign_pointer(irqfd-kvm, NULL); + +mutex_unlock(irqfd-lock); + +if (!kvm) +return; mutex_lock(kvm-lock); -kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1); -kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0); +list_del(irqfd-list); mutex_unlock(kvm-lock); + +/* + * It is important to not drop the kvm reference until the next grace + * period because there might be lockless references in flight up + * until then + */ +synchronize_srcu(irqfd-srcu); +kvm_put_kvm(kvm); } So irqfd object will persist after kvm goes away, until eventfd is closed? Yep, by design. It becomes part of the eventfd and is thus associated with its lifetime. Consider it as if we made our own anon-fd implementation for irqfd and the lifetime looks similar. The difference is that we are reusing eventfd and its interface semantics. static int irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); +unsigned long flags = (unsigned long)key; -/* - * The wake_up is called with interrupts disabled. Therefore we need - * to defer the IRQ injection until later since we need to acquire the - * kvm-lock to do so. - */ -schedule_work(irqfd-work); +if (flags POLLIN) +/* + * The POLLIN wake_up is called with interrupts disabled. + * Therefore we need to defer the IRQ injection until later + * since we need to acquire the kvm-lock to do so. + */ +schedule_work(irqfd-inject); + +if (flags POLLHUP) { +/* + * The POLLHUP is called unlocked, so it theoretically should + * be safe to remove ourselves from the wqh using the locked + * variant of remove_wait_queue() + */ +remove_wait_queue(irqfd-wqh, irqfd-wait); +flush_work(irqfd-inject); +irqfd_disconnect(irqfd); + +cleanup_srcu_struct(irqfd-srcu); +kfree(irqfd); +} return 0; } And it is removed by this function when eventfd is closed. But what prevents the kvm module from going away, meanwhile? Well, we hold a reference to struct kvm until we call irqfd_disconnect(). If kvm closes first, we disconnect and disassociate all references to kvm leaving irqfd-kvm = NULL. Likewise, if irqfd closes first, we disassociate with kvm with the above quoted logic. In either case, we are holding a kvm reference up until that disconnect point. Therefore kvm should not be able to disappear before that disconnect, and after that point we do not care. Yes, we do care. Here's the scenario in more detail: - kvm is closed - irq disconnect is called - kvm is put - kvm module is removed: all irqs are disconnected - eventfd closes and triggers callback into removed kvm module - crash [ lightbulb turns on] Ah, now I see the point you were making. I thought you were talking about the .text in kvm_set_irq() (which would be protected by my kvm_get_kvm() reference afaict). But you are actually talking about the irqfd .text itself. Indeed, you are correct that is this currently a race. Good catch! If that is not sufficient to prevent kvm.ko from going away in the middle, then IMO kvm_get_kvm() has a bug, not irqfd. ;) However, I believe everything is actually ok here. -Greg BTW, why can't we remove irqfds in kvm_release? Well, this would be ideal but we run into that bi-directional reference thing that we talked about earlier and we both agree is non-trivial to solve. Solving this locking problem would incidentally also pave the way for restoring the DEASSIGN feature, so patches welcome! So far the only workable approach that I see is reverting the POLLHUP patch. I agree it looks pretty, but DEASSIGN and closing the races is more important IMO. And locking will definitely become
[KVM-AUTOTEST PATCH 1/5] Add new module kvm_subprocess
This module is intended to be used for controlling all child processes in KVM tests: both QEMU processes and SSH/SCP/Telnet processes. Processes started with this module keep running and can be interacted with even after the parent process exits. The current run_bg() utility tracks a child process as long as the parent process is running. When the parent process exits, the tracking thread terminates and cannot resume when needed. Currently SSH/SCP/Telnet communication is handled by kvm_utils.kvm_spawn, which does not allow the child process to run after the parent process exits. Thus, open SSH/SCP/Telnet sessions cannot be reused by tests following the one in which they are opened. The new module provides a solution to these two problems, and also saves some code by reusing common code required both for QEMU processes and SSH/SCP/Telnet processes. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_subprocess.py | 870 1 files changed, 870 insertions(+), 0 deletions(-) create mode 100644 client/tests/kvm/kvm_subprocess.py diff --git a/client/tests/kvm/kvm_subprocess.py b/client/tests/kvm/kvm_subprocess.py new file mode 100644 index 000..a6090ad --- /dev/null +++ b/client/tests/kvm/kvm_subprocess.py @@ -0,0 +1,870 @@ +#!/usr/bin/python +import sys, subprocess, pty, select, os, time, signal, re, termios, fcntl +import threading, logging +import common, kvm_utils + + +A class and functions used for running and controlling child processes. + +...@copyright: 2008-2009 Red Hat Inc. + + + +def _lock(filename): +if not os.path.exists(filename): +open(filename, w).close() +fd = os.open(filename, os.O_RDWR) +fcntl.lockf(fd, fcntl.LOCK_EX) +return fd + + +def _unlock(fd): +fcntl.lockf(fd, fcntl.LOCK_UN) +os.close(fd) + + +def _locked(filename): +try: +fd = os.open(filename, os.O_RDWR) +except: +return False +try: +fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) +except: +os.close(fd) +return True +fcntl.lockf(fd, fcntl.LOCK_UN) +os.close(fd) +return False + + +def _wait(filename): +fd = _lock(filename) +_unlock(fd) + + +def _get_filenames(base_dir, id): +return map(lambda s: os.path.join(base_dir, s + - + id), + (pid, status, output, outpipe1, outpipe2, inpipe, +lock-server-running, lock-client-starting)) + + +class kvm_spawn: + +This class is used for spawning and controlling a child process. + +A new instance of this class can either run a new server (a small Python +program that reads output from the child process and reports it to the +client and to a text file) or attach to an already running server. +When a server is started it runs the child process. +The server reports output from the child's STDOUT and STDERR via 3 +channels: two named pipes and a text file. +The text file can be accessed at any time using get_output(). +The first named pipe is used by _tail(), a function that runs in the +background and reports new output from the child as it is produced. +The second named pipe is used by a set of functions that read and parse +output as requested by the user in an interactive manner, similar to +pexpect. +The server also receives input from the client and sends it to the child +process. +An instance of this class can be pickled. When unpickled it automatically +resumes _tail() if needed. + + +def __init__(self, command=None, id=None, termination_func=None, + output_func=None, output_prefix=, linesep=\n, + prompt=r[\#\$]\s*$, status_test_command=echo $?): + +Initialize the class and run command as a child process. + +@param command: Command that will be run, or None if accessing an +already running process. +@param id: id of an already running instance, if accessing a running +instance, or None if starting a new one. +@param termination_func: Function to call when the process exits. The +function must accept a single exit status parameter. +@param output_func: Function to call whenever a line of output is +available from the STDOUT or STDERR streams of the process. +The function must accept a single string parameter. The string +does not include the final newline. +@param output_prefix: String to prepend to lines sent to output_func. +@param linesep: Line separator to be appended to strings sent to the +child process using sendline(). +@param prompt: Regular expression to be used with read_up_to_prompt(). +@param status_test_command: Command to be used for getting the last +exit status of commands run inside a shell (used by +get_command_status_output() and
[KVM-AUTOTEST PATCH 2/5] Modify kvm_vm and kvm_preprocessing to use the new kvm_subprocess module
Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_preprocessing.py | 17 +- client/tests/kvm/kvm_vm.py| 103 + 2 files changed, 44 insertions(+), 76 deletions(-) diff --git a/client/tests/kvm/kvm_preprocessing.py b/client/tests/kvm/kvm_preprocessing.py index f60cfe8..7fac46a 100644 --- a/client/tests/kvm/kvm_preprocessing.py +++ b/client/tests/kvm/kvm_preprocessing.py @@ -1,7 +1,7 @@ import sys, os, time, commands, re, logging from autotest_lib.client.bin import test from autotest_lib.client.common_lib import error -import kvm_vm, kvm_utils +import kvm_vm, kvm_utils, kvm_subprocess def preprocess_image(test, params): @@ -167,17 +167,6 @@ def preprocess(test, params, env): @param params: A dict containing all VM and image parameters. @param env: The environment (a dict-like object). -# Verify the identities of all living VMs -for vm in env.values(): -if not kvm_utils.is_vm(vm): -continue -if vm.is_dead(): -continue -if not vm.verify_process_identity(): -logging.debug(VM '%s' seems to have been replaced by another - process % vm.name) -vm.pid = None - # Destroy and remove VMs that are no longer needed in the environment requested_vms = kvm_utils.get_sub_dict_names(params, vms) for key in env.keys(): @@ -239,8 +228,8 @@ def postprocess(test, params, env): # Remove them all logging.debug('keep_ppm_files' not specified; removing all PPM files from results dir...) -kvm_utils.run_bg(rm -vf %s % os.path.join(test.debugdir, *.ppm), - None, logging.debug, (rm) , timeout=5.0) +rm_cmd = rm -vf %s % os.path.join(test.debugdir, *.ppm) +kvm_subprocess.run_fg(rm_cmd, logging.debug, (rm) , timeout=5.0) def postprocess_on_error(test, params, env): diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index 5028161..9aea3fb 100644 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -1,6 +1,6 @@ #!/usr/bin/python import time, socket, os, logging, fcntl -import kvm_utils +import kvm_utils, kvm_subprocess Utility classes and functions to handle Virtual Machine creation using qemu. @@ -54,17 +54,22 @@ def create_image(params, qemu_img_path, image_dir): qemu_img_cmd += %s % size logging.debug(Running qemu-img command:\n%s % qemu_img_cmd) -(status, pid, output) = kvm_utils.run_bg(qemu_img_cmd, None, - logging.debug, (qemu-img) , - timeout=30) +(status, output) = kvm_subprocess.run_fg(qemu_img_cmd, logging.debug, + (qemu-img) , timeout=30) -if status: -logging.debug(qemu-img exited with status %d % status) -logging.error(Could not create image %s % image_filename) +if status is None: +logging.error(Timeout elapsed while waiting for qemu-img command + to complete:\n%s % qemu_img_cmd) +return None +elif status != 0: +logging.error(Could not create image; + qemu-img command failed:\n%s % qemu_img_cmd) +logging.error(Status: %s % status) +logging.error(Output: + kvm_utils.format_str_for_message(output)) return None if not os.path.exists(image_filename): -logging.debug(Image file does not exist for some reason) -logging.error(Could not create image %s % image_filename) +logging.error(Image could not be created for some reason; + qemu-img command:\n%s % qemu_img_cmd) return None logging.info(Image created in %s % image_filename) @@ -106,7 +111,7 @@ class VM: @param image_dir: The directory where images reside @param iso_dir: The directory where ISOs reside -self.pid = None +self.process = None self.name = name self.params = params @@ -153,28 +158,6 @@ class VM: return VM(name, params, qemu_path, image_dir, iso_dir) -def verify_process_identity(self): - -Make sure .pid really points to the original qemu process. If .pid -points to the same process that was created with the create method, -or to a dead process, return True. Otherwise return False. - -if self.is_dead(): -return True -filename = /proc/%d/cmdline % self.pid -if not os.path.exists(filename): -logging.debug(Filename %s does not exist % filename) -return False -file = open(filename) -cmdline = file.read() -file.close() -if not self.qemu_path in cmdline: -return False -if not self.monitor_file_name in cmdline: -return False -return True - -
[KVM-AUTOTEST PATCH 3/5] Modify remote_login and remote_scp in kvm_utils to use kvm_subprocess
Also remove a reference to kvm_log that was left behind. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_utils.py | 13 - 1 files changed, 8 insertions(+), 5 deletions(-) diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index 4bc8dc7..09af62d 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -2,6 +2,7 @@ import md5, thread, subprocess, time, string, random, socket, os, signal, pty import select, re, logging from autotest_lib.client.bin import utils from autotest_lib.client.common_lib import error +import kvm_subprocess KVM test utility functions. @@ -635,7 +636,8 @@ def remote_login(command, password, prompt, linesep=\n, timeout=10): @return Return the kvm_spawn object on success and None on failure. -sub = kvm_spawn(command, linesep) +sub = kvm_subprocess.kvm_spawn(command) +sub.set_linesep(linesep) sub.set_prompt(prompt) password_prompt_count = 0 @@ -670,7 +672,7 @@ def remote_login(command, password, prompt, linesep=\n, timeout=10): sub.close() return None elif match == 4: # Connection refused -kvm_log.debug(Got 'Connection refused') +logging.debug(Got 'Connection refused') sub.close() return None elif match == 5: # prompt @@ -702,7 +704,7 @@ def remote_scp(command, password, timeout=300, login_timeout=10): @return: True if the transfer succeeds and False on failure. -sub = kvm_spawn(command) +sub = kvm_subprocess.kvm_spawn(command) password_prompt_count = 0 _timeout = login_timeout @@ -733,9 +735,10 @@ def remote_scp(command, password, timeout=300, login_timeout=10): sub.close() return False else: # match == None -logging.debug(Timeout or process terminated) +logging.debug(Timeout elapsed or process terminated) +status = sub.get_status() sub.close() -return sub.poll() == 0 +return status == 0 def scp_to_remote(host, port, username, password, local_path, remote_path, -- 1.5.4.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[KVM-AUTOTEST PATCH 4/5] Modify run_autotest() in kvm_tests.py to use the new kvm_subprocess module.
Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_tests.py |9 + 1 files changed, 5 insertions(+), 4 deletions(-) diff --git a/client/tests/kvm/kvm_tests.py b/client/tests/kvm/kvm_tests.py index 54d2a7a..ffe9116 100644 --- a/client/tests/kvm/kvm_tests.py +++ b/client/tests/kvm/kvm_tests.py @@ -1,6 +1,6 @@ import time, os, logging from autotest_lib.client.common_lib import utils, error -import kvm_utils, ppm_utils, scan_results +import kvm_utils, kvm_subprocess, ppm_utils, scan_results KVM test definitions. @@ -237,15 +237,16 @@ def run_autotest(test, params, env): cmd += --exclude=*.pyc cmd += --exclude=*.svn cmd += --exclude=*.git -kvm_utils.run_bg(cmd % (test.bindir, tarred_autotest_path), timeout=30) +kvm_subprocess.run_fg(cmd % (test.bindir, tarred_autotest_path), timeout=30) # tar the contents of bindir/autotest/tests/test_name cmd = cd %s; tar cvjf %s %s/* cmd += --exclude=*.pyc cmd += --exclude=*.svn cmd += --exclude=*.git -kvm_utils.run_bg(cmd % (os.path.join(test.bindir, autotest, tests), -tarred_test_path, test_name), timeout=30) +kvm_subprocess.run_fg(cmd % + (os.path.join(test.bindir, autotest, tests), + tarred_test_path, test_name), timeout=30) # Check if we need to copy autotest.tar.bz2 copy = False -- 1.5.4.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[KVM-AUTOTEST PATCH 5/5] Remove kvm_spawn and run_bg() from kvm_utils.py.
These are now provided by kvm_subprocess.py. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/kvm_utils.py | 477 + 1 files changed, 2 insertions(+), 475 deletions(-) diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index 09af62d..0ddb7fc 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -231,390 +231,8 @@ def check_kvm_source_dir(source_dir): raise error.TestError(Unknown source dir layout, cannot proceed.) -# The following are a class and functions used for SSH, SCP and Telnet -# communication with guests. - -class kvm_spawn: - -This class is used for spawning and controlling a child process. - - -def __init__(self, command, linesep=\n): - -Initialize the class and run command as a child process. - -@param command: Command that will be run. -@param linesep: Line separator for the given platform. - -self.exitstatus = None -self.linesep = linesep -(pid, fd) = pty.fork() -if pid == 0: -os.execv(/bin/sh, [/bin/sh, -c, command]) -else: -self.pid = pid -self.fd = fd - - -def set_linesep(self, linesep): - -Sets the line separator string (usually \\n). - -@param linesep: Line separator character. - -self.linesep = linesep - - -def is_responsive(self, timeout=5.0): - -Return True if the session is responsive. - -Send a newline to the child process (e.g. SSH or Telnet) and read some -output using read_nonblocking. -If all is OK, some output should be available (e.g. the shell prompt). -In that case return True. Otherwise return False. - -@param timeout: Timeout that will happen before we consider the -process unresponsive - -self.read_nonblocking(timeout=0.1) -self.sendline() -output = self.read_nonblocking(timeout=timeout) -if output.strip(): -return True -return False - - -def poll(self): - -If the process exited, return its exit status. Otherwise return None. -The exit status is stored for use in subsequent calls. - -if self.exitstatus != None: -return self.exitstatus -pid, status = os.waitpid(self.pid, os.WNOHANG) -if pid: -self.exitstatus = os.WEXITSTATUS(status) -return self.exitstatus -else: -return None - - -def close(self): - -Close the session (close the process filedescriptors and kills the -process ID), and return the exit status. - -try: -os.close(self.fd) -os.kill(self.pid, signal.SIGTERM) -except OSError: -pass -return self.poll() - - -def sendline(self, str=): - -Sends a string followed by a line separator to the child process. - -@param str: String that will be sent to the child process. - -try: -os.write(self.fd, str + self.linesep) -except OSError: -pass - - -def read_nonblocking(self, timeout=1.0): - -Read from child until there is nothing to read for timeout seconds. - -@param timeout: Time (seconds) of wait before we give up reading from -the child process. - -data = -while True: -r, w, x = select.select([self.fd], [], [], timeout) -if self.fd in r: -try: -data += os.read(self.fd, 1024) -except OSError: -return data -else: -return data - - -def match_patterns(self, str, patterns): - -Match str against a list of patterns. - -Return the index of the first pattern that matches a substring of str. -None and empty strings in patterns are ignored. -If no match is found, return None. - -@param patterns: List of strings (regular expression patterns). - -for i in range(len(patterns)): -if not patterns[i]: -continue -exp = re.compile(patterns[i]) -if exp.search(str): -return i - - -def read_until_output_matches(self, patterns, filter=lambda(x):x, - timeout=30.0, internal_timeout=1.0, - print_func=None): - -Read using read_nonblocking until a match is found using match_patterns, -or until timeout expires. Before attempting to search for a match, the -data is filtered using the filter function provided. - -@brief: Read from child using read_nonblocking until a pattern -matches. -@param patterns: List of strings (regular
Re: [PATCH] Activate Virtualization On Demand v2
On Mon, Jun 15, 2009 at 01:30:05PM +0200, Alexander Graf wrote: X86 CPUs need to have some magic happening to enable the virtualization extensions on them. This magic can result in unpleasant results for users, like blocking other VMMs from working (vmx) or using invalid TLB entries (svm). Currently KVM activates virtualization when the respective kernel module is loaded. This blocks us from autoloading KVM modules without breaking other VMMs. That will only become interesting if we every have such a thing in mainline. So NACK, lots of complication for no good reason. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Activate Virtualization On Demand v2
On 15.06.2009, at 14:17, Christoph Hellwig wrote: On Mon, Jun 15, 2009 at 01:30:05PM +0200, Alexander Graf wrote: X86 CPUs need to have some magic happening to enable the virtualization extensions on them. This magic can result in unpleasant results for users, like blocking other VMMs from working (vmx) or using invalid TLB entries (svm). Currently KVM activates virtualization when the respective kernel module is loaded. This blocks us from autoloading KVM modules without breaking other VMMs. That will only become interesting if we every have such a thing in mainline. So NACK, lots of complication for no good reason. I don't want to fight political battles here. Seriously - we're out of kindergarden. There are users out there who want to have VBox/VMware and kvm installed in parallel and can't have both kernel modules loaded at the same time. We're only hurting _our_ users, not the others if we keep people from having kvm*.ko loaded. Sigh. Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Activate Virtualization On Demand v2
On Mon, Jun 15, 2009 at 02:25:01PM +0200, Alexander Graf wrote: I don't want to fight political battles here. So stop that crap. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: x86: Ignore reads to K7 EVNTSEL MSRs
On 06/15/2009 10:55 AM, Amit Shah wrote: In commit 7fe29e0faacb650d31b9e9f538203a157bec821d we ignored the reads to the P6 EVNTSEL MSRs. That fixed crashes on Intel machines. Ignore the reads to K7 EVNTSEL MSRs as well to fix this on AMD hosts. This fixes Kaspersky antivirus crashing Windows guests on AMD hosts. Applied, thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
Avi Kivity wrote: On 06/15/2009 12:08 PM, Mark McLoughlin wrote: This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yep, most people seem to agree that it makes sense to allow this, but some believe it should only be via a machine description file, not the command line. I don't understand this opposition. It's clear a machine config file is a long way in our future. It's also clear lack of stable PCI addresses hurts us now. Is there opposition? I don't ever recall seeing a patch... I think it's a perfectly fine idea. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2803638 ] kvm-86: winxp guest + kvmnet.sys virtio unstable
Bugs item #2803638, was opened at 2009-06-09 19:45 Message generated for change (Comment added) made by thekozmo You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2803638group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: Geoffrey Brimhall (vinyvat) Assigned to: Nobody/Anonymous (nobody) Summary: kvm-86: winxp guest + kvmnet.sys virtio unstable Initial Comment: * cpu model: Core i7 920 * kvm version: kvm 86 * host kernel version: 2.6.29.4 debian lenny * host kernel arch: x86_64 * guest: winxp 32bit sp3 * qemu command line: qemu-system-x86_64 -boot c -hda ./winxp.qcow2 -cdrom winxp-sp3.iso -smp 2 -m 1024 -vga std -net nic,model=virtio,vlan=0,macaddr=00:15:60:51:09:BD -net tap,vlan=0,ifname=tap0,script=/etc/kvm/kvm-ifup -soundhw es1370 -localtime -k en-us -name -usb When using kvmnet.sys in guest winxp32, guest crashes ( blue-screen with IRQ_NOT_LESS_OR_EQUAL most common ) on complex web2.0 browser pages ( lots of ajax and form fields ). Switching back to default rtl8139 driver ( ie remove model=virtio from command line options ) gets system back to being very stable. Reproduced crash using all permutations of kvm-85 and 2.6.26 kernel also. -- Comment By: Dor Laor (thekozmo) Date: 2009-06-15 15:43 Message: Once we'll get authorization to release new windows drivers it will get solved. -- Comment By: Geoffrey Brimhall (vinyvat) Date: 2009-06-09 20:32 Message: Also have 6GB ram on system. -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2803638group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Re: Configuration vs. compat hints
Avi Kivity a...@redhat.com writes: (adding cc) On 06/15/2009 02:35 PM, Markus Armbruster wrote: Not really. QEMU gives just the host bridge a fixed slot[*]. All the other slots are available. qemu needs to export these two bits of information: the first free slot and the number of slots. More generally, which slots are open. We can assume 1:31, but that's unlovely. Point. The real problem is devices that get implicitly added, like the SCSI controller. Those devices get their slots auto-assigned, which can interfere with slot numbers chosen by the user. We need a way to avoid that, as you suggested elsewhere in this thread. Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=, and -drive pci_addr= (and later, -disk-controller)? Stalling while waiting for the ultimate config file is only generating pain and out-of-tree patches. Yup. I got bit-rotten patches for pci_addr=, and I can unrot them if they're wanted. (I'd be quite happy constructing the entire machine config on the command line, but I realize it's just me) Ha, .bash_history as config file... -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On Mon, Jun 15, 2009 at 02:56:42PM +0300, Avi Kivity wrote: On 06/15/2009 02:48 PM, Michael S. Tsirkin wrote: A slot needs to be configured in ACPI, Can we configure all possible 32 slots? That's what we do. But one is always taken. In the future, perhaps more. and not be taken by onboard chips (piix takes slot 0, for example). piix is the root complex, isn't it? Are there other examples? If not, we could teach management about the root complex being special ... We should just tell the user which slots are open. This might be tricky if the config is passed in with the command line flags. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
Avi Kivity wrote: On 06/14/2009 12:50 PM, Michael S. Tsirkin wrote: On Fri, Jun 12, 2009 at 05:48:23PM +0100, Mark McLoughlin wrote: However, in order to retain compat for that SCSI device (e.g. ensuring the PCI address doesn't change as other devices are added an removed), we're back to the same problem ... either: 1) Use '-drive file=foo.img,if=scsi,pci_addr=foo'; in order to figure out what address to use, libvirt would need to query qemu for what address was originally allocated to device or it would do all the PCI address allocation itself ... This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yes, the user build the machine using the command line and monitor (or, in 2017, the machine configuration file), Considering pbrook just posted a machine config for arm, I think it would be rather sad if pc wasn't converted to it by 2017... then turns on the power. Command line options are the parts lying around when we start. btw, -drive needs to be separated: -controller type=lsi1234,pci_addr=foobar,name=blah -drive file=foo.img,controller=blah,index=0 -drive file=bar.img,controller=blah,index=1 Drives to not have pci addresses. Drivers don't have indexes and buses but we specify it on the -drive line. -drive is convenient syntax. It stops being convenient when you force it to be two options. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
Avi Kivity wrote: On 06/15/2009 12:09 PM, Mark McLoughlin wrote: I think the point is that you don't need version numbers if you have a proper device tree. How do you add a new attribute to the device tree and, when a supplied device tree lacking said attribute, distinguish between a device tree from an old version of qemu (i.e. use the old default) and a partial device tree from the VM manager (i.e. use the new default) ? -baseline 0.10 That's a version number :-) (I was responding to Anthony's you don't need a version number) If you want to prevent incompatibilities, you need to make everything new (potentially including bugfixes) non-default. Eventually the default configuration becomes increasingly unusable and you need a new baseline. You must still be able to fall back to the old baseline for older guests. I don't think games with configuration files can hide that. -M pc1 -M pc2 etc. This is pretty easy to maintain with config files. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Re: Configuration vs. compat hints
Markus Armbruster wrote: Avi Kivity a...@redhat.com writes: Paul/Anthony, can we have -vga pci_addr=, -usb-controller pci_addr=, and -drive pci_addr= (and later, -disk-controller)? Stalling while waiting for the ultimate config file is only generating pain and out-of-tree patches. Yup. I got bit-rotten patches for pci_addr=, and I can unrot them if they're wanted. Yes, would be good to have patches on the list to discuss. In principle, I have no objection to this. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
Avi Kivity wrote: On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote: We should just tell the user which slots are open. This might be tricky if the config is passed in with the command line flags. qemu -show-available-pci-slots Why does the user care? Let QEMU allocate the PCI slot, then query it to see what slot it assigned and remember that. It's not a good idea to have management applications attempt to do PCI slot allocation. For instance, one day we may decide to make virtio devices multi-function. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote: We should just tell the user which slots are open. This might be tricky if the config is passed in with the command line flags. qemu -show-available-pci-slots (the qemu equivalent to KVM_CHECK_EXTENSION) -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM PATCH v2 2/2] kvm: use POLLHUP to close an irqfd instead of an explicit ioctl
On Mon, Jun 15, 2009 at 08:08:18AM -0400, Gregory Haskins wrote: @@ -123,6 +124,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) cleanup_srcu_struct(irqfd-srcu); kfree(irqfd); + module_put(THIS_MODULE); } return 0; module_put(THIS_MODULE) is always a bug unless you know that someone has a reference to the current module: the module could go away between this call and returning from function. Hmm. I understand what you are saying conceptually (i.e. the .text could get yanked before we hit the next line of code, in this case the return 0). However, holding a reference when you _know_ someone else holds a reference to me says that one of the references is redundant. In addition, there is certainly plenty of precedence for module_put(THIS_MODULE) all throughout the kernel (including module_put_and_exit()). Are those broken as well? Maybe not, but I don't know why. It works fine as long as you don't unload any modules though :) Rusty, could you enlighten us please? -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 03:41 PM, Anthony Liguori wrote: Yep, most people seem to agree that it makes sense to allow this, but some believe it should only be via a machine description file, not the command line. I don't understand this opposition. It's clear a machine config file is a long way in our future. It's also clear lack of stable PCI addresses hurts us now. Is there opposition? I don't ever recall seeing a patch... Izik Eidus posted a patch (using a different syntax) in November 2007. I think it's a perfectly fine idea. Good. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 03:45 PM, Anthony Liguori wrote: This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yes, the user build the machine using the command line and monitor (or, in 2017, the machine configuration file), Considering pbrook just posted a machine config for arm, I think it would be rather sad if pc wasn't converted to it by 2017... I'd be sad too, but not surprised. then turns on the power. Command line options are the parts lying around when we start. btw, -drive needs to be separated: -controller type=lsi1234,pci_addr=foobar,name=blah -drive file=foo.img,controller=blah,index=0 -drive file=bar.img,controller=blah,index=1 Drives to not have pci addresses. Drivers don't have indexes and buses but we specify it on the -drive line. Drives do have indexes. On old parallel scsi drives you set the index by clicking a button on the back of the drive to cycle through scsi addresses 0-7. An IDE drive's index is determined by the cable (master/slave). A SATA drive's index is determined by which header on the motherboard the drive connects to. If by bus you mean the if= parameter, then drives certainly do have buses. Just try connecting the scsi drive from the previous paragraph to a USB port. -drive is convenient syntax. It stops being convenient when you force it to be two options. controller= defaults to some builtin thing which autoinstantiates when necessary, so the old -drive syntax works. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints
Anthony Liguori anth...@codemonkey.ws writes: Avi Kivity wrote: On 06/15/2009 12:08 PM, Mark McLoughlin wrote: This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yep, most people seem to agree that it makes sense to allow this, but some believe it should only be via a machine description file, not the command line. I don't understand this opposition. It's clear a machine config file is a long way in our future. It's also clear lack of stable PCI addresses hurts us now. Is there opposition? I don't ever recall seeing a patch... http://www.archivum.info/qemu-de...@nongnu.org/2009-01/msg01458.html I think it's a perfectly fine idea. Off to dust off my patch series. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 03:52 PM, Anthony Liguori wrote: Avi Kivity wrote: On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote: We should just tell the user which slots are open. This might be tricky if the config is passed in with the command line flags. qemu -show-available-pci-slots Why does the user care? Let QEMU allocate the PCI slot, then query it to see what slot it assigned and remember that. It's a roundabout way of doing things. As an example, if you try to fit too many devices into the machine, you have to try to add all devices and watch for a qemu error. If you know in advance how many slots you have, you never enter into that situation (and you need to show the limit to the user anyway). It's not a good idea to have management applications attempt to do PCI slot allocation. For instance, one day we may decide to make virtio devices multi-function. Non-virtio, as well. But we can't make that the default, so the user will have to specify this anyway. Given that you can't hotunplug individual functions, the user will have to specify exactly how functions are aggregated into devices. My recommendation would be for a GUI to allow the user to select a 'quad port virtio NIC' or 'dual port virtio scsi controller' rather than trying to do anything automatic. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 03:48 PM, Anthony Liguori wrote: device tree lacking said attribute, distinguish between a device tree from an old version of qemu (i.e. use the old default) and a partial device tree from the VM manager (i.e. use the new default) ? -baseline 0.10 That's a version number :-) (I was responding to Anthony's you don't need a version number) If you want to prevent incompatibilities, you need to make everything new (potentially including bugfixes) non-default. Eventually the default configuration becomes increasingly unusable and you need a new baseline. You must still be able to fall back to the old baseline for older guests. I don't think games with configuration files can hide that. How do you add a new attribute to the device tree and, when a supplied -M pc1 -M pc2 Certainly preferable to -baseline. This is pretty easy to maintain with config files. Let's not tie the two together. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
Hi, Yes, the user build the machine using the command line and monitor (or, in 2017, the machine configuration file), Considering pbrook just posted a machine config for arm, I think it would be rather sad if pc wasn't converted to it by 2017... It shouldn't last until 2017, but the process isn't that trivial. Some qemu code / control flow needs serious restruction until it is in a state that creating the devices from a fdt can actually work. Drivers don't have indexes and buses but we specify it on the -drive line. -drive is convenient syntax. It stops being convenient when you force it to be two options. One more issue: -drive also mixes host and guest configuration. These must be separated too. cheers, Gerd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/4] Add rudimentary Hyper-V guest support v3
Now that we have nested SVM in place, let's make use of it and virtualize something non-kvm. The first interesting target that came to my mind here was Hyper-V. This patchset makes Windows Server 2008 boot with Hyper-V, which runs the dom0 in virtualized mode already. It hangs somewhere in IDE code when booted, so I haven't been able to run a second VM within for now yet. Please keep in mind that Hyper-V won't work unless you apply the userspace patches too and the PAT bit patch --- v2 changes: - remove reserved PAT check patch (Avi will do this) - remove #PF inject on emulated_read - take comments from v1 into account (listed individually) v3 changes: - forward-port to current git Alexander Graf (4): Add definition for IGNNE MSR Implement Hyper-V MSRs v2 Nested SVM: Implement INVLPGA v2 Nested SVM: Improve interrupt injection v2 arch/x86/include/asm/msr-index.h |1 + arch/x86/kvm/svm.c | 59 +++-- 2 files changed, 44 insertions(+), 16 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/4] Add definition for IGNNE MSR
Hyper-V tried to access MSR_IGNNE, so let's at least have a definition for it in our headers. Signed-off-by: Alexander Graf ag...@suse.de --- arch/x86/include/asm/msr-index.h |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec41fc1..e273549 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -372,6 +372,7 @@ /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 +#define MSR_VM_IGNNE0xc0010115 #define MSR_VM_HSAVE_PA 0xc0010117 #endif /* _ASM_X86_MSR_INDEX_H */ -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
Avi Kivity wrote: On 06/15/2009 03:45 PM, Anthony Liguori wrote: This last option makes sense to me: in a real world the user has control over where he places the device on the bus, so why not with qemu? Yes, the user build the machine using the command line and monitor (or, in 2017, the machine configuration file), Considering pbrook just posted a machine config for arm, I think it would be rather sad if pc wasn't converted to it by 2017... I'd be sad too, but not surprised. then turns on the power. Command line options are the parts lying around when we start. btw, -drive needs to be separated: -controller type=lsi1234,pci_addr=foobar,name=blah -drive file=foo.img,controller=blah,index=0 -drive file=bar.img,controller=blah,index=1 Drives to not have pci addresses. Drivers don't have indexes and buses but we specify it on the -drive line. Drives do have indexes. On old parallel scsi drives you set the index by clicking a button on the back of the drive to cycle through scsi addresses 0-7. An IDE drive's index is determined by the cable (master/slave). A SATA drive's index is determined by which header on the motherboard the drive connects to. It's not at all that simple. SCSI has a hierarchical address mechanism with 0-7 targets but then potentially multiple LUNs per target. Today, we always emulate a single LUN per target but if we ever wanted to support more than 7 disks on a SCSI controller, we would have to add multiple LUN support too. So the current linear unit= parameter is actually pretty broken for SCSI. For IDE, it's a combination of bus, slot, and master/slave. For virtio, it's just a PCI address. What we really need is something that is more opaque and controller specific. For instance, if we were going to do controllers... -controller type=lsi1234,pci_addr=foobar,name=blah -controller-disk controller=blah,target=0,lun=1,name=sda -controller type=ide,pci_addr=barfoo,name=ide -controller-disk controller=ide,slot=secondary,cable=slave,name=hdd -drive file=foo.img,controller-disk=sda -drive file=bar.img,controller-disk=hdd And having -hdd file=foo.img be short-hand for -drive file=%s,controller-disk=%s. If by bus you mean the if= parameter, then drives certainly do have buses. Just try connecting the scsi drive from the previous paragraph to a USB port. No, I meant drive file=foo.img,bus=3. If that doesn't seem obvious what it should do to you that's because it isn't at all obvious :-) It ends up skipping a predefined number of locations in the drive table. This is pretty broken fundamentally because it assumes controllers always support a fixed number of devices. Nothing really respects bus_id though so in practice, I assume it's almost universally broken. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/4] Implement Hyper-V MSRs
Hyper-V uses some MSRs, some of which are actually reserved for BIOS usage. But let's be nice today and have it its way, because otherwise it fails terribly. v2 changes: - remove the 0x4081 MSR definition - add pr_unimpl() on unimplemented writes Signed-off-by: Alexander Graf ag...@suse.de --- arch/x86/kvm/svm.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ef43a18..4b4eadd 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2034,6 +2034,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) case MSR_VM_HSAVE_PA: svm-hsave_msr = data; break; + case MSR_VM_CR: + case MSR_VM_IGNNE: + case MSR_K8_HWCR: + pr_unimpl(vcpu, unimplemented wrmsr: 0x%x data 0x%llx\n, ecx, data); + break; default: return kvm_set_msr_common(vcpu, ecx, data); } -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/4] Nested SVM: Implement INVLPGA
SVM adds another way to do INVLPG by ASID which Hyper-V makes use of, so let's implement it! For now we just do the same thing invlpg does, as asid switching means we flush the mmu anyways. That might change one day though. v2 makes invlpga do the same as invlpg, not flush the whole mmu Signed-off-by: Alexander Graf ag...@suse.de --- arch/x86/kvm/svm.c | 15 ++- 1 files changed, 14 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4b4eadd..fa2a710 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1785,6 +1785,19 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } +static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + struct kvm_vcpu *vcpu = svm-vcpu; + nsvm_printk(INVLPGA\n); + + /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ + kvm_mmu_invlpg(vcpu, vcpu-arch.regs[VCPU_REGS_RAX]); + + svm-next_rip = kvm_rip_read(svm-vcpu) + 3; + skip_emulated_instruction(svm-vcpu); + return 1; +} + static int invalid_op_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { @@ -2130,7 +2143,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_INVD] = emulate_on_interception, [SVM_EXIT_HLT] = halt_interception, [SVM_EXIT_INVLPG] = invlpg_interception, - [SVM_EXIT_INVLPGA] = invalid_op_interception, + [SVM_EXIT_INVLPGA] = invlpga_interception, [SVM_EXIT_IOIO] = io_interception, [SVM_EXIT_MSR] = msr_interception, [SVM_EXIT_TASK_SWITCH] = task_switch_interception, -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/4] Nested SVM: Improve interrupt injection
While trying to get Hyper-V running, I realized that the interrupt injection mechanisms that are in place right now are not 100% correct. This patch makes nested SVM's interrupt injection behave more like on a real machine. v2 calls BUG_ON when svm_set_irq is called with GIF=0 Signed-off-by: Alexander Graf ag...@suse.de --- arch/x86/kvm/svm.c | 39 --- 1 files changed, 24 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fa2a710..5b14c9d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1517,7 +1517,8 @@ static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1, /* Kill any pending exceptions */ if (svm-vcpu.arch.exception.pending == true) nsvm_printk(WARNING: Pending Exception\n); - svm-vcpu.arch.exception.pending = false; + kvm_clear_exception_queue(svm-vcpu); + kvm_clear_interrupt_queue(svm-vcpu); /* Restore selected save entries */ svm-vmcb-save.es = hsave-save.es; @@ -1585,7 +1586,8 @@ static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1, svm-nested_vmcb = svm-vmcb-save.rax; /* Clear internal status */ - svm-vcpu.arch.exception.pending = false; + kvm_clear_exception_queue(svm-vcpu); + kvm_clear_interrupt_queue(svm-vcpu); /* Save the old vmcb, so we don't need to pick what we save, but can restore everything when a VMEXIT occurs */ @@ -2277,21 +2279,14 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) ((/*control-int_vector 4*/ 0xf) V_INTR_PRIO_SHIFT); } -static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - svm-vmcb-control.event_inj = nr | - SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; -} - static void svm_set_irq(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - nested_svm_intr(svm); + BUG_ON(!(svm-vcpu.arch.hflags HF_GIF_MASK)); - svm_queue_irq(vcpu, vcpu-arch.interrupt.nr); + svm-vmcb-control.event_inj = vcpu-arch.interrupt.nr | + SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; } static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) @@ -2319,13 +2314,25 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) struct vmcb *vmcb = svm-vmcb; return (vmcb-save.rflags X86_EFLAGS_IF) !(vmcb-control.int_state SVM_INTERRUPT_SHADOW_MASK) - (svm-vcpu.arch.hflags HF_GIF_MASK); + (svm-vcpu.arch.hflags HF_GIF_MASK) + !is_nested(svm); } static void enable_irq_window(struct kvm_vcpu *vcpu) { - svm_set_vintr(to_svm(vcpu)); - svm_inject_irq(to_svm(vcpu), 0x0); + struct vcpu_svm *svm = to_svm(vcpu); + nsvm_printk(Trying to open IRQ window\n); + + nested_svm_intr(svm); + + /* In case GIF=0 we can't rely on the CPU to tell us when +* GIF becomes 1, because that's a separate STGI/VMRUN intercept. +* The next time we get that intercept, this function will be +* called again though and we'll get the vintr intercept. */ + if (svm-vcpu.arch.hflags HF_GIF_MASK) { + svm_set_vintr(svm); + svm_inject_irq(svm, 0x0); + } } static void enable_nmi_window(struct kvm_vcpu *vcpu) @@ -2393,6 +2400,8 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) case SVM_EXITINTINFO_TYPE_EXEPT: /* In case of software exception do not reinject an exception vector, but re-execute and instruction instead */ + if (is_nested(svm)) + break; if (kvm_exception_is_soft(vector)) break; if (exitintinfo SVM_EXITINTINFO_VALID_ERR) { -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/8] Small cpu loop cleanups
Also fix info cpus to show correct halt status with in kernel irq chip. I removed patch that uses on_vcpu() for init/sipi handling for now. Gleb Natapov (8): env-kvm_cpu_state.init is always zero here. Do not use env-halted to decide where halted state should be handled. Call kvm_arch_load_regs() instead of kvm_load_registers() Rename kvm_(load|save)_mpstate to kvm_arch_(load|save)_mpstate Retrieve mp state info in cpu_synchronize_state() env-exception_index is not used by kvm code. s-cpu_env cannot be zero here. env-exit_request is not used by kvm. hw/apic.c |6 ++ qemu-kvm-ia64.c |6 ++ qemu-kvm-x86.c|9 +++-- qemu-kvm.c| 39 +++ qemu-kvm.h|4 target-i386/machine.c |6 +++--- target-ia64/machine.c |4 ++-- vl.c |1 - 8 files changed, 47 insertions(+), 28 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/8] Rename kvm_(load|save)_mpstate to kvm_arch_(load|save)_mpstate
To be consistent with other function naming. Signed-off-by: Gleb Natapov g...@redhat.com --- qemu-kvm-ia64.c |4 ++-- qemu-kvm-x86.c|4 ++-- qemu-kvm.h|2 ++ target-i386/machine.c |6 +++--- target-ia64/machine.c |4 ++-- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c index d33c1c3..234602c 100644 --- a/qemu-kvm-ia64.c +++ b/qemu-kvm-ia64.c @@ -98,7 +98,7 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) { } -void kvm_save_mpstate(CPUState *env) +void kvm_arch_save_mpstate(CPUState *env) { #ifdef KVM_CAP_MP_STATE int r; @@ -112,7 +112,7 @@ void kvm_save_mpstate(CPUState *env) #endif } -void kvm_load_mpstate(CPUState *env) +void kvm_arch_load_mpstate(CPUState *env) { #ifdef KVM_CAP_MP_STATE struct kvm_mp_state mp_state = { .mp_state = env-mp_state }; diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 729d600..8e6fb75 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -295,7 +295,7 @@ void kvm_load_tsc(CPUState *env) perror(kvm_set_tsc FAILED.\n); } -void kvm_save_mpstate(CPUState *env) +void kvm_arch_save_mpstate(CPUState *env) { #ifdef KVM_CAP_MP_STATE int r; @@ -309,7 +309,7 @@ void kvm_save_mpstate(CPUState *env) #endif } -void kvm_load_mpstate(CPUState *env) +void kvm_arch_load_mpstate(CPUState *env) { #ifdef KVM_CAP_MP_STATE struct kvm_mp_state mp_state = { .mp_state = env-mp_state }; diff --git a/qemu-kvm.h b/qemu-kvm.h index fa40542..3b73fe9 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -69,6 +69,8 @@ int kvm_arch_qemu_create_context(void); void kvm_arch_save_regs(CPUState *env); void kvm_arch_load_regs(CPUState *env); +void kvm_arch_load_mpstate(CPUState *env); +void kvm_arch_save_mpstate(CPUState *env); int kvm_arch_qemu_init_env(CPUState *cenv); void kvm_arch_pre_kvm_run(void *opaque, CPUState *env); void kvm_arch_post_kvm_run(void *opaque, CPUState *env); diff --git a/target-i386/machine.c b/target-i386/machine.c index 07df1e1..14942c0 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -34,7 +34,7 @@ void cpu_save(QEMUFile *f, void *opaque) if (kvm_enabled()) { kvm_save_registers(env); -kvm_save_mpstate(env); +kvm_arch_save_mpstate(env); } for(i = 0; i CPU_NB_REGS; i++) @@ -369,12 +369,12 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) kvm_load_tsc(env); if (version_id = 5) { qemu_get_be32s(f, env-mp_state); -kvm_load_mpstate(env); +kvm_arch_load_mpstate(env); } } else { kvm_load_registers(env); kvm_load_tsc(env); -kvm_load_mpstate(env); +kvm_arch_load_mpstate(env); } } return 0; diff --git a/target-ia64/machine.c b/target-ia64/machine.c index dd205c5..70ef379 100644 --- a/target-ia64/machine.c +++ b/target-ia64/machine.c @@ -10,7 +10,7 @@ void cpu_save(QEMUFile *f, void *opaque) if (kvm_enabled()) { kvm_save_registers(env); -kvm_save_mpstate(env); +kvm_arch_save_mpstate(env); } } @@ -20,7 +20,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) if (kvm_enabled()) { kvm_load_registers(env); -kvm_load_mpstate(env); +kvm_arch_load_mpstate(env); } return 0; } -- 1.6.2.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/8] Call kvm_arch_load_regs() instead of kvm_load_registers()
The call is done from vcpu thread. Signed-off-by: Gleb Natapov g...@redhat.com --- qemu-kvm.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 7676e02..5fa7154 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -399,7 +399,7 @@ static int kvm_main_loop_cpu(CPUState *env) #endif cpu_single_env = env; -kvm_load_registers(env); +kvm_arch_load_regs(env); while (1) { while (!has_work(env)) -- 1.6.2.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/8] env-kvm_cpu_state.init is always zero here.
Signed-off-by: Gleb Natapov g...@redhat.com --- qemu-kvm.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 2aeb17c..ec911ef 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -412,7 +412,7 @@ static int kvm_main_loop_cpu(CPUState *env) if (env-kvm_cpu_state.sipi_needed) update_regs_for_sipi(env); } - if (!env-halted !env-kvm_cpu_state.init) + if (!env-halted) kvm_cpu_exec(env); env-exit_request = 0; env-exception_index = EXCP_INTERRUPT; -- 1.6.2.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/8] env-exit_request is not used by kvm.
Remove its use from kvm code. Signed-off-by: Gleb Natapov g...@redhat.com --- qemu-kvm-x86.c |3 +-- qemu-kvm.c |3 --- vl.c |1 - 3 files changed, 1 insertions(+), 6 deletions(-) diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 6865385..5460136 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -634,8 +634,7 @@ void kvm_arch_post_kvm_run(void *opaque, CPUState *env) int kvm_arch_has_work(CPUState *env) { -if (env-exit_request || -((env-interrupt_request CPU_INTERRUPT_HARD) +if (((env-interrupt_request CPU_INTERRUPT_HARD) (env-eflags IF_MASK)) || (env-interrupt_request CPU_INTERRUPT_NMI)) return 1; diff --git a/qemu-kvm.c b/qemu-kvm.c index 2930a1d..bbdb03a 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -168,8 +168,6 @@ static int kvm_pre_run(void *opaque, void *data) kvm_arch_pre_kvm_run(opaque, env); -if (env-exit_request) - return 1; pthread_mutex_unlock(qemu_mutex); return 0; } @@ -441,7 +439,6 @@ static int kvm_main_loop_cpu(CPUState *env) } if (!env-halted || kvm_irqchip_in_kernel(kvm_context)) kvm_cpu_exec(env); - env-exit_request = 0; kvm_main_loop_wait(env, 0); } pthread_mutex_unlock(qemu_mutex); diff --git a/vl.c b/vl.c index 845ed54..c08299c 100644 --- a/vl.c +++ b/vl.c @@ -3724,7 +3724,6 @@ void qemu_system_reset_request(void) } if (cpu_single_env) { qemu_kvm_cpu_stop(cpu_single_env); -cpu_exit(cpu_single_env); } qemu_notify_event(); } -- 1.6.2.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/8] Retrieve mp state info in cpu_synchronize_state()
And set env-halted based on the value to show accurate vcpu state in QEMU monitor. Signed-off-by: Gleb Natapov g...@redhat.com --- qemu-kvm.c | 27 +++ qemu-kvm.h |2 ++ 2 files changed, 29 insertions(+), 0 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 5fa7154..3ae4b45 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -200,6 +200,33 @@ void kvm_save_registers(CPUState *env) on_vcpu(env, kvm_do_save_registers, env); } +static void kvm_do_load_mpstate(void *_env) +{ +CPUState *env = _env; + +kvm_arch_load_mpstate(env); +} + +void kvm_load_mpstate(CPUState *env) +{ +if (kvm_enabled() qemu_system_ready) +on_vcpu(env, kvm_do_load_mpstate, env); +} + +static void kvm_do_save_mpstate(void *_env) +{ +CPUState *env = _env; + +kvm_arch_save_mpstate(env); +env-halted = (env-mp_state == KVM_MP_STATE_HALTED); +} + +void kvm_save_mpstate(CPUState *env) +{ +if (kvm_enabled()) +on_vcpu(env, kvm_do_save_mpstate, env); +} + int kvm_cpu_exec(CPUState *env) { int r; diff --git a/qemu-kvm.h b/qemu-kvm.h index 3b73fe9..22452e9 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -221,11 +221,13 @@ static inline int kvm_sync_vcpus(void) { return 0; } static inline void kvm_arch_get_registers(CPUState *env) { kvm_save_registers(env); +kvm_save_mpstate(env); } static inline void kvm_arch_put_registers(CPUState *env) { kvm_load_registers(env); +kvm_load_mpstate(env); } static inline void cpu_synchronize_state(CPUState *env, int modified) -- 1.6.2.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/8] env-exception_index is not used by kvm code.
Signed-off-by: Gleb Natapov g...@redhat.com --- qemu-kvm-ia64.c |2 -- qemu-kvm-x86.c |2 -- qemu-kvm.c |1 - 3 files changed, 0 insertions(+), 5 deletions(-) diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c index 234602c..477d24c 100644 --- a/qemu-kvm-ia64.c +++ b/qemu-kvm-ia64.c @@ -35,7 +35,6 @@ int kvm_arch_halt(void *opaque, kvm_vcpu_context_t vcpu) { CPUState *env = cpu_single_env; env-hflags |= HF_HALTED_MASK; -env-exception_index = EXCP_HLT; return 1; } @@ -135,7 +134,6 @@ void kvm_arch_cpu_reset(CPUState *env) } else { env-interrupt_request = ~CPU_INTERRUPT_HARD; env-halted = 1; - env-exception_index = EXCP_HLT; } } diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 8e6fb75..6865385 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -611,7 +611,6 @@ int kvm_arch_halt(void *opaque, kvm_vcpu_context_t vcpu) (env-eflags IF_MASK)) !(env-interrupt_request CPU_INTERRUPT_NMI)) { env-halted = 1; - env-exception_index = EXCP_HLT; } return 1; } @@ -707,7 +706,6 @@ void kvm_arch_cpu_reset(CPUState *env) } else { env-interrupt_request = ~CPU_INTERRUPT_HARD; env-halted = 1; - env-exception_index = EXCP_HLT; } } } diff --git a/qemu-kvm.c b/qemu-kvm.c index 3ae4b45..2930a1d 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -442,7 +442,6 @@ static int kvm_main_loop_cpu(CPUState *env) if (!env-halted || kvm_irqchip_in_kernel(kvm_context)) kvm_cpu_exec(env); env-exit_request = 0; -env-exception_index = EXCP_INTERRUPT; kvm_main_loop_wait(env, 0); } pthread_mutex_unlock(qemu_mutex); -- 1.6.2.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/8] s-cpu_env cannot be zero here.
Remove redundant check. Signed-off-by: Gleb Natapov g...@redhat.com --- hw/apic.c |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index f186202..eac54fd 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -471,8 +471,7 @@ static void apic_init_ipi(APICState *s) s-cpu_env-halted = 1; if (kvm_enabled() !qemu_kvm_irqchip_in_kernel()) - if (s-cpu_env) - kvm_apic_init(s-cpu_env); +kvm_apic_init(s-cpu_env); } /* send a SIPI message to the CPU to start it */ -- 1.6.2.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
Avi Kivity wrote: On 06/15/2009 03:52 PM, Anthony Liguori wrote: Avi Kivity wrote: On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote: We should just tell the user which slots are open. This might be tricky if the config is passed in with the command line flags. qemu -show-available-pci-slots Why does the user care? Let QEMU allocate the PCI slot, then query it to see what slot it assigned and remember that. It's a roundabout way of doing things. Having libvirt do PCI slot allocation scares me. It assumes we can return a whitelist of available slots, and then let libvirt just randomly assign things. There's knowledge though in slot assignment that's board-specific. For instance, depending on how many LNK lines you have, you may want to put things in slots in such a way to optimize interrupt balancing or something like that. Some platforms have quirks about expecting a particular slot to have a particular device. It's still an optimal device but it has to be in that slot. You can't really express that via an available slot list. Non-virtio, as well. But we can't make that the default, so the user will have to specify this anyway. Given that you can't hotunplug individual functions, the user will have to specify exactly how functions are aggregated into devices. My recommendation would be for a GUI to allow the user to select a 'quad port virtio NIC' or 'dual port virtio scsi controller' rather than trying to do anything automatic. Yeah, I haven't thought much about that. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
Avi Kivity wrote: Certainly preferable to -baseline. This is pretty easy to maintain with config files. Let's not tie the two together. I mentioned it because it suggests a good transition. We at least have to think through how things map to the post-config file world regardless of whether that's a few months from now or a decade :-) Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Autotest] [KVM-AUTOTEST PATCH 1/4] Make all programs on kvm test use /usr/bin/python
- Martin Bligh mbl...@google.com wrote: On Wed, Jun 10, 2009 at 4:01 AM, Alexey Eromenkoaerom...@redhat.com wrote: Even better would be to use /usr/bin/python2. That doesn't seem to exist, on Ubuntu at least. Red Hat systems have it. /usr/bin/python2 is a symlink to /usr/bin/python (which is python2 executable) Is there any Ubuntu-compatible way of achieving this? -Alexey -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 04:20 PM, Anthony Liguori wrote: then turns on the power. Command line options are the parts lying around when we start. btw, -drive needs to be separated: -controller type=lsi1234,pci_addr=foobar,name=blah -drive file=foo.img,controller=blah,index=0 -drive file=bar.img,controller=blah,index=1 Drives to not have pci addresses. Drivers don't have indexes and buses but we specify it on the -drive line. Drives do have indexes. On old parallel scsi drives you set the index by clicking a button on the back of the drive to cycle through scsi addresses 0-7. An IDE drive's index is determined by the cable (master/slave). A SATA drive's index is determined by which header on the motherboard the drive connects to. It's not at all that simple. SCSI has a hierarchical address mechanism with 0-7 targets but then potentially multiple LUNs per target. Today, we always emulate a single LUN per target but if we ever wanted to support more than 7 disks on a SCSI controller, we would have to add multiple LUN support too. So the current linear unit= parameter is actually pretty broken for SCSI. Well, another level in the hierarchy, but I don't think it materially changes things. For IDE, it's a combination of bus, slot, and master/slave. For virtio, it's just a PCI address. What we really need is something that is more opaque and controller specific. virtio also has a bus (did you mean the pci bus for IDE?), master/slave is the index. virtio doesn't have index, but IMO that was a mistake and we should have designed it as a disk controller in the first place. For instance, if we were going to do controllers... -controller type=lsi1234,pci_addr=foobar,name=blah -controller-disk controller=blah,target=0,lun=1,name=sda -controller type=ide,pci_addr=barfoo,name=ide -controller-disk controller=ide,slot=secondary,cable=slave,name=hdd -drive file=foo.img,controller-disk=sda -drive file=bar.img,controller-disk=hdd And having -hdd file=foo.img be short-hand for -drive file=%s,controller-disk=%s. Yeah. If by bus you mean the if= parameter, then drives certainly do have buses. Just try connecting the scsi drive from the previous paragraph to a USB port. No, I meant drive file=foo.img,bus=3. If that doesn't seem obvious what it should do to you that's because it isn't at all obvious :-) It ends up skipping a predefined number of locations in the drive table. This is pretty broken fundamentally because it assumes controllers always support a fixed number of devices. Nothing really respects bus_id though so in practice, I assume it's almost universally broken. Isn't the drive table something totally internal? And how does bus= relate to it? Confused. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
Avi Kivity wrote: On 06/15/2009 04:20 PM, Anthony Liguori wrote: It's not at all that simple. SCSI has a hierarchical address mechanism with 0-7 targets but then potentially multiple LUNs per target. Today, we always emulate a single LUN per target but if we ever wanted to support more than 7 disks on a SCSI controller, we would have to add multiple LUN support too. So the current linear unit= parameter is actually pretty broken for SCSI. Well, another level in the hierarchy, but I don't think it materially changes things. Depends on whether you expect to say index=0,lun=3 or index=3. If you mean the later, then it's quite conceivable that each target supports less than the maximum number of LUNs. This makes things pretty confusing to the user because they have to know that in the current implementation, index=0 is valid, index=1 isn't, but index=8 is. No, I meant drive file=foo.img,bus=3. If that doesn't seem obvious what it should do to you that's because it isn't at all obvious :-) It ends up skipping a predefined number of locations in the drive table. This is pretty broken fundamentally because it assumes controllers always support a fixed number of devices. Nothing really respects bus_id though so in practice, I assume it's almost universally broken. Isn't the drive table something totally internal? And how does bus= relate to it? The reality of unit=X,bus=Y,if=Z is that they expand to: drive_table_index=Y*max_devs[Z] + X Whereas max_devs = {ide:4, scsi: 7, *:0} How drive_table_index is interpreted is if specific. For if=scsi, each lsi device gets a base drive table index that starts at bus_index * 7. For virtio, the first empty spot in drive_table results in no more drives being created. It's broken by design. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 04:23 PM, Anthony Liguori wrote: Avi Kivity wrote: On 06/15/2009 03:52 PM, Anthony Liguori wrote: Avi Kivity wrote: On 06/15/2009 03:41 PM, Michael S. Tsirkin wrote: We should just tell the user which slots are open. This might be tricky if the config is passed in with the command line flags. qemu -show-available-pci-slots Why does the user care? Let QEMU allocate the PCI slot, then query it to see what slot it assigned and remember that. It's a roundabout way of doing things. Having libvirt do PCI slot allocation scares me. It assumes we can return a whitelist of available slots, and then let libvirt just randomly assign things. There's knowledge though in slot assignment that's board-specific. For instance, depending on how many LNK lines you have, you may want to put things in slots in such a way to optimize interrupt balancing or something like that. How would qemu know which slots to optimize for? In practice, I don't see that as a real problem. We should (a) add an ioapic and four more pci links (b) recommend that slots be assigned in ascending order, and everything works. I don't see your concern about libvirt allocating slots. If a human can plug a card into a slot, so can libvirt. Doing an interactive back-and-forth (equivalent to plugging a card while blindfolded, then looking to see which slot we hit) is certainly more difficult. Some platforms have quirks about expecting a particular slot to have a particular device. It's still an optimal device but it has to be in that slot. You can't really express that via an available slot list. I'll be surprised if we ever measure different dma speeds on different slots in the qemu virtual pci bus. If we do, we'll find a way to express them: $ qemu -print-pci slot 0:01: available 33MHz slot 0:02: available 33MHz slot 0:03: available 66MHz I feel a little silly typing this. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Configuration vs. compat hints [was Re: [Qemu-devel] [PATCHv3 03/13] qemu: add routines to manage PCI capabilities]
On 06/15/2009 04:24 PM, Anthony Liguori wrote: Avi Kivity wrote: Certainly preferable to -baseline. This is pretty easy to maintain with config files. Let's not tie the two together. I mentioned it because it suggests a good transition. We at least have to think through how things map to the post-config file world regardless of whether that's a few months from now or a decade :-) Sure, it's good both from the transitional point of view and in its own right. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] kvm-s390: streamline memslot handling - rebased v2
From: Christian Ehrhardt ehrha...@linux.vnet.ibm.com As requested this is a rebased patch on top of the already applied v3 of the patch series. *updates to applied version* - remove dependency to KVM_REQ_MMU_RELOAD in generic code - remove explicit barrier after test_and_clear_bit as it is implied - ensure the wait_on_bit waiter is notified - ensure dropping vcpu all requests while freeing a vcpu - kickout only scheduled vcpus (its superfluous and wait might hang forever on not running vcpus) - kvm_arch_set_memory_region waits until the bit is consumed by the vcpu This patch relocates the variables kvm-s390 uses to track guest mem addr/size. As discussed dropping the variables at struct kvm_arch level allows to use the common vcpu-request based mechanism to reload guest memory if e.g. changes via set_memory_region. The kick mechanism introduced in this series is used to ensure running vcpus leave guest state to catch the update. Signed-off-by: Christian Ehrhardt ehrha...@linux.vnet.ibm.com --- [diffstat] arch/s390/kvm/kvm-s390.c | 27 --- arch/s390/kvm/kvm-s390.h |6 ++ virt/kvm/kvm_main.c |6 ++ 3 files changed, 32 insertions(+), 7 deletions(-) Index: kvm/arch/s390/kvm/kvm-s390.c === --- kvm.orig/arch/s390/kvm/kvm-s390.c +++ kvm/arch/s390/kvm/kvm-s390.c @@ -674,6 +674,12 @@ long kvm_arch_vcpu_ioctl(struct file *fi return -EINVAL; } +static int wait_bit_schedule(void *word) +{ + schedule(); + return 0; +} + /* Section: memory related */ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, @@ -681,6 +687,7 @@ int kvm_arch_set_memory_region(struct kv int user_alloc) { int i; + struct kvm_vcpu *vcpu; /* A few sanity checks. We can have exactly one memory slot which has to start at guest virtual zero and which has to be located at a @@ -706,13 +713,19 @@ int kvm_arch_set_memory_region(struct kv /* request update of sie control block for all available vcpus */ for (i = 0; i KVM_MAX_VCPUS; ++i) { - if (kvm-vcpus[i]) { - if (test_and_set_bit(KVM_REQ_MMU_RELOAD, - kvm-vcpus[i]-requests)) - continue; - kvm_s390_inject_sigp_stop(kvm-vcpus[i], - ACTION_VCPUREQUEST_ON_STOP); - } + vcpu = kvm-vcpus[i]; + if (!vcpu) + continue; + + if (!test_and_set_bit(KVM_REQ_MMU_RELOAD, vcpu-requests)) + continue; + + if (vcpu-cpu == -1) + continue; + + kvm_s390_inject_sigp_stop(vcpu, ACTION_VCPUREQUEST_ON_STOP); + wait_on_bit(vcpu-requests, KVM_REQ_MMU_RELOAD, + wait_bit_schedule, TASK_UNINTERRUPTIBLE); } return 0; Index: kvm/arch/s390/kvm/kvm-s390.h === --- kvm.orig/arch/s390/kvm/kvm-s390.h +++ kvm/arch/s390/kvm/kvm-s390.h @@ -92,6 +92,12 @@ static inline unsigned long kvm_s390_han if (!vcpu-requests) return 0; + /* requests that can be handled at all levels */ + if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, vcpu-requests)) { + wake_up_bit(vcpu-requests, KVM_REQ_MMU_RELOAD); + kvm_s390_vcpu_set_mem(vcpu); + } + return vcpu-requests; } Index: kvm/virt/kvm/kvm_main.c === --- kvm.orig/virt/kvm/kvm_main.c +++ kvm/virt/kvm/kvm_main.c @@ -1681,6 +1681,12 @@ static int kvm_vcpu_mmap(struct file *fi static int kvm_vcpu_release(struct inode *inode, struct file *filp) { struct kvm_vcpu *vcpu = filp-private_data; + int i; + + vcpu-requests = 0; + smp_mb(); + for (i = 0; i sizeof(vcpu-requests); i++) + wake_up_bit(vcpu-requests, i); kvm_put_kvm(vcpu-kvm); return 0; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/3] kvm-s390: update vcpu-cpu - rebased
From: Christian Ehrhardt ehrha...@linux.vnet.ibm.com As requested this is a rebased patch on top of the already applied v3 of the patch series. kvm on s390 formerly ignored vcpu-cpu. This patch adds set/unset vcpu-cpu in kvm_arch_vcpu_load/put to allow further architecture unification e.g. let generic code not find -1 on currently scheduled vcpus. Signed-off-by: Christian Ehrhardt ehrha...@linux.vnet.ibm.com --- [diffstat] kvm-s390.c |2 ++ 1 file changed, 2 insertions(+) [diff] Index: kvm/arch/s390/kvm/kvm-s390.c === --- kvm.orig/arch/s390/kvm/kvm-s390.c +++ kvm/arch/s390/kvm/kvm-s390.c @@ -244,6 +244,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcp void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + vcpu-cpu = cpu; save_fp_regs(vcpu-arch.host_fpregs); save_access_regs(vcpu-arch.host_acrs); vcpu-arch.guest_fpregs.fpc = FPC_VALID_MASK; @@ -253,6 +254,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + vcpu-cpu = -1; save_fp_regs(vcpu-arch.guest_fpregs); save_access_regs(vcpu-arch.guest_acrs); restore_fp_regs(vcpu-arch.host_fpregs); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html