This is the host part of the kvm clocksource implementation. As it does not include clockevents, it is a fairly simple implementation: we only have to register a per-vcpu area and start writing to it periodically.
The area is binary compatible with xen, as we use the same shadow_info structure. Signed-off-by: Glauber de Oliveira Costa <[EMAIL PROTECTED]> --- arch/x86/kvm/x86.c | 79 +++++++++++++++++++++++++++++++++++++++++++- include/asm-x86/kvm_host.h | 4 ++ include/asm-x86/kvm_para.h | 37 ++++++++++++++++++++ include/linux/kvm.h | 1 + 4 files changed, 120 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8a90403..53b5692 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -19,6 +19,7 @@ #include "irq.h" #include "mmu.h" +#include <linux/clocksource.h> #include <linux/kvm.h> #include <linux/fs.h> #include <linux/vmalloc.h> @@ -412,7 +413,7 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TIME_STAMP_COUNTER, + MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_PARAVIRT_CLOCK, }; static unsigned num_msrs_to_save; @@ -467,6 +468,60 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) return kvm_set_msr(vcpu, index, *data); } +#define WC_OFFSET offsetof(struct xen_shared_info, wc_version) + +static void kvm_write_guest_time(struct kvm_vcpu *v) +{ + struct timespec ts, wc_ts; + int wc_args[3]; /* version, wc_sec, wc_nsec */ + unsigned long flags; + struct kvm_vcpu_arch *vcpu = &v->arch; + struct xen_shared_info *shared_kaddr; + + if ((!vcpu->shared_page)) + return; + + /* Keep irq disabled to prevent changes to the clock */ + local_irq_save(flags); + kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, + &vcpu->hv_clock.tsc_timestamp); + wc_ts = current_kernel_time(); + ktime_get_ts(&ts); + local_irq_restore(flags); + + /* With all the info we got, fill in the values */ + wc_args[1] = wc_ts.tv_sec; + wc_args[2] = wc_ts.tv_nsec; + + vcpu->hv_clock.system_time = ts.tv_nsec + + (NSEC_PER_SEC * (u64)ts.tv_sec); + /* + * The interface expects us to write an even number signaling that the + * update is finished. 
Since the guest won't see the intermediate states, + * we just write "2" at the end + */ + wc_args[0] = 2; + vcpu->hv_clock.version = 2; + + preempt_disable(); + + shared_kaddr = kmap_atomic(vcpu->shared_page, KM_USER0); + + /* + * We could write everything at once, but it can break future + * implementations. We're just a tiny and lonely clock, so let's + * write only what matters here + */ + memcpy(&shared_kaddr->wc_version, wc_args, sizeof(wc_args)); + memcpy(&shared_kaddr->vcpu_info[v->vcpu_id].time, &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); + + kunmap_atomic(shared_kaddr, KM_USER0); + preempt_enable(); + + mark_page_dirty(v->kvm, vcpu->shared_info >> PAGE_SHIFT); +} + int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { @@ -494,6 +549,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) case MSR_IA32_MISC_ENABLE: vcpu->arch.ia32_misc_enable_msr = data; break; + case MSR_KVM_PARAVIRT_CLOCK: { + vcpu->arch.shared_info = data; + + vcpu->arch.hv_clock.tsc_to_system_mul = + clocksource_khz2mult(tsc_khz, 22); + vcpu->arch.hv_clock.tsc_shift = 22; + + down_write(¤t->mm->mmap_sem); + vcpu->arch.shared_page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); + up_write(¤t->mm->mmap_sem); + if (is_error_page(vcpu->arch.shared_page)) + vcpu->arch.shared_page = NULL; + break; + } default: pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); return 1; @@ -553,6 +622,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) data = vcpu->arch.shadow_efer; break; #endif + case MSR_KVM_PARAVIRT_CLOCK: + data = vcpu->arch.shared_info; + break; + default: pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); return 1; @@ -680,6 +753,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_USER_MEMORY: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_CLOCKSOURCE: r = 1; break; case KVM_CAP_VAPIC: @@ -737,6 +811,7 @@ out: void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, 
cpu); + kvm_write_guest_time(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -3190,6 +3265,8 @@ static void kvm_free_vcpus(struct kvm *kvm) kvm_unload_vcpu_mmu(kvm->vcpus[i]); for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { + if (kvm->vcpus[i]->arch.shared_page) + kvm_release_page_dirty(kvm->vcpus[i]->arch.shared_page); kvm_arch_vcpu_free(kvm->vcpus[i]); kvm->vcpus[i] = NULL; } diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index d6db0de..9a66b90 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -261,6 +261,10 @@ struct kvm_vcpu_arch { /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; + + struct xen_vcpu_time_info hv_clock; + gpa_t shared_info; + struct page *shared_page; }; struct kvm_mem_alias { diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h index c6f3fd8..145107d 100644 --- a/include/asm-x86/kvm_para.h +++ b/include/asm-x86/kvm_para.h @@ -10,10 +10,47 @@ * paravirtualization, the appropriate feature bit should be checked. */ #define KVM_CPUID_FEATURES 0x40000001 +#define KVM_FEATURE_CLOCKSOURCE 0 + +#define MSR_KVM_PARAVIRT_CLOCK 0x11 #ifdef __KERNEL__ #include <asm/processor.h> +/* xen binary-compatible interfaces. See xen headers for details */ +struct xen_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad1[3]; +}; + +struct xen_vcpu_info { + uint8_t pad[32]; + struct xen_vcpu_time_info time; +}; + +#define XEN_MAX_VIRT_CPUS 32 + +struct xen_shared_info { + struct xen_vcpu_info vcpu_info[XEN_MAX_VIRT_CPUS]; + + unsigned long evt[2]; + + uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ + uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ + uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ + + unsigned long pad[12]; +}; + + +extern void kvmclock_init(void); + + /* This instruction is vmcall. 
On non-VT architectures, it will generate a * trap that we will then rewrite to the appropriate instruction. */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4de4fd2..78ce53f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -232,6 +232,7 @@ struct kvm_vapic_addr { #define KVM_CAP_SET_TSS_ADDR 4 #define KVM_CAP_EXT_CPUID 5 #define KVM_CAP_VAPIC 6 +#define KVM_CAP_CLOCKSOURCE 7 /* * ioctls for VM fds -- 1.5.0.6 ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel