Allow a guest to register a second location for the VCPU time info

structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW).
This is intended to allow the guest kernel to map this information
into a usermode-accessible page, so that usermode can efficiently
calculate system time from the TSC without having to make a syscall.

Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com>

Index: vsyscall/arch/x86/include/asm/kvm_para.h
===================================================================
--- vsyscall.orig/arch/x86/include/asm/kvm_para.h
+++ vsyscall/arch/x86/include/asm/kvm_para.h
@@ -23,6 +23,7 @@
 #define KVM_FEATURE_ASYNC_PF           4
 #define KVM_FEATURE_STEAL_TIME         5
 #define KVM_FEATURE_PV_EOI             6
+#define KVM_FEATURE_USERSPACE_CLOCKSOURCE 7
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -39,6 +40,7 @@
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 #define MSR_KVM_STEAL_TIME  0x4b564d03
 #define MSR_KVM_PV_EOI_EN      0x4b564d04
+#define MSR_KVM_USERSPACE_TIME      0x4b564d05
 
 struct kvm_steal_time {
        __u64 steal;
Index: vsyscall/Documentation/virtual/kvm/msr.txt
===================================================================
--- vsyscall.orig/Documentation/virtual/kvm/msr.txt
+++ vsyscall/Documentation/virtual/kvm/msr.txt
@@ -125,6 +125,22 @@ MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01
        Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
        leaf prior to usage.
 
+MSR_KVM_USERSPACE_TIME:  0x4b564d05
+
+Allow a guest to register a second location for the VCPU time info
+structure for each vcpu (as described by MSR_KVM_SYSTEM_TIME_NEW).
+This is intended to allow the guest kernel to map this information
+into a usermode-accessible page, so that usermode can efficiently
+calculate system time from the TSC without having to make a syscall.
+
+Relationship with master copy (MSR_KVM_SYSTEM_TIME_NEW):
+
+- This MSR must be enabled only when the master is enabled.
+- Disabling updates to the master automatically disables
+  updates for this copy.
+
+Availability of this MSR must be checked via bit 7 in 0x40000001 cpuid
+leaf prior to usage.
 
 MSR_KVM_WALL_CLOCK:  0x11
 
Index: vsyscall/arch/x86/include/asm/kvm_host.h
===================================================================
--- vsyscall.orig/arch/x86/include/asm/kvm_host.h
+++ vsyscall/arch/x86/include/asm/kvm_host.h
@@ -415,10 +415,13 @@ struct kvm_vcpu_arch {
        int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 
        gpa_t time;
+       gpa_t uspace_time;
        struct pvclock_vcpu_time_info hv_clock;
        unsigned int hw_tsc_khz;
        unsigned int time_offset;
+       unsigned int uspace_time_offset;
        struct page *time_page;
+       struct page *uspace_time_page;
        /* set guest stopped flag in pvclock flags field */
        bool pvclock_set_guest_stopped_request;
 
Index: vsyscall/arch/x86/kvm/x86.c
===================================================================
--- vsyscall.orig/arch/x86/kvm/x86.c
+++ vsyscall/arch/x86/kvm/x86.c
@@ -809,13 +809,13 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN    10
+#define KVM_SAVE_MSRS_BEGIN    11
 static u32 msrs_to_save[] = {
        MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
        MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
        HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
        HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
-       MSR_KVM_PV_EOI_EN,
+       MSR_KVM_PV_EOI_EN, MSR_KVM_USERSPACE_TIME,
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
        MSR_STAR,
 #ifdef CONFIG_X86_64
@@ -1135,16 +1135,43 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu
 
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
+static void kvm_write_pvtime(struct kvm_vcpu *v, struct page *page,
+                            unsigned int offset_in_page, gpa_t gpa)
+{
+       struct kvm_vcpu_arch *vcpu = &v->arch;
+       void *shared_kaddr;
+       struct pvclock_vcpu_time_info *guest_hv_clock;
+       u8 pvclock_flags;
+
+       shared_kaddr = kmap_atomic(page);
+
+       guest_hv_clock = shared_kaddr + offset_in_page;
+
+       /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+       pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+
+       if (vcpu->pvclock_set_guest_stopped_request) {
+               pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+               vcpu->pvclock_set_guest_stopped_request = false;
+       }
+
+       vcpu->hv_clock.flags = pvclock_flags;
+
+       memcpy(shared_kaddr + offset_in_page, &vcpu->hv_clock,
+              sizeof(vcpu->hv_clock));
+
+       kunmap_atomic(shared_kaddr);
+
+       mark_page_dirty(v->kvm, gpa >> PAGE_SHIFT);
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
        unsigned long flags;
        struct kvm_vcpu_arch *vcpu = &v->arch;
-       void *shared_kaddr;
        unsigned long this_tsc_khz;
        s64 kernel_ns, max_kernel_ns;
        u64 tsc_timestamp;
-       struct pvclock_vcpu_time_info *guest_hv_clock;
-       u8 pvclock_flags;
 
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
@@ -1235,26 +1262,11 @@ static int kvm_guest_time_update(struct 
         */
        vcpu->hv_clock.version += 2;
 
-       shared_kaddr = kmap_atomic(vcpu->time_page);
-
-       guest_hv_clock = shared_kaddr + vcpu->time_offset;
-
-       /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
-       pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+       kvm_write_pvtime(v, vcpu->time_page, vcpu->time_offset, vcpu->time);
+       if (vcpu->uspace_time_page)
+               kvm_write_pvtime(v, vcpu->uspace_time_page,
+                                vcpu->uspace_time_offset, vcpu->uspace_time);
 
-       if (vcpu->pvclock_set_guest_stopped_request) {
-               pvclock_flags |= PVCLOCK_GUEST_STOPPED;
-               vcpu->pvclock_set_guest_stopped_request = false;
-       }
-
-       vcpu->hv_clock.flags = pvclock_flags;
-
-       memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-              sizeof(vcpu->hv_clock));
-
-       kunmap_atomic(shared_kaddr);
-
-       mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
        return 0;
 }
 
@@ -1549,6 +1561,15 @@ static void kvmclock_reset(struct kvm_vc
        }
 }
 
+static void kvmclock_uspace_reset(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.uspace_time = 0;
+       if (vcpu->arch.uspace_time_page) {
+               kvm_release_page_dirty(vcpu->arch.uspace_time_page);
+               vcpu->arch.uspace_time_page = NULL;
+       }
+}
+
 static void accumulate_steal_time(struct kvm_vcpu *vcpu)
 {
        u64 delta;
@@ -1639,6 +1660,31 @@ int kvm_set_msr_common(struct kvm_vcpu *
                vcpu->kvm->arch.wall_clock = data;
                kvm_write_wall_clock(vcpu->kvm, data);
                break;
+       case MSR_KVM_USERSPACE_TIME: {
+               kvmclock_uspace_reset(vcpu);
+
+               if (!vcpu->arch.time_page && (data & 1))
+                       return 1;
+
+               vcpu->arch.uspace_time = data;
+               kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+
+               /* we verify if the enable bit is set... */
+               if (!(data & 1))
+                       break;
+
+               /* ...but clean it before doing the actual write */
+               vcpu->arch.uspace_time_offset = data & ~(PAGE_MASK | 1);
+
+               vcpu->arch.uspace_time_page = gfn_to_page(vcpu->kvm,
+                                                         data >> PAGE_SHIFT);
+
+               if (is_error_page(vcpu->arch.uspace_time_page)) {
+                       kvm_release_page_clean(vcpu->arch.uspace_time_page);
+                       vcpu->arch.uspace_time_page = NULL;
+               }
+               break;
+       }
        case MSR_KVM_SYSTEM_TIME_NEW:
        case MSR_KVM_SYSTEM_TIME: {
                kvmclock_reset(vcpu);
@@ -1647,8 +1693,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
                kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
                /* we verify if the enable bit is set... */
-               if (!(data & 1))
+               if (!(data & 1)) {
+                       kvmclock_uspace_reset(vcpu);
                        break;
+               }
 
                /* ...but clean it before doing the actual write */
                vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
@@ -1656,8 +1704,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
                vcpu->arch.time_page =
                                gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
 
-               if (is_error_page(vcpu->arch.time_page))
+               if (is_error_page(vcpu->arch.time_page)) {
                        vcpu->arch.time_page = NULL;
+                       kvmclock_uspace_reset(vcpu);
+               }
 
                break;
        }
@@ -2010,6 +2060,9 @@ int kvm_get_msr_common(struct kvm_vcpu *
        case MSR_KVM_SYSTEM_TIME_NEW:
                data = vcpu->arch.time;
                break;
+       case MSR_KVM_USERSPACE_TIME:
+               data = vcpu->arch.uspace_time;
+               break;
        case MSR_KVM_ASYNC_PF_EN:
                data = vcpu->arch.apf.msr_val;
                break;
@@ -2195,6 +2248,7 @@ int kvm_dev_ioctl_check_extension(long e
        case KVM_CAP_KVMCLOCK_CTRL:
        case KVM_CAP_READONLY_MEM:
        case KVM_CAP_IRQFD_RESAMPLE:
+       case KVM_CAP_USERSPACE_CLOCKSOURCE:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@ -6017,6 +6071,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+       kvmclock_uspace_reset(vcpu);
        kvmclock_reset(vcpu);
 
        free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
Index: vsyscall/arch/x86/kvm/cpuid.c
===================================================================
--- vsyscall.orig/arch/x86/kvm/cpuid.c
+++ vsyscall/arch/x86/kvm/cpuid.c
@@ -411,7 +411,8 @@ static int do_cpuid_ent(struct kvm_cpuid
                             (1 << KVM_FEATURE_CLOCKSOURCE2) |
                             (1 << KVM_FEATURE_ASYNC_PF) |
                             (1 << KVM_FEATURE_PV_EOI) |
-                            (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
+                            (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
+                            (1 << KVM_FEATURE_USERSPACE_CLOCKSOURCE);
 
                if (sched_info_on())
                        entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
Index: vsyscall/include/uapi/linux/kvm.h
===================================================================
--- vsyscall.orig/include/uapi/linux/kvm.h
+++ vsyscall/include/uapi/linux/kvm.h
@@ -626,6 +626,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_READONLY_MEM 81
 #endif
 #define KVM_CAP_IRQFD_RESAMPLE 82
+#define KVM_CAP_USERSPACE_CLOCKSOURCE 83
 
 #ifdef KVM_CAP_IRQ_ROUTING
 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to