From: David Woodhouse <[email protected]>
Initialize the TSC generation and master clock state at VM creation
time, rather than relying on kvm_update_masterclock() from
kvm_arch_vcpu_postcreate().
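Set cur_tsc_generation to 1, and initialize cur_tsc_khz and
cur_tsc_scaling_ratio from the VM's default TSC frequency and the
default scaling ratio. Derive cur_tsc_offset, cur_tsc_nsec and
kvmclock_offset from a single ktime/TSC snapshot, which is taken
atomically by kvm_get_time_and_clockread() when that is available and
succeeds, and otherwise from back-to-back get_kvmclock_base_ns() and
rdtsc() reads. This establishes a coherent epoch so that: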
- The first vCPU synchronizes to the existing generation (getting
the pre-computed TSC offset) rather than computing its own.
- pvclock_update_vm_gtod_copy() succeeds immediately (no zero
tsc_hz from uninitialized cur_tsc_scaling_ratio).
- KVM_GET_CLOCK returns valid {host_tsc, realtime} fields as soon
as any vCPU exists.
- KVM_[GS]ET_CLOCK_GUEST works without a prior KVM_RUN.
- No O(N) iteration of all vCPUs on each vCPU creation.
This replaces the kvm_update_masterclock() call that was added to
kvm_arch_vcpu_postcreate() to work around the missing initialization.
Signed-off-by: David Woodhouse <[email protected]>
Assisted-by: Kiro (claude-opus-4.6-1m)
---
arch/x86/kvm/x86.c | 45 +++++++++++++++++++++++++++++++++++++--------
1 file changed, 37 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 58016af64932..d35f2ac3ef49 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13161,8 +13161,6 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
return;
vcpu_load(vcpu);
kvm_synchronize_tsc(vcpu, NULL);
- if (!vcpu->kvm->arch.use_master_clock)
- kvm_update_masterclock(vcpu->kvm, NULL);
vcpu_put(vcpu);
/* poll control enabled by default */
@@ -13572,6 +13570,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int ret;
unsigned long flags;
+ u64 kvmclock_host_tsc;
+ s64 kvmclock_ns;
if (!kvm_is_vm_type_supported(type))
return -EINVAL;
@@ -13603,18 +13603,47 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
seqcount_raw_spinlock_init(&kvm->arch.pvclock_sc,
&kvm->arch.tsc_write_lock);
ratelimit_state_init(&kvm->arch.kvmclock_update_rs, HZ, 10);
ratelimit_set_flags(&kvm->arch.kvmclock_update_rs,
RATELIMIT_MSG_ON_RELEASE);
- kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
- kvm->arch.all_vcpus_matched_freq = true;
-
- raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
- pvclock_update_vm_gtod_copy(kvm);
- raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
kvm->arch.apic_bus_cycle_ns = APIC_BUS_CYCLE_NS_DEFAULT;
kvm->arch.guest_can_read_msr_platform_info = true;
kvm->arch.enable_pmu = enable_pmu;
+ kvm->arch.all_vcpus_matched_freq = true;
+ kvm->arch.all_vcpus_matched_tsc = true;
+ kvm->arch.cur_tsc_scaling_ratio = kvm_caps.default_tsc_scaling_ratio;
+ kvm->arch.cur_tsc_khz = kvm->arch.default_tsc_khz;
+
+ raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
+
+ /*
+ * Establish the initial TSC generation for synchronization logic.
+ * The first vCPU will sync to this, starting its guest TSC at the
+ * number of cycles elapsed since VM creation rather than at zero.
+ * This ensures coherence with the kvmclock epoch (also set here)
+ * and means the master clock snapshot taken below is immediately
+ * valid — no need to redo it at first vCPU creation.
+ */
+#ifdef CONFIG_X86_64
+ if (!kvm_get_time_and_clockread(&kvmclock_ns, &kvmclock_host_tsc))
+#endif
+ {
+ /*
+ * Not simultaneous: read the kvmclock first and the TSC second, so
+ * that the guest TSC is zero slightly *after* the kvmclock is zero.
+ */
+ kvmclock_ns = get_kvmclock_base_ns();
+ kvmclock_host_tsc = rdtsc();
+ }
+ kvm->arch.cur_tsc_write = 0;
+ kvm->arch.cur_tsc_offset = -(s64)kvmclock_host_tsc;
+ kvm->arch.cur_tsc_nsec = kvmclock_ns;
+ kvm->arch.cur_tsc_generation = 1;
+ kvm->arch.kvmclock_offset = -kvmclock_ns;
+
+ pvclock_update_vm_gtod_copy(kvm);
+ raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+
#if IS_ENABLED(CONFIG_HYPERV)
spin_lock_init(&kvm->arch.hv_root_tdp_lock);
kvm->arch.hv_root_tdp = INVALID_PAGE;
--
2.54.0