On 02/21/2017 03:09 AM, Tian, Kevin wrote: >> From: Boris Ostrovsky [mailto:boris.ostrov...@oracle.com] >> Sent: Saturday, February 18, 2017 1:40 AM >> >> vpmu_enabled() (used by hvm/pv_cpuid() to properly report 0xa leaf >> for Intel processors) is based on the value of VPMU_CONTEXT_ALLOCATED >> bit. This is problematic: >> * For HVM guests VPMU context is allocated lazily, during the first >> access to VPMU MSRs. Since the leaf is typically queried before guest >> attempts to read or write the MSRs it is likely that CPUID will report >> no PMU support >> * For PV guests the context is allocated eagerly but only in response to >> guest's XENPMU_init hypercall. There is a chance that the guest will >> try to read CPUID before making this hypercall. >> >> This patch introduces VPMU_AVAILABLE flag which is set (subject to vpmu_mode >> constraints) during VCPU initialization for both PV and HVM guests. Since >> this flag is expected to be managed together with vpmu_count, get/put_vpmu() >> are added to simplify code. >> >> vpmu_enabled() (renamed to vpmu_available()) can now use this new flag. 
>> >> (As a side effect this patch also fixes a race in pvpmu_init() where we >> increment vcpu_count in vpmu_initialise() after checking vpmu_mode) >> >> Signed-off-by: Boris Ostrovsky <boris.ostrov...@oracle.com> >> --- >> v2: >> * Rename VPMU_ENABLED to VPMU_AVAILABLE (and vpmu_enabled() to >> vpmu_available()) >> * Check VPMU_AVAILABLE flag in put_vpmu() under lock >> >> xen/arch/x86/cpu/vpmu.c | 111 >> ++++++++++++++++++++++++++++++--------------- >> xen/arch/x86/cpuid.c | 8 ++-- >> xen/arch/x86/domain.c | 5 ++ >> xen/arch/x86/hvm/svm/svm.c | 5 -- >> xen/arch/x86/hvm/vmx/vmx.c | 5 -- >> xen/include/asm-x86/vpmu.h | 6 ++- >> 6 files changed, 88 insertions(+), 52 deletions(-) >> >> diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c >> index 35a9403..b30769d 100644 >> --- a/xen/arch/x86/cpu/vpmu.c >> +++ b/xen/arch/x86/cpu/vpmu.c >> @@ -456,32 +456,21 @@ int vpmu_load(struct vcpu *v, bool_t from_guest) >> return 0; >> } >> >> -void vpmu_initialise(struct vcpu *v) >> +static int vpmu_arch_initialise(struct vcpu *v) >> { >> struct vpmu_struct *vpmu = vcpu_vpmu(v); >> uint8_t vendor = current_cpu_data.x86_vendor; >> int ret; >> - bool_t is_priv_vpmu = is_hardware_domain(v->domain); >> >> BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ); >> BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ); >> BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ); >> BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ); >> >> - ASSERT(!vpmu->flags && !vpmu->context); >> + ASSERT(!(vpmu->flags & ~VPMU_AVAILABLE) && !vpmu->context); >> >> - if ( !is_priv_vpmu ) >> - { >> - /* >> - * Count active VPMUs so that we won't try to change vpmu_mode while >> - * they are in use. >> - * vpmu_mode can be safely updated while dom0's VPMUs are active and >> - * so we don't need to include it in the count. 
>> - */ >> - spin_lock(&vpmu_lock); >> - vpmu_count++; >> - spin_unlock(&vpmu_lock); >> - } >> + if ( !vpmu_available(v) ) >> + return 0; >> >> switch ( vendor ) >> { >> @@ -501,7 +490,7 @@ void vpmu_initialise(struct vcpu *v) >> opt_vpmu_enabled = 0; >> vpmu_mode = XENPMU_MODE_OFF; >> } >> - return; /* Don't bother restoring vpmu_count, VPMU is off forever */ >> + return -EINVAL; >> } >> >> vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED; >> @@ -509,15 +498,63 @@ void vpmu_initialise(struct vcpu *v) >> if ( ret ) >> printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v); >> >> - /* Intel needs to initialize VPMU ops even if VPMU is not in use */ >> - if ( !is_priv_vpmu && >> - (ret || (vpmu_mode == XENPMU_MODE_OFF) || >> - (vpmu_mode == XENPMU_MODE_ALL)) ) >> + return ret; >> +} >> + >> +static void get_vpmu(struct vcpu *v) >> +{ >> + spin_lock(&vpmu_lock); >> + >> + /* >> + * Count active VPMUs so that we won't try to change vpmu_mode while >> + * they are in use. >> + * vpmu_mode can be safely updated while dom0's VPMUs are active and >> + * so we don't need to include it in the count. >> + */ >> + if ( !is_hardware_domain(v->domain) && >> + (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) ) >> + { >> + vpmu_count++; >> + vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE); >> + } >> + else if ( is_hardware_domain(v->domain) && >> + (vpmu_mode != XENPMU_MODE_OFF) ) >> + vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE); >> + >> + spin_unlock(&vpmu_lock); >> +} >> + >> +static void put_vpmu(struct vcpu *v) >> +{ >> + spin_lock(&vpmu_lock); >> + >> + if ( !vpmu_is_set(vcpu_vpmu(v), VPMU_AVAILABLE) ) >> + goto out; > just use !vpmu_available(v)
Yes. > >> + >> + if ( !is_hardware_domain(v->domain) && >> + (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) ) >> { >> - spin_lock(&vpmu_lock); >> vpmu_count--; >> - spin_unlock(&vpmu_lock); >> + vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE); >> } >> + else if ( is_hardware_domain(v->domain) && >> + (vpmu_mode != XENPMU_MODE_OFF) ) >> + vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE); >> + >> + out: >> + spin_unlock(&vpmu_lock); >> +} >> + >> +void vpmu_initialise(struct vcpu *v) >> +{ >> + get_vpmu(v); >> + >> + /* >> + * Guests without LAPIC (i.e. PV) call vpmu_arch_initialise() >> + * from pvpmu_init(). >> + */ > implication is that PV VPMU is not counted then? No. get_vpmu() is what does the counting now. Since we do vcpu_initialise() -> vpmu_initialise() for all types of VCPUs both PV and HVM VPMUs are counted here. But we defer arch-specific initialization (which doesn't do the counting) for PV until the hypercall. > I think it's > not aligned with original policy, where only privileged VPMU > i.e. Dom0 is not counted. 
> >> + if ( has_vlapic(v->domain) && vpmu_arch_initialise(v) ) >> + put_vpmu(v); >> } >> >> static void vpmu_clear_last(void *arg) >> @@ -526,7 +563,7 @@ static void vpmu_clear_last(void *arg) >> this_cpu(last_vcpu) = NULL; >> } >> >> -void vpmu_destroy(struct vcpu *v) >> +static void vpmu_arch_destroy(struct vcpu *v) >> { >> struct vpmu_struct *vpmu = vcpu_vpmu(v); >> >> @@ -551,11 +588,13 @@ void vpmu_destroy(struct vcpu *v) >> vpmu_save_force, v, 1); >> vpmu->arch_vpmu_ops->arch_vpmu_destroy(v); >> } >> +} >> >> - spin_lock(&vpmu_lock); >> - if ( !is_hardware_domain(v->domain) ) >> - vpmu_count--; >> - spin_unlock(&vpmu_lock); >> +void vpmu_destroy(struct vcpu *v) >> +{ >> + vpmu_arch_destroy(v); >> + >> + put_vpmu(v); >> } >> >> static int pvpmu_init(struct domain *d, xen_pmu_params_t *params) >> @@ -565,13 +604,15 @@ static int pvpmu_init(struct domain *d, >> xen_pmu_params_t >> *params) >> struct page_info *page; >> uint64_t gfn = params->val; >> >> - if ( (vpmu_mode == XENPMU_MODE_OFF) || >> - ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) ) >> - return -EINVAL; >> - >> if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) ) >> return -EINVAL; >> >> + v = d->vcpu[params->vcpu]; >> + vpmu = vcpu_vpmu(v); >> + >> + if ( !vpmu_available(v) ) >> + return -ENOENT; >> + > I didn't see where get_vpmu is invoked for PV. Then why would > above condition ever set here? get_vpmu is called from vpmu_initialise(). 
-boris > >> page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC); >> if ( !page ) >> return -EINVAL; >> @@ -582,9 +623,6 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t >> *params) >> return -EINVAL; >> } >> >> - v = d->vcpu[params->vcpu]; >> - vpmu = vcpu_vpmu(v); >> - >> spin_lock(&vpmu->vpmu_lock); >> >> if ( v->arch.vpmu.xenpmu_data ) >> @@ -602,7 +640,8 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t >> *params) >> return -ENOMEM; >> } >> >> - vpmu_initialise(v); >> + if ( vpmu_arch_initialise(v) ) >> + put_vpmu(v); > Since no get_vpmu why should we put_vpmu here? > > I feel a bit lost about your handling of PV case here... > > Thanks > Kevin _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org https://lists.xen.org/xen-devel