On Sat, Apr 04, 2015 at 04:14:41AM +0200, Dario Faggioli wrote:
> This allows for a new item to be passed as part of the psr=
> boot option: "percpu_cmt". If that is specified, Xen tries,
> at boot time, to associate an RMID to each core.
>
> XXX This all looks rather straightforward, if it weren't
>     for the fact that it is, apparently, more common than
>     I thought to run out of RMIDs. For example, on a dev box
>     we have in Cambridge, there are 144 pCPUs and only 71
>     RMIDs.
>
>     In this preliminary version, nothing particularly smart
>     happens if we run out of RMIDs: we just fail to attach
>     the remaining cores and that's it. In future, I'd
>     probably like to:
>      + check whether the operation has any chance to
>        succeed up front (by comparing the number of pCPUs
>        with the available RMIDs)
>      + on unexpected failure, roll back everything... it
>        seems to make more sense to me than just leaving
>        the system half configured for per-cpu CMT
>
>     Thoughts?
>
> XXX Another idea I just had is to allow the user to
>     somehow specify a different 'granularity'. Something
>     like allowing 'percpu_cmt'|'percore_cmt'|'persocket_cmt'
>     with the following meaning:
>      + 'percpu_cmt': as in this patch
>      + 'percore_cmt': same RMID for the hyperthreads of the
>        same core
>      + 'persocket_cmt': same RMID for all cores of the same
>        socket.
>
>     'percore_cmt' would only allow gathering info on a
>     per-core basis... still better than nothing if we
>     do not have enough RMIDs for each pCPU.
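On the up-front check: something along these lines at the end of
init_psr_cmt() may be all that is needed. Just a sketch, and whether
num_online_cpus() is the right count that early in boot (the APs may
not all be up yet) is an open question:

    /* Sketch: refuse to even try per-cpu CMT if there clearly
     * are not enough RMIDs to cover every pCPU. */
    if ( opt_cpu_cmt && num_online_cpus() > psr_cmt->rmid_max )
    {
        printk(XENLOG_WARNING
               "psr: %u pCPUs but only %u RMIDs, not doing per-cpu CMT\n",
               num_online_cpus(), psr_cmt->rmid_max);
        opt_cpu_cmt = 0;
    }

That would avoid the half-configured state in the common case and
leave the rollback for genuinely unexpected failures.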
Could we allocate nr_online_cpus() / nr_rmids() and have some CPUs
share the same RMIDs?

>
>     'persocket_cmt' would basically only allow tracking the
>     amount of free L3 on each socket (by subtracting the
>     monitored value from the total). Again, still better
>     than nothing, it would use very few RMIDs, and I could
>     think of ways of using this information in a few
>     places in the scheduler...
>
>     Again, thoughts?
>
> XXX Finally, when a domain with its own RMID executes on
>     a core that also has its own RMID, domain monitoring
>     just overrides per-CPU monitoring. That means the
>     cache occupancy reported for that pCPU is not accurate.
>
>     For reasons why this situation is difficult to deal
>     with properly, see the document in the cover letter.
>
>     Ideas on how to deal with this, either about how to
>     make it work or how to handle the thing from a
>     'policing' perspective (i.e., which of the two mechanisms
>     should be disabled or penalized?), are very welcome.
>
> Signed-off-by: Dario Faggioli <dario.faggi...@citrix.com>
> ---
>  xen/arch/x86/psr.c        | 72 ++++++++++++++++++++++++++++++++++++---------
>  xen/include/asm-x86/psr.h | 11 ++++++-
>  2 files changed, 67 insertions(+), 16 deletions(-)
>
> diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
> index 0f2a6ce..a71391c 100644
> --- a/xen/arch/x86/psr.c
> +++ b/xen/arch/x86/psr.c
> @@ -26,10 +26,13 @@ struct psr_assoc {
>
>  struct psr_cmt *__read_mostly psr_cmt;
>  static bool_t __initdata opt_psr;
> +static bool_t __initdata opt_cpu_cmt;
>  static unsigned int __initdata opt_rmid_max = 255;
>  static uint64_t rmid_mask;
>  static DEFINE_PER_CPU(struct psr_assoc, psr_assoc);
>
> +DEFINE_PER_CPU(unsigned int, pcpu_rmid);
> +
>  static void __init parse_psr_param(char *s)
>  {
>      char *ss, *val_str;
> @@ -57,6 +60,8 @@ static void __init parse_psr_param(char *s)
>                         val_str);
>          }
>      }
> +    else if ( !strcmp(s, "percpu_cmt") )
> +        opt_cpu_cmt = 1;
>      else if ( val_str && !strcmp(s, "rmid_max") )
>          opt_rmid_max = simple_strtoul(val_str, NULL, 0);
>
> @@ -94,8 +99,8 @@ static void __init init_psr_cmt(unsigned int rmid_max)
>      }
>
>      psr_cmt->rmid_max = min(psr_cmt->rmid_max, psr_cmt->l3.rmid_max);
> -    psr_cmt->rmid_to_dom = xmalloc_array(domid_t, psr_cmt->rmid_max + 1UL);
> -    if ( !psr_cmt->rmid_to_dom )
> +    psr_cmt->rmids = xmalloc_array(domid_t, psr_cmt->rmid_max + 1UL);
> +    if ( !psr_cmt->rmids )
>      {
>          xfree(psr_cmt);
>          psr_cmt = NULL;
> @@ -107,56 +112,86 @@ static void __init init_psr_cmt(unsigned int rmid_max)
>       * with it. To reduce the waste of RMID, reserve RMID 0 for all CPUs that
>       * have no domain being monitored.
>       */
> -    psr_cmt->rmid_to_dom[0] = DOMID_XEN;
> +    psr_cmt->rmids[0] = DOMID_XEN;
>      for ( rmid = 1; rmid <= psr_cmt->rmid_max; rmid++ )
> -        psr_cmt->rmid_to_dom[rmid] = DOMID_INVALID;
> +        psr_cmt->rmids[rmid] = DOMID_INVALID;
>
>      printk(XENLOG_INFO "Cache Monitoring Technology enabled, RMIDs: %u\n",
>             psr_cmt->rmid_max);
>  }
>
> -/* Called with domain lock held, no psr specific lock needed */
> -int psr_alloc_rmid(struct domain *d)
> +static int _psr_alloc_rmid(unsigned int *trmid, unsigned int id)
>  {
>      unsigned int rmid;
>
>      ASSERT(psr_cmt_enabled());
>
> -    if ( d->arch.psr_rmid > 0 )
> +    if ( *trmid > 0 )
>          return -EEXIST;
>
>      for ( rmid = 1; rmid <= psr_cmt->rmid_max; rmid++ )
>      {
> -        if ( psr_cmt->rmid_to_dom[rmid] != DOMID_INVALID )
> +        if ( psr_cmt->rmids[rmid] != DOMID_INVALID )
>              continue;
>
> -        psr_cmt->rmid_to_dom[rmid] = d->domain_id;
> +        psr_cmt->rmids[rmid] = id;
>          break;
>      }
>
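Related to my question above about sharing: when this scan comes up
empty, the per-cpu case could fall back to reusing an RMID that is
already handed out to another pCPU, rather than taking the -EUSERS
path below. A very rough sketch, with a made-up round-robin policy
(a real one would probably want to follow core or socket boundaries,
i.e. the 'percore_cmt'/'persocket_cmt' idea from the changelog):

    /* Hypothetical fallback for the per-cpu path only: share an
     * already-allocated RMID instead of giving up. The occupancy
     * read back for a shared RMID is then the sum over its users. */
    static unsigned int psr_pick_shared_rmid(unsigned int cpu)
    {
        /* Valid RMIDs run from 1 to rmid_max; 0 stays reserved. */
        return 1 + (cpu % psr_cmt->rmid_max);
    }
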
>      /* No RMID available, assign RMID=0 by default. */
>      if ( rmid > psr_cmt->rmid_max )
>      {
> -        d->arch.psr_rmid = 0;
> +        *trmid = 0;
>          return -EUSERS;
>      }
>
> -    d->arch.psr_rmid = rmid;
> +    *trmid = rmid;
>
>      return 0;
>  }
>
> +int psr_alloc_pcpu_rmid(unsigned int cpu)
> +{
> +    int ret;
> +
> +    /* XXX Any locking required? */

It shouldn't be needed on the per-cpu resources in the hotplug
CPU routines.

> +    ret = _psr_alloc_rmid(&per_cpu(pcpu_rmid, cpu), DOMID_XEN);
> +    if ( !ret )
> +        printk(XENLOG_DEBUG "using RMID %u for CPU %u\n",
> +               per_cpu(pcpu_rmid, cpu), cpu);
> +
> +    return ret;
> +}
> +
>  /* Called with domain lock held, no psr specific lock needed */
> -void psr_free_rmid(struct domain *d)
> +int psr_alloc_rmid(struct domain *d)
>  {
> -    unsigned int rmid;
> +    return _psr_alloc_rmid(&d->arch.psr_rmid, d->domain_id);
> +}
>
> -    rmid = d->arch.psr_rmid;
> +static void _psr_free_rmid(unsigned int rmid)
> +{
>      /* We do not free system reserved "RMID=0". */
>      if ( rmid == 0 )
>          return;
>
> -    psr_cmt->rmid_to_dom[rmid] = DOMID_INVALID;
> +    psr_cmt->rmids[rmid] = DOMID_INVALID;
> +}
> +
> +void psr_free_pcpu_rmid(unsigned int cpu)
> +{
> +    printk(XENLOG_DEBUG "Freeing RMID %u. CPU %u no longer monitored\n",
> +           per_cpu(pcpu_rmid, cpu), cpu);
> +
> +    /* XXX Any locking required? */

No idea. Not clear who calls this.

> +    _psr_free_rmid(per_cpu(pcpu_rmid, cpu));
> +    per_cpu(pcpu_rmid, cpu) = 0;
> +}
> +
> +/* Called with domain lock held, no psr specific lock needed */
> +void psr_free_rmid(struct domain *d)
> +{
> +    _psr_free_rmid(d->arch.psr_rmid);
>      d->arch.psr_rmid = 0;
>  }
>
> @@ -184,6 +219,10 @@ static inline void psr_assoc_reg_write(struct psr_assoc *psra, uint64_t reg)
>
>  static inline void psr_assoc_rmid(uint64_t *reg, unsigned int rmid)
>  {
> +    /* Domain not monitored: switch to the RMID of the pcpu (if any) */
> +    if ( rmid == 0 )
> +        rmid = this_cpu(pcpu_rmid);
> +
>      *reg = (*reg & ~rmid_mask) | (rmid & rmid_mask);
>  }
>
> @@ -202,6 +241,9 @@ void psr_ctxt_switch_to(struct domain *d)
>
>  static void psr_cpu_init(unsigned int cpu)
>  {
> +    if ( opt_cpu_cmt && !psr_alloc_pcpu_rmid(cpu) )
> +        printk(XENLOG_INFO "pcpu %u: using RMID %u\n",
> +               cpu, per_cpu(pcpu_rmid, cpu));
>      psr_assoc_init();
>  }
>
> diff --git a/xen/include/asm-x86/psr.h b/xen/include/asm-x86/psr.h
> index 585350c..b70f605 100644
> --- a/xen/include/asm-x86/psr.h
> +++ b/xen/include/asm-x86/psr.h
> @@ -33,17 +33,26 @@ struct psr_cmt_l3 {
>  struct psr_cmt {
>      unsigned int rmid_max;
>      unsigned int features;
> -    domid_t *rmid_to_dom;
> +    domid_t *rmids;
>      struct psr_cmt_l3 l3;
>  };
>
>  extern struct psr_cmt *psr_cmt;
>
> +/*
> + * RMID associated to each core, to track the cache
> + * occupancy contribution of the core itself.
> + */
> +DECLARE_PER_CPU(unsigned int, pcpu_rmid);
> +
>  static inline bool_t psr_cmt_enabled(void)
>  {
>      return !!psr_cmt;
>  }
>
> +int psr_alloc_pcpu_rmid(unsigned int cpu);
> +void psr_free_pcpu_rmid(unsigned int cpu);
> +
>  int psr_alloc_rmid(struct domain *d);
>  void psr_free_rmid(struct domain *d);
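On the "not clear who calls this" point: I would expect the alloc/free
pair to be driven from the CPU hotplug notifier, something like the
sketch below (the exact hook points are a guess on my part, not what
the patch currently wires up):

    static int cpu_callback(struct notifier_block *nfb, unsigned long action,
                            void *hcpu)
    {
        unsigned int cpu = (unsigned long)hcpu;

        switch ( action )
        {
        case CPU_STARTING:
            psr_cpu_init(cpu);           /* allocates the pcpu RMID,
                                            if percpu_cmt is enabled */
            break;
        case CPU_DEAD:
            if ( per_cpu(pcpu_rmid, cpu) )
                psr_free_pcpu_rmid(cpu); /* give the RMID back */
            break;
        }

        return NOTIFY_DONE;
    }

    static struct notifier_block cpu_nfb = {
        .notifier_call = cpu_callback
    };

    /* ... register_cpu_notifier(&cpu_nfb) from psr's presmp initcall ... */

If allocation and freeing only ever happen in the bring-up/tear-down
paths of the CPU itself, then indeed no extra locking is needed for the
per-cpu slot. The scan over psr_cmt->rmids[] can still race with a
domain allocating its RMID at the same time, though, so that array may
eventually want a small lock of its own.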