Right now, vCPU migration delay is controlled by the vcpu_migration_delay boot parameter. This means the same value will always be used for every instance of Credit1, in any cpupool that will be created.
Also, in order to get and set such a value, a special-purpose libxc interface is defined, and used by the xenpm tool. And this is problematic if Xen is built without Credit1 support. This commit adds a vcpu_migr_delay field inside struct csched_private, so that we can get/set the migration delay independently for each Credit1 instance, in different cpupools. Getting and setting now happen via XEN_SYSCTL_SCHEDOP_*, which is much better suited for this parameter. The value of the boot time parameter is used for initializing the vcpu_migr_delay field of the private structure of all the scheduler instances, when they're created. While there, save reading NOW() and doing any s_time_t operation, when the migration delay of a scheduler is zero (as it is, by default), in __csched_vcpu_is_cache_hot(). Finally, note that, from this commit on, using `xenpm {set,get}-vcpu-migration-delay' will have no effect any longer. A subsequent commit will re-enable it, for the sake of backwards-compatibility. Signed-off-by: Dario Faggioli <dfaggi...@suse.com> --- Cc: George Dunlap <george.dun...@eu.citrix.com> Cc: Andrew Cooper <andrew.coop...@citrix.com> --- Changes from v1: * improved the changelog, as suggested; * add the _US suffix to XEN_SYSCTL_CSCHED_MGR_DLY_MAX; * add the _us suffix to vcpu_migration_delay too; * fix wrong time conversions; * drop redundant and wrong checks for [params]->vcpu_migration_delay to be 0. 
--- xen/common/sched_credit.c | 45 +++++++++++++++++++++++++++++-------------- xen/include/public/sysctl.h | 6 ++++++ 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c index 7c40ee2d00..1f4da65d98 100644 --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -214,7 +214,7 @@ struct csched_private { /* Period of master and tick in milliseconds */ unsigned int tick_period_us, ticks_per_tslice; - s_time_t ratelimit, tslice; + s_time_t ratelimit, tslice, vcpu_migr_delay; struct list_head active_sdom; uint32_t weight; @@ -677,24 +677,24 @@ __csched_vcpu_check(struct vcpu *vc) * implicit overheads such as cache-warming. 1ms (1000) has been measured * as a good value. */ -static unsigned int vcpu_migration_delay; -integer_param("vcpu_migration_delay", vcpu_migration_delay); +static unsigned int vcpu_migration_delay_us; +integer_param("vcpu_migration_delay", vcpu_migration_delay_us); void set_vcpu_migration_delay(unsigned int delay) { - vcpu_migration_delay = delay; + vcpu_migration_delay_us = delay; } unsigned int get_vcpu_migration_delay(void) { - return vcpu_migration_delay; + return vcpu_migration_delay_us; } -static inline int -__csched_vcpu_is_cache_hot(struct vcpu *v) +static inline bool +__csched_vcpu_is_cache_hot(const struct csched_private *prv, struct vcpu *v) { - int hot = ((NOW() - v->last_run_time) < - ((uint64_t)vcpu_migration_delay * 1000u)); + bool hot = prv->vcpu_migr_delay && + (NOW() - v->last_run_time) < prv->vcpu_migr_delay; if ( hot ) SCHED_STAT_CRANK(vcpu_hot); @@ -703,7 +703,8 @@ __csched_vcpu_is_cache_hot(struct vcpu *v) } static inline int -__csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu, cpumask_t *mask) +__csched_vcpu_is_migrateable(const struct csched_private *prv, struct vcpu *vc, + int dest_cpu, cpumask_t *mask) { /* * Don't pick up work that's hot on peer PCPU, or that can't (or @@ -714,7 +715,7 @@ __csched_vcpu_is_migrateable(struct vcpu *vc, int 
dest_cpu, cpumask_t *mask) */ ASSERT(!vc->is_running); - return !__csched_vcpu_is_cache_hot(vc) && + return !__csched_vcpu_is_cache_hot(prv, vc) && cpumask_test_cpu(dest_cpu, mask); } @@ -1251,7 +1252,8 @@ csched_sys_cntl(const struct scheduler *ops, || (params->ratelimit_us && (params->ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN)) - || MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms) ) + || MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms) + || params->vcpu_migr_delay_us > XEN_SYSCTL_CSCHED_MGR_DLY_MAX_US ) goto out; spin_lock_irqsave(&prv->lock, flags); @@ -1261,12 +1263,14 @@ csched_sys_cntl(const struct scheduler *ops, else if ( prv->ratelimit && !params->ratelimit_us ) printk(XENLOG_INFO "Disabling context switch rate limiting\n"); prv->ratelimit = MICROSECS(params->ratelimit_us); + prv->vcpu_migr_delay = MICROSECS(params->vcpu_migr_delay_us); spin_unlock_irqrestore(&prv->lock, flags); /* FALLTHRU */ case XEN_SYSCTL_SCHEDOP_getinfo: params->tslice_ms = prv->tslice / MILLISECS(1); params->ratelimit_us = prv->ratelimit / MICROSECS(1); + params->vcpu_migr_delay_us = prv->vcpu_migr_delay / MICROSECS(1); rc = 0; break; } @@ -1585,6 +1589,7 @@ csched_tick(void *_cpu) static struct csched_vcpu * csched_runq_steal(int peer_cpu, int cpu, int pri, int balance_step) { + const struct csched_private * const prv = CSCHED_PRIV(per_cpu(scheduler, cpu)); const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu); struct csched_vcpu *speer; struct list_head *iter; @@ -1634,7 +1639,7 @@ csched_runq_steal(int peer_cpu, int cpu, int pri, int balance_step) continue; affinity_balance_cpumask(vc, balance_step, cpumask_scratch); - if ( __csched_vcpu_is_migrateable(vc, cpu, cpumask_scratch) ) + if ( __csched_vcpu_is_migrateable(prv, vc, cpu, cpumask_scratch) ) { /* We got a candidate. Grab it! 
*/ TRACE_3D(TRC_CSCHED_STOLEN_VCPU, peer_cpu, @@ -2091,7 +2096,7 @@ csched_dump(const struct scheduler *ops) "\tratelimit = %"PRI_stime"us\n" "\tcredits per msec = %d\n" "\tticks per tslice = %d\n" - "\tmigration delay = %uus\n", + "\tmigration delay = %"PRI_stime"us\n", prv->ncpus, prv->master, prv->credit, @@ -2103,7 +2108,7 @@ csched_dump(const struct scheduler *ops) prv->ratelimit / MICROSECS(1), CSCHED_CREDITS_PER_MSEC, prv->ticks_per_tslice, - vcpu_migration_delay); + prv->vcpu_migr_delay/ MICROSECS(1)); cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers); printk("idlers: %s\n", idlers_buf); @@ -2186,6 +2191,16 @@ csched_init(struct scheduler *ops) } else prv->ratelimit = MICROSECS(sched_ratelimit_us); + + if ( vcpu_migration_delay_us > XEN_SYSCTL_CSCHED_MGR_DLY_MAX_US ) + { + vcpu_migration_delay_us = 0; + printk("WARNING: vcpu_migration_delay outside of valid range [0,%d]us.\n" + "Resetting to default: %u\n", + XEN_SYSCTL_CSCHED_MGR_DLY_MAX_US, vcpu_migration_delay_us); + } + prv->vcpu_migr_delay = MICROSECS(vcpu_migration_delay_us); + return 0; } diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h index 3669e32524..8ba644d6f0 100644 --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -601,6 +601,12 @@ struct xen_sysctl_credit_schedule { #define XEN_SYSCTL_CSCHED_TSLICE_MIN 1 unsigned tslice_ms; unsigned ratelimit_us; + /* + * How long we consider a vCPU to be cache-hot on the + * CPU where it has run (max 100ms, in microseconds) + */ +#define XEN_SYSCTL_CSCHED_MGR_DLY_MAX_US (100 * 1000) + unsigned vcpu_migr_delay_us; }; struct xen_sysctl_credit2_schedule { _______________________________________________ Xen-devel mailing list Xen-devel@lists.xenproject.org https://lists.xenproject.org/mailman/listinfo/xen-devel