Right now, vCPU migration delay is controlled by
the vcpu_migration_delay boot parameter. This means
the same value will always be used for every instance
of Credit1, in any cpupool that will be created.

Also, in order to get and set this value, a
special-purpose libxc interface is defined, and used by
the xenpm tool. This is problematic if Xen is built
without Credit1 support.

This commit adds a vcpu_migr_delay field inside
struct csched_private, so that we can get/set the
migration delay independently for each Credit1 instance,
in different cpupools.

Getting and setting now happens via XEN_SYSCTL_SCHEDOP_*,
which is much better suited for this parameter.

The value of the boot time parameter is used for
initializing the vcpu_migr_delay field of the private
structure of all the scheduler instances, when they're
created.

While there, avoid reading NOW() and doing any s_time_t
arithmetic in __csched_vcpu_is_cache_hot() when the
migration delay of a scheduler is zero (as it is, by
default).

Finally, note that, from this commit on, using `xenpm
{set,get}-vcpu-migration-delay' will have no effect
any longer. A subsequent commit will re-enable it, for
the sake of backwards-compatibility.

Signed-off-by: Dario Faggioli <dfaggi...@suse.com>
---
Cc: George Dunlap <george.dun...@eu.citrix.com>
Cc: Andrew Cooper <andrew.coop...@citrix.com>
---
Changes from v1:
* improved the changelog, as suggested;
* add the _US suffix to XEN_SYSCTL_CSCHED_MGR_DLY_MAX;
* add the _us suffix to vcpu_migration_delay too;
* fix wrong time conversions;
* drop redundant and wrong checks for [params]->vcpu_migration_delay to be 0.
---
 xen/common/sched_credit.c   |   45 +++++++++++++++++++++++++++++--------------
 xen/include/public/sysctl.h |    6 ++++++
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 7c40ee2d00..1f4da65d98 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -214,7 +214,7 @@ struct csched_private {
 
     /* Period of master and tick in milliseconds */
     unsigned int tick_period_us, ticks_per_tslice;
-    s_time_t ratelimit, tslice;
+    s_time_t ratelimit, tslice, vcpu_migr_delay;
 
     struct list_head active_sdom;
     uint32_t weight;
@@ -677,24 +677,24 @@ __csched_vcpu_check(struct vcpu *vc)
  * implicit overheads such as cache-warming. 1ms (1000) has been measured
  * as a good value.
  */
-static unsigned int vcpu_migration_delay;
-integer_param("vcpu_migration_delay", vcpu_migration_delay);
+static unsigned int vcpu_migration_delay_us;
+integer_param("vcpu_migration_delay", vcpu_migration_delay_us);
 
 void set_vcpu_migration_delay(unsigned int delay)
 {
-    vcpu_migration_delay = delay;
+    vcpu_migration_delay_us = delay;
 }
 
 unsigned int get_vcpu_migration_delay(void)
 {
-    return vcpu_migration_delay;
+    return vcpu_migration_delay_us;
 }
 
-static inline int
-__csched_vcpu_is_cache_hot(struct vcpu *v)
+static inline bool
+__csched_vcpu_is_cache_hot(const struct csched_private *prv, struct vcpu *v)
 {
-    int hot = ((NOW() - v->last_run_time) <
-               ((uint64_t)vcpu_migration_delay * 1000u));
+    bool hot = prv->vcpu_migr_delay &&
+               (NOW() - v->last_run_time) < prv->vcpu_migr_delay;
 
     if ( hot )
         SCHED_STAT_CRANK(vcpu_hot);
@@ -703,7 +703,8 @@ __csched_vcpu_is_cache_hot(struct vcpu *v)
 }
 
 static inline int
-__csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu, cpumask_t *mask)
+__csched_vcpu_is_migrateable(const struct csched_private *prv, struct vcpu *vc,
+                             int dest_cpu, cpumask_t *mask)
 {
     /*
      * Don't pick up work that's hot on peer PCPU, or that can't (or
@@ -714,7 +715,7 @@ __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu, cpumask_t *mask)
      */
     ASSERT(!vc->is_running);
 
-    return !__csched_vcpu_is_cache_hot(vc) &&
+    return !__csched_vcpu_is_cache_hot(prv, vc) &&
            cpumask_test_cpu(dest_cpu, mask);
 }
 
@@ -1251,7 +1252,8 @@ csched_sys_cntl(const struct scheduler *ops,
              || (params->ratelimit_us
                  && (params->ratelimit_us > XEN_SYSCTL_SCHED_RATELIMIT_MAX
                      || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN))
-             || MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms) 
)
+             || MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms)
+             || params->vcpu_migr_delay_us > XEN_SYSCTL_CSCHED_MGR_DLY_MAX_US )
                 goto out;
 
         spin_lock_irqsave(&prv->lock, flags);
@@ -1261,12 +1263,14 @@ csched_sys_cntl(const struct scheduler *ops,
         else if ( prv->ratelimit && !params->ratelimit_us )
             printk(XENLOG_INFO "Disabling context switch rate limiting\n");
         prv->ratelimit = MICROSECS(params->ratelimit_us);
+        prv->vcpu_migr_delay = MICROSECS(params->vcpu_migr_delay_us);
         spin_unlock_irqrestore(&prv->lock, flags);
 
         /* FALLTHRU */
     case XEN_SYSCTL_SCHEDOP_getinfo:
         params->tslice_ms = prv->tslice / MILLISECS(1);
         params->ratelimit_us = prv->ratelimit / MICROSECS(1);
+        params->vcpu_migr_delay_us = prv->vcpu_migr_delay / MICROSECS(1);
         rc = 0;
         break;
     }
@@ -1585,6 +1589,7 @@ csched_tick(void *_cpu)
 static struct csched_vcpu *
 csched_runq_steal(int peer_cpu, int cpu, int pri, int balance_step)
 {
+    const struct csched_private * const prv = CSCHED_PRIV(per_cpu(scheduler, 
cpu));
     const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu);
     struct csched_vcpu *speer;
     struct list_head *iter;
@@ -1634,7 +1639,7 @@ csched_runq_steal(int peer_cpu, int cpu, int pri, int balance_step)
             continue;
 
         affinity_balance_cpumask(vc, balance_step, cpumask_scratch);
-        if ( __csched_vcpu_is_migrateable(vc, cpu, cpumask_scratch) )
+        if ( __csched_vcpu_is_migrateable(prv, vc, cpu, cpumask_scratch) )
         {
             /* We got a candidate. Grab it! */
             TRACE_3D(TRC_CSCHED_STOLEN_VCPU, peer_cpu,
@@ -2091,7 +2096,7 @@ csched_dump(const struct scheduler *ops)
            "\tratelimit          = %"PRI_stime"us\n"
            "\tcredits per msec   = %d\n"
            "\tticks per tslice   = %d\n"
-           "\tmigration delay    = %uus\n",
+           "\tmigration delay    = %"PRI_stime"us\n",
            prv->ncpus,
            prv->master,
            prv->credit,
@@ -2103,7 +2108,7 @@ csched_dump(const struct scheduler *ops)
            prv->ratelimit / MICROSECS(1),
            CSCHED_CREDITS_PER_MSEC,
            prv->ticks_per_tslice,
-           vcpu_migration_delay);
+           prv->vcpu_migr_delay/ MICROSECS(1));
 
     cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
     printk("idlers: %s\n", idlers_buf);
@@ -2186,6 +2191,16 @@ csched_init(struct scheduler *ops)
     }
     else
         prv->ratelimit = MICROSECS(sched_ratelimit_us);
+
+    if ( vcpu_migration_delay_us > XEN_SYSCTL_CSCHED_MGR_DLY_MAX_US )
+    {
+        vcpu_migration_delay_us = 0;
+        printk("WARNING: vcpu_migration_delay outside of valid range 
[0,%d]us.\n"
+               "Resetting to default: %u\n",
+               XEN_SYSCTL_CSCHED_MGR_DLY_MAX_US, vcpu_migration_delay_us);
+    }
+    prv->vcpu_migr_delay = MICROSECS(vcpu_migration_delay_us);
+
     return 0;
 }
 
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 3669e32524..8ba644d6f0 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -601,6 +601,12 @@ struct xen_sysctl_credit_schedule {
 #define XEN_SYSCTL_CSCHED_TSLICE_MIN 1
     unsigned tslice_ms;
     unsigned ratelimit_us;
+    /*
+     * How long we consider a vCPU to be cache-hot on the
+     * CPU where it has run (max 100ms, in microseconds)
+    */
+#define XEN_SYSCTL_CSCHED_MGR_DLY_MAX_US (100 * 1000)
+    unsigned vcpu_migr_delay_us;
 };
 
 struct xen_sysctl_credit2_schedule {


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Reply via email to