When Xen migrates an HVM guest, by default its shared_info can
only hold up to 32 CPUs. As such the hypercall
VCPUOP_register_vcpu_info was introduced, which allowed us to
set up per-page areas for VCPUs. This means we can boot a PVHVM
guest with more than 32 VCPUs. During migration the per-cpu
structure is allocated freshly by the hypervisor (vcpu_info_mfn
is set to INVALID_MFN) so that the newly migrated guest
can make a VCPUOP_register_vcpu_info hypercall.

Unfortunately we end up triggering this condition in Xen:
/* Run this command on yourself or on other offline VCPUS. */
 if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )

which means we are unable to set up the per-cpu VCPU structures
for running vCPUs. The Linux PV code paths make this work by
iterating over every vCPU with:

 1) is target CPU up (VCPUOP_is_up hypercall?)
 2) if yes, then VCPUOP_down to pause it.
 3) VCPUOP_register_vcpu_info
 4) if we brought it down in step 2, then VCPUOP_up to bring it back up

But since VCPUOP_down, VCPUOP_is_up, and VCPUOP_up are
not allowed on HVM guests we can't do this. However with the
Xen git commit f80c5623a126afc31e6bb9382268d579f0324a7a
("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs")
we can do this. As such first check if VCPUOP_is_up is actually
possible before trying this dance.

As most of this dance code is done already in 'xen_vcpu_restore',
let's make it callable on both PV and HVM. This means moving one
of the checks out to 'xen_setup_runstate_info'.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
---
 arch/x86/xen/enlighten.c | 23 +++++++++++++++++------
 arch/x86/xen/suspend.c   |  7 +------
 arch/x86/xen/time.c      |  3 +++
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 46957ea..187dec6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -238,12 +238,23 @@ static void xen_vcpu_setup(int cpu)
 void xen_vcpu_restore(void)
 {
        int cpu;
+       bool vcpuops = true;
+       const struct cpumask *mask;
 
-       for_each_possible_cpu(cpu) {
+       mask = xen_pv_domain() ? cpu_possible_mask : cpu_online_mask;
+
+       /* Only Xen 4.5 and higher supports this. */
+       if (HYPERVISOR_vcpu_op(VCPUOP_is_up, smp_processor_id(), NULL) == 
-ENOSYS)
+               vcpuops = false;
+
+       for_each_cpu(cpu, mask) {
                bool other_cpu = (cpu != smp_processor_id());
-               bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
+               bool is_up = false;
 
-               if (other_cpu && is_up &&
+               if (vcpuops)
+                       is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
+
+               if (vcpuops && other_cpu && is_up &&
                    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
                        BUG();
 
@@ -252,7 +263,7 @@ void xen_vcpu_restore(void)
                if (have_vcpu_info_placement)
                        xen_vcpu_setup(cpu);
 
-               if (other_cpu && is_up &&
+               if (vcpuops && other_cpu && is_up &&
                    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
                        BUG();
        }
@@ -1704,8 +1715,8 @@ void __ref xen_hvm_init_shared_info(void)
         * in that case multiple vcpus might be online. */
        for_each_online_cpu(cpu) {
                /* Leave it to be NULL. */
-               if (cpu >= MAX_VIRT_CPUS)
-                       continue;
+               if (cpu >= MAX_VIRT_CPUS && cpu <= NR_CPUS)
+                       per_cpu(xen_vcpu, cpu) = NULL; /* Triggers 
xen_vcpu_setup.*/
                per_cpu(xen_vcpu, cpu) = 
&HYPERVISOR_shared_info->vcpu_info[cpu];
        }
 }
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 53b4c08..cd66397 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -31,15 +31,10 @@ static void xen_pv_pre_suspend(void)
 static void xen_hvm_post_suspend(int suspend_cancelled)
 {
 #ifdef CONFIG_XEN_PVHVM
-       int cpu;
        xen_hvm_init_shared_info();
        xen_callback_vector();
        xen_unplug_emulated_devices();
-       if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
-               for_each_online_cpu(cpu) {
-                       xen_setup_runstate_info(cpu);
-               }
-       }
+       xen_vcpu_restore();
 #endif
 }
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 55da33b..6882d0c 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -105,6 +105,9 @@ void xen_setup_runstate_info(int cpu)
 {
        struct vcpu_register_runstate_memory_area area;
 
+       if (xen_hvm_domain() && !(xen_feature(XENFEAT_hvm_safe_pvclock)))
+               return;
+
        area.addr.v = &per_cpu(xen_runstate, cpu);
 
        if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to