Author: gibbs
Date: Fri Sep 20 05:06:03 2013
New Revision: 255726
URL: http://svnweb.freebsd.org/changeset/base/255726

Log:
  Add support for suspend/resume/migration operations when running as a
  Xen PVHVM guest.
  
  Submitted by: Roger Pau Monné
  Sponsored by: Citrix Systems R&D
  Reviewed by:  gibbs
  Approved by:  re (blanket Xen)
  MFC after:    2 weeks
  
  sys/amd64/amd64/mp_machdep.c:
  sys/i386/i386/mp_machdep.c:
        - Make sure that there are no MMU related IPIs pending on migration.
        - Reset pending IPI_BITMAP on resume.
        - Init vcpu_info on resume.
  
  sys/amd64/include/intr_machdep.h:
  sys/i386/include/intr_machdep.h:
  sys/x86/acpica/acpi_wakeup.c:
  sys/x86/x86/intr_machdep.c:
  sys/x86/isa/atpic.c:
  sys/x86/x86/io_apic.c:
  sys/x86/x86/local_apic.c:
        - Add a "suspend_cancelled" parameter to pic_resume().  For the
          Xen PIC, restoration of interrupt services differs between
          the aborted suspend and normal resume cases, so we must provide
          this information.
  
  sys/dev/acpica/acpi_timer.c:
  sys/dev/xen/timer/timer.c:
  sys/timetc.h:
        - Don't swap out "suspend safe" timers across a suspend/resume
          cycle.  This includes the Xen PV and ACPI timers.
  
  sys/dev/xen/control/control.c:
        - Perform proper suspend/resume process for PVHVM:
                - Suspend all APs before going into suspension; this allows us
                  to reset the vcpu_info on resume for each AP.
                - Reset shared info page and callback on resume.
  
  sys/dev/xen/timer/timer.c:
        - Implement suspend/resume support for the PV timer. Since FreeBSD
          doesn't perform a per-cpu resume of the timer, we need to call
          smp_rendezvous in order to correctly resume the timer on each CPU.
  
  sys/dev/xen/xenpci/xenpci.c:
        - Don't reset the PCI interrupt on each suspend/resume.
  
  sys/kern/subr_smp.c:
        - When suspending a PVHVM domain make sure there are no MMU IPIs
          in-flight, or we will get a lockup on resume due to the fact that
          pending event channels are not carried over on migration.
        - Implement a generic version of restart_cpus that can be used by
          suspended and stopped cpus.
  
  sys/x86/xen/hvm.c:
        - Implement resume support for the hypercall page and shared info.
        - Clear vcpu_info so it can be reset by APs when resuming from
          suspension.
  
  sys/dev/xen/xenpci/xenpci.c:
  sys/x86/xen/hvm.c:
  sys/x86/xen/xen_intr.c:
        - Support UP kernel configurations.
  
  sys/x86/xen/xen_intr.c:
        - Properly rebind per-cpu VIRQs and IPIs on resume.

Modified:
  head/sys/amd64/amd64/mp_machdep.c
  head/sys/amd64/include/intr_machdep.h
  head/sys/dev/acpica/acpi_timer.c
  head/sys/dev/xen/control/control.c
  head/sys/dev/xen/timer/timer.c
  head/sys/dev/xen/xenpci/xenpci.c
  head/sys/i386/i386/mp_machdep.c
  head/sys/i386/include/intr_machdep.h
  head/sys/kern/subr_smp.c
  head/sys/sys/smp.h
  head/sys/sys/timetc.h
  head/sys/x86/acpica/acpi_wakeup.c
  head/sys/x86/isa/atpic.c
  head/sys/x86/x86/intr_machdep.c
  head/sys/x86/x86/io_apic.c
  head/sys/x86/x86/local_apic.c
  head/sys/x86/xen/hvm.c
  head/sys/x86/xen/xen_intr.c
  head/sys/xen/hvm.h

Modified: head/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- head/sys/amd64/amd64/mp_machdep.c   Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/amd64/amd64/mp_machdep.c   Fri Sep 20 05:06:03 2013        
(r255726)
@@ -1468,6 +1468,10 @@ cpususpend_handler(void)
 
        cpu = PCPU_GET(cpuid);
 
+#ifdef XENHVM
+       mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
+#endif
+
        if (savectx(susppcbs[cpu])) {
                ctx_fpusave(susppcbs[cpu]->pcb_fpususpend);
                wbinvd();
@@ -1486,11 +1490,23 @@ cpususpend_handler(void)
        while (!CPU_ISSET(cpu, &started_cpus))
                ia32_pause();
 
+#ifdef XENHVM
+       /*
+        * Reset pending bitmap IPIs, because Xen doesn't preserve pending
+        * event channels on migration.
+        */
+       cpu_ipi_pending[cpu] = 0;
+       /* register vcpu_info area */
+       xen_hvm_init_cpu();
+#endif
+
        /* Resume MCA and local APIC */
        mca_resume();
        lapic_setup(0);
 
        CPU_CLR_ATOMIC(cpu, &started_cpus);
+       /* Indicate that we are resumed */
+       CPU_CLR_ATOMIC(cpu, &suspended_cpus);
 }
 
 /*

Modified: head/sys/amd64/include/intr_machdep.h
==============================================================================
--- head/sys/amd64/include/intr_machdep.h       Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/amd64/include/intr_machdep.h       Fri Sep 20 05:06:03 2013        
(r255726)
@@ -102,7 +102,7 @@ struct pic {
        int (*pic_vector)(struct intsrc *);
        int (*pic_source_pending)(struct intsrc *);
        void (*pic_suspend)(struct pic *);
-       void (*pic_resume)(struct pic *);
+       void (*pic_resume)(struct pic *, bool suspend_cancelled);
        int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
            enum intr_polarity);
        int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
@@ -170,7 +170,7 @@ struct intsrc *intr_lookup_source(int ve
 int    intr_register_pic(struct pic *pic);
 int    intr_register_source(struct intsrc *isrc);
 int    intr_remove_handler(void *cookie);
-void   intr_resume(void);
+void   intr_resume(bool suspend_cancelled);
 void   intr_suspend(void);
 void   intrcnt_add(const char *name, u_long **countp);
 void   nexus_add_irq(u_long irq);

Modified: head/sys/dev/acpica/acpi_timer.c
==============================================================================
--- head/sys/dev/acpica/acpi_timer.c    Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/dev/acpica/acpi_timer.c    Fri Sep 20 05:06:03 2013        
(r255726)
@@ -189,6 +189,7 @@ acpi_timer_probe(device_t dev)
     else
        acpi_timer_timecounter.tc_counter_mask = 0x00ffffff;
     acpi_timer_timecounter.tc_frequency = acpi_timer_frequency;
+    acpi_timer_timecounter.tc_flags = TC_FLAGS_SUSPEND_SAFE;
     if (testenv("debug.acpi.timer_test"))
        acpi_timer_boot_test();
 
@@ -285,6 +286,14 @@ acpi_timer_suspend_handler(struct timeco
                acpi_timer_eh = NULL;
        }
 
+       if ((timecounter->tc_flags & TC_FLAGS_SUSPEND_SAFE) != 0) {
+               /*
+                * If we are using a suspend safe timecounter, don't
+                * save/restore it across suspend/resume.
+                */
+               return;
+       }
+
        KASSERT(newtc == &acpi_timer_timecounter,
            ("acpi_timer_suspend_handler: wrong timecounter"));
 

Modified: head/sys/dev/xen/control/control.c
==============================================================================
--- head/sys/dev/xen/control/control.c  Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/dev/xen/control/control.c  Fri Sep 20 05:06:03 2013        
(r255726)
@@ -119,11 +119,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/taskqueue.h>
 #include <sys/types.h>
 #include <sys/vnode.h>
-
-#ifndef XENHVM
 #include <sys/sched.h>
 #include <sys/smp.h>
-#endif
+#include <sys/eventhandler.h>
 
 #include <geom/geom.h>
 
@@ -140,6 +138,10 @@ __FBSDID("$FreeBSD$");
 #include <xen/gnttab.h>
 #include <xen/xen_intr.h>
 
+#ifdef XENHVM
+#include <xen/hvm.h>
+#endif
+
 #include <xen/interface/event_channel.h>
 #include <xen/interface/grant_table.h>
 
@@ -199,7 +201,7 @@ extern void xencons_resume(void);
 static void
 xctrl_suspend()
 {
-       int i, j, k, fpp;
+       int i, j, k, fpp, suspend_cancelled;
        unsigned long max_pfn, start_info_mfn;
 
        EVENTHANDLER_INVOKE(power_suspend);
@@ -264,7 +266,7 @@ xctrl_suspend()
         */
        start_info_mfn = VTOMFN(xen_start_info);
        pmap_suspend();
-       HYPERVISOR_suspend(start_info_mfn);
+       suspend_cancelled = HYPERVISOR_suspend(start_info_mfn);
        pmap_resume();
 
        pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
@@ -287,7 +289,7 @@ xctrl_suspend()
        HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
 
        gnttab_resume();
-       intr_resume();
+       intr_resume(suspend_cancelled != 0);
        local_irq_enable();
        xencons_resume();
 
@@ -331,16 +333,31 @@ xen_pv_shutdown_final(void *arg, int how
 }
 
 #else
-extern void xenpci_resume(void);
 
 /* HVM mode suspension. */
 static void
 xctrl_suspend()
 {
+#ifdef SMP
+       cpuset_t cpu_suspend_map;
+#endif
        int suspend_cancelled;
 
        EVENTHANDLER_INVOKE(power_suspend);
 
+       if (smp_started) {
+               thread_lock(curthread);
+               sched_bind(curthread, 0);
+               thread_unlock(curthread);
+       }
+       KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));
+
+       /*
+        * Clear our XenStore node so the toolstack knows we are
+        * responding to the suspend request.
+        */
+       xs_write(XST_NIL, "control", "shutdown", "");
+
        /*
         * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
         * drivers need this.
@@ -353,31 +370,67 @@ xctrl_suspend()
        }
        mtx_unlock(&Giant);
 
+#ifdef SMP
+       if (smp_started) {
+               /*
+                * Suspend other CPUs. This prevents IPIs while we
+                * are resuming, and will allow us to reset per-cpu
+                * vcpu_info on resume.
+                */
+               cpu_suspend_map = all_cpus;
+               CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
+               if (!CPU_EMPTY(&cpu_suspend_map))
+                       suspend_cpus(cpu_suspend_map);
+       }
+#endif
+
        /*
         * Prevent any races with evtchn_interrupt() handler.
         */
        disable_intr();
        intr_suspend();
+       xen_hvm_suspend();
 
        suspend_cancelled = HYPERVISOR_suspend(0);
 
-       intr_resume();
+       xen_hvm_resume(suspend_cancelled != 0);
+       intr_resume(suspend_cancelled != 0);
+       enable_intr();
 
        /*
-        * Re-enable interrupts and put the scheduler back to normal.
+        * Reset grant table info.
         */
-       enable_intr();
+       gnttab_resume();
+
+#ifdef SMP
+       if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
+               /*
+                * Now that event channels have been initialized,
+                * resume CPUs.
+                */
+               resume_cpus(cpu_suspend_map);
+       }
+#endif
 
        /*
         * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
         * similar.
         */
        mtx_lock(&Giant);
-       if (!suspend_cancelled)
-               DEVICE_RESUME(root_bus);
+       DEVICE_RESUME(root_bus);
        mtx_unlock(&Giant);
 
+       if (smp_started) {
+               thread_lock(curthread);
+               sched_unbind(curthread);
+               thread_unlock(curthread);
+       }
+
        EVENTHANDLER_INVOKE(power_resume);
+
+       if (bootverbose)
+               printf("System resumed after suspension\n");
+
 }
 #endif
 

Modified: head/sys/dev/xen/timer/timer.c
==============================================================================
--- head/sys/dev/xen/timer/timer.c      Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/dev/xen/timer/timer.c      Fri Sep 20 05:06:03 2013        
(r255726)
@@ -1,4 +1,4 @@
-/**
+/*-
  * Copyright (c) 2009 Adrian Chadd
  * Copyright (c) 2012 Spectra Logic Corporation
  * All rights reserved.
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpufunc.h>
 #include <machine/clock.h>
 #include <machine/_inttypes.h>
+#include <machine/smp.h>
 
 #include "clock_if.h"
 
@@ -316,7 +317,7 @@ xentimer_settime(device_t dev __unused, 
         * Don't return EINVAL here; just silently fail if the domain isn't
         * privileged enough to set the TOD.
         */
-       return(0);
+       return (0);
 }
 
 /**
@@ -339,7 +340,7 @@ xentimer_gettime(device_t dev, struct ti
        xen_fetch_uptime(&u_ts);
        timespecadd(ts, &u_ts);
 
-       return(0);
+       return (0);
 }
 
 /**
@@ -457,8 +458,9 @@ xentimer_attach(device_t dev)
 
        /* Bind an event channel to a VIRQ on each VCPU. */
        CPU_FOREACH(i) {
-               struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, 
xentimer_pcpu);
+               struct xentimer_pcpu_data *pcpu;
 
+               pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
                error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
                if (error) {
                        device_printf(dev, "Error disabling Xen periodic timer "
@@ -493,6 +495,7 @@ xentimer_attach(device_t dev)
        /* Register the timecounter. */
        sc->tc.tc_name = "XENTIMER";
        sc->tc.tc_quality = XENTIMER_QUALITY;
+       sc->tc.tc_flags = TC_FLAGS_SUSPEND_SAFE;
        /*
         * The underlying resolution is in nanoseconds, since the timer info
         * scales TSC frequencies using a fraction that represents time in
@@ -523,75 +526,60 @@ xentimer_detach(device_t dev)
        return (EBUSY);
 }
 
-/**
- * The following device methods are disabled because they wouldn't work
- * properly.
- */
-#ifdef NOTYET
+static void
+xentimer_percpu_resume(void *arg)
+{
+       device_t dev = (device_t) arg;
+       struct xentimer_softc *sc = device_get_softc(dev);
+
+       xentimer_et_start(&sc->et, sc->et.et_min_period, 0);
+}
+
 static int
 xentimer_resume(device_t dev)
 {
-       struct xentimer_softc *sc = device_get_softc(dev);
-       int error = 0;
+       int error;
        int i;
 
-       device_printf(sc->dev, "%s", __func__);
+       /* Disable the periodic timer */
        CPU_FOREACH(i) {
-               struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, 
xentimer_pcpu);
-
-               /* Skip inactive timers. */
-               if (pcpu->timer == 0)
-                       continue;
-
-               /*
-                * XXX This won't actually work, because Xen requires that
-                *     singleshot timers be set while running on the given CPU.
-                */
-               error = xentimer_vcpu_start_timer(i, pcpu->timer);
-               if (error == -ETIME) {
-                       /* Event time has already passed; process. */
-                       xentimer_intr(sc);
-               } else if (error != 0) {
-                       panic("%s: error %d restarting vcpu %d\n",
-                           __func__, error, i);
+               error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
+               if (error != 0) {
+                       device_printf(dev,
+                           "Error disabling Xen periodic timer on CPU %d\n",
+                           i);
+                       return (error);
                }
        }
 
-       return (error);
+       /* Reset the last uptime value */
+       xen_timer_last_time = 0;
+
+       /* Reset the RTC clock */
+       inittodr(time_second);
+
+       /* Kick the timers on all CPUs */
+       smp_rendezvous(NULL, xentimer_percpu_resume, NULL, dev);
+
+       if (bootverbose)
+               device_printf(dev, "resumed operation after suspension\n");
+
+       return (0);
 }
 
 static int
 xentimer_suspend(device_t dev)
 {
-       struct xentimer_softc *sc = device_get_softc(dev);
-       int error = 0;
-       int i;
-
-       device_printf(sc->dev, "%s", __func__);
-       CPU_FOREACH(i) {
-               struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, 
xentimer_pcpu);
-
-               /* Skip inactive timers. */
-               if (pcpu->timer == 0)
-                       continue;
-               error = xentimer_vcpu_stop_timer(i);
-               if (error)
-                       panic("Error %d stopping VCPU %d timer\n", error, i);
-       }
-
-       return (error);
+       return (0);
 }
-#endif
 
 static device_method_t xentimer_methods[] = {
        DEVMETHOD(device_identify, xentimer_identify),
        DEVMETHOD(device_probe, xentimer_probe),
        DEVMETHOD(device_attach, xentimer_attach),
        DEVMETHOD(device_detach, xentimer_detach),
-#ifdef NOTYET
        DEVMETHOD(device_suspend, xentimer_suspend),
        DEVMETHOD(device_resume, xentimer_resume),
-#endif
        /* clock interface */
        DEVMETHOD(clock_gettime, xentimer_gettime),
        DEVMETHOD(clock_settime, xentimer_settime),

Modified: head/sys/dev/xen/xenpci/xenpci.c
==============================================================================
--- head/sys/dev/xen/xenpci/xenpci.c    Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/dev/xen/xenpci/xenpci.c    Fri Sep 20 05:06:03 2013        
(r255726)
@@ -77,6 +77,7 @@ xenpci_irq_init(device_t device, struct 
        if (error)
                return error;
 
+#ifdef SMP
        /*
         * When using the PCI event delivery callback we cannot assign
         * events to specific vCPUs, so all events are delivered to vCPU#0 by
@@ -88,6 +89,7 @@ xenpci_irq_init(device_t device, struct 
                              scp->res_irq, 0);
        if (error)
                return error;
+#endif
 
        xen_hvm_set_callback(device);
        return (0);
@@ -309,28 +311,12 @@ xenpci_detach(device_t dev)
 static int
 xenpci_suspend(device_t dev)
 {
-       struct xenpci_softc *scp = device_get_softc(dev);
-       device_t parent = device_get_parent(dev);
-
-       if (scp->intr_cookie != NULL) {
-               if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq,
-                   scp->intr_cookie) != 0)
-                       printf("intr teardown failed.. continuing\n");
-               scp->intr_cookie = NULL;
-       }
-
        return (bus_generic_suspend(dev));
 }
 
 static int
 xenpci_resume(device_t dev)
 {
-       struct xenpci_softc *scp = device_get_softc(dev);
-       device_t parent = device_get_parent(dev);
-
-       BUS_SETUP_INTR(parent, dev, scp->res_irq,
-           INTR_MPSAFE|INTR_TYPE_MISC, xenpci_intr_filter, NULL,
-           /*trap_frame*/NULL, &scp->intr_cookie);
        xen_hvm_set_callback(dev);
        return (bus_generic_resume(dev));
 }

Modified: head/sys/i386/i386/mp_machdep.c
==============================================================================
--- head/sys/i386/i386/mp_machdep.c     Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/i386/i386/mp_machdep.c     Fri Sep 20 05:06:03 2013        
(r255726)
@@ -1529,6 +1529,10 @@ cpususpend_handler(void)
 
        cpu = PCPU_GET(cpuid);
 
+#ifdef XENHVM
+       mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
+#endif
+
        if (savectx(susppcbs[cpu])) {
                wbinvd();
                CPU_SET_ATOMIC(cpu, &suspended_cpus);
@@ -1545,10 +1549,22 @@ cpususpend_handler(void)
        while (!CPU_ISSET(cpu, &started_cpus))
                ia32_pause();
 
+#ifdef XENHVM
+       /*
+        * Reset pending bitmap IPIs, because Xen doesn't preserve pending
+        * event channels on migration.
+        */
+       cpu_ipi_pending[cpu] = 0;
+       /* register vcpu_info area */
+       xen_hvm_init_cpu();
+#endif
+
        /* Resume MCA and local APIC */
        mca_resume();
        lapic_setup(0);
 
+       /* Indicate that we are resumed */
+       CPU_CLR_ATOMIC(cpu, &suspended_cpus);
        CPU_CLR_ATOMIC(cpu, &started_cpus);
 }
 /*

Modified: head/sys/i386/include/intr_machdep.h
==============================================================================
--- head/sys/i386/include/intr_machdep.h        Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/i386/include/intr_machdep.h        Fri Sep 20 05:06:03 2013        
(r255726)
@@ -108,7 +108,7 @@ struct pic {
        int (*pic_vector)(struct intsrc *);
        int (*pic_source_pending)(struct intsrc *);
        void (*pic_suspend)(struct pic *);
-       void (*pic_resume)(struct pic *);
+       void (*pic_resume)(struct pic *, bool suspend_cancelled);
        int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
            enum intr_polarity);
        int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
@@ -166,7 +166,7 @@ struct intsrc *intr_lookup_source(int ve
 int    intr_register_pic(struct pic *pic);
 int    intr_register_source(struct intsrc *isrc);
 int    intr_remove_handler(void *cookie);
-void   intr_resume(void);
+void   intr_resume(bool suspend_cancelled);
 void   intr_suspend(void);
 void   intrcnt_add(const char *name, u_long **countp);
 void   nexus_add_irq(u_long irq);

Modified: head/sys/kern/subr_smp.c
==============================================================================
--- head/sys/kern/subr_smp.c    Fri Sep 20 04:35:09 2013        (r255725)
+++ head/sys/kern/subr_smp.c    Fri Sep 20 05:06:03 2013        (r255726)
@@ -225,6 +225,18 @@ generic_stop_cpus(cpuset_t map, u_int ty
        CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
            cpusetobj_strprint(cpusetbuf, &map), type);
 
+#ifdef XENHVM
+       /*
+        * When migrating a PVHVM domain we need to make sure there are
+        * no IPIs in progress.  IPIs that have been issued, but not
+        * yet delivered (not pending on a vCPU) will be lost in the
+        * IPI rebinding process, violating FreeBSD's assumption of
+        * reliable IPI delivery.
+        */
+       if (type == IPI_SUSPEND)
+               mtx_lock_spin(&smp_ipi_mtx);
+#endif
+
        if (stopping_cpu != PCPU_GET(cpuid))
                while (atomic_cmpset_int(&stopping_cpu, NOCPU,
                    PCPU_GET(cpuid)) == 0)
@@ -252,6 +264,11 @@ generic_stop_cpus(cpuset_t map, u_int ty
                }
        }
 
+#ifdef XENHVM
+       if (type == IPI_SUSPEND)
+               mtx_unlock_spin(&smp_ipi_mtx);
+#endif
+
        stopping_cpu = NOCPU;
        return (1);
 }
@@ -292,28 +309,60 @@ suspend_cpus(cpuset_t map)
  *   0: NA
  *   1: ok
  */
-int
-restart_cpus(cpuset_t map)
+static int
+generic_restart_cpus(cpuset_t map, u_int type)
 {
 #ifdef KTR
        char cpusetbuf[CPUSETBUFSIZ];
 #endif
+       volatile cpuset_t *cpus;
+
+       KASSERT(
+#if defined(__amd64__) || defined(__i386__)
+           type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
+#else
+           type == IPI_STOP || type == IPI_STOP_HARD,
+#endif
+           ("%s: invalid stop type", __func__));
 
        if (!smp_started)
                return 0;
 
        CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
 
+#if defined(__amd64__) || defined(__i386__)
+       if (type == IPI_SUSPEND)
+               cpus = &suspended_cpus;
+       else
+#endif
+               cpus = &stopped_cpus;
+
        /* signal other cpus to restart */
        CPU_COPY_STORE_REL(&map, &started_cpus);
 
        /* wait for each to clear its bit */
-       while (CPU_OVERLAP(&stopped_cpus, &map))
+       while (CPU_OVERLAP(cpus, &map))
                cpu_spinwait();
 
        return 1;
 }
 
+int
+restart_cpus(cpuset_t map)
+{
+
+       return (generic_restart_cpus(map, IPI_STOP));
+}
+
+#if defined(__amd64__) || defined(__i386__)
+int
+resume_cpus(cpuset_t map)
+{
+
+       return (generic_restart_cpus(map, IPI_SUSPEND));
+}
+#endif
+
 /*
  * All-CPU rendezvous.  CPUs are signalled, all execute the setup function 
  * (if specified), rendezvous, execute the action function (if specified),

Modified: head/sys/sys/smp.h
==============================================================================
--- head/sys/sys/smp.h  Fri Sep 20 04:35:09 2013        (r255725)
+++ head/sys/sys/smp.h  Fri Sep 20 05:06:03 2013        (r255726)
@@ -166,6 +166,7 @@ int stop_cpus(cpuset_t);
 int    stop_cpus_hard(cpuset_t);
 #if defined(__amd64__) || defined(__i386__)
 int    suspend_cpus(cpuset_t);
+int    resume_cpus(cpuset_t);
 #endif
 
 void   smp_rendezvous_action(void);

Modified: head/sys/sys/timetc.h
==============================================================================
--- head/sys/sys/timetc.h       Fri Sep 20 04:35:09 2013        (r255725)
+++ head/sys/sys/timetc.h       Fri Sep 20 05:06:03 2013        (r255726)
@@ -59,6 +59,10 @@ struct timecounter {
                 */
        u_int                   tc_flags;
 #define        TC_FLAGS_C3STOP         1       /* Timer dies in C3. */
+#define        TC_FLAGS_SUSPEND_SAFE   2       /*
+                                        * Timer functional across
+                                        * suspend/resume.
+                                        */
 
        void                    *tc_priv;
                /* Pointer to the timecounter's private parts. */

Modified: head/sys/x86/acpica/acpi_wakeup.c
==============================================================================
--- head/sys/x86/acpica/acpi_wakeup.c   Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/x86/acpica/acpi_wakeup.c   Fri Sep 20 05:06:03 2013        
(r255726)
@@ -266,7 +266,7 @@ acpi_wakeup_machdep(struct acpi_softc *s
                        restart_cpus(suspcpus);
 #endif
                mca_resume();
-               intr_resume();
+               intr_resume(/*suspend_cancelled*/false);
 
                AcpiSetFirmwareWakingVector(0);
        } else {

Modified: head/sys/x86/isa/atpic.c
==============================================================================
--- head/sys/x86/isa/atpic.c    Fri Sep 20 04:35:09 2013        (r255725)
+++ head/sys/x86/isa/atpic.c    Fri Sep 20 05:06:03 2013        (r255726)
@@ -123,7 +123,7 @@ static void atpic_eoi_slave(struct intsr
 static void atpic_enable_intr(struct intsrc *isrc);
 static void atpic_disable_intr(struct intsrc *isrc);
 static int atpic_vector(struct intsrc *isrc);
-static void atpic_resume(struct pic *pic);
+static void atpic_resume(struct pic *pic, bool suspend_cancelled);
 static int atpic_source_pending(struct intsrc *isrc);
 static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
     enum intr_polarity pol);
@@ -276,7 +276,7 @@ atpic_source_pending(struct intsrc *isrc
 }
 
 static void
-atpic_resume(struct pic *pic)
+atpic_resume(struct pic *pic, bool suspend_cancelled)
 {
        struct atpic *ap = (struct atpic *)pic;
 

Modified: head/sys/x86/x86/intr_machdep.c
==============================================================================
--- head/sys/x86/x86/intr_machdep.c     Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/x86/x86/intr_machdep.c     Fri Sep 20 05:06:03 2013        
(r255726)
@@ -279,7 +279,7 @@ intr_execute_handlers(struct intsrc *isr
 }
 
 void
-intr_resume(void)
+intr_resume(bool suspend_cancelled)
 {
        struct pic *pic;
 
@@ -289,7 +289,7 @@ intr_resume(void)
        mtx_lock(&intr_table_lock);
        TAILQ_FOREACH(pic, &pics, pics) {
                if (pic->pic_resume != NULL)
-                       pic->pic_resume(pic);
+                       pic->pic_resume(pic, suspend_cancelled);
        }
        mtx_unlock(&intr_table_lock);
 }

Modified: head/sys/x86/x86/io_apic.c
==============================================================================
--- head/sys/x86/x86/io_apic.c  Fri Sep 20 04:35:09 2013        (r255725)
+++ head/sys/x86/x86/io_apic.c  Fri Sep 20 05:06:03 2013        (r255726)
@@ -119,7 +119,7 @@ static int  ioapic_vector(struct intsrc *
 static int     ioapic_source_pending(struct intsrc *isrc);
 static int     ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
                    enum intr_polarity pol);
-static void    ioapic_resume(struct pic *pic);
+static void    ioapic_resume(struct pic *pic, bool suspend_cancelled);
 static int     ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
 static void    ioapic_program_intpin(struct ioapic_intsrc *intpin);
 
@@ -486,7 +486,7 @@ ioapic_config_intr(struct intsrc *isrc, 
 }
 
 static void
-ioapic_resume(struct pic *pic)
+ioapic_resume(struct pic *pic, bool suspend_cancelled)
 {
        struct ioapic *io = (struct ioapic *)pic;
        int i;

Modified: head/sys/x86/x86/local_apic.c
==============================================================================
--- head/sys/x86/x86/local_apic.c       Fri Sep 20 04:35:09 2013        
(r255725)
+++ head/sys/x86/x86/local_apic.c       Fri Sep 20 05:06:03 2013        
(r255726)
@@ -161,7 +161,7 @@ static u_long lapic_timer_divisor;
 static struct eventtimer lapic_et;
 
 static void    lapic_enable(void);
-static void    lapic_resume(struct pic *pic);
+static void    lapic_resume(struct pic *pic, bool suspend_cancelled);
 static void    lapic_timer_oneshot(struct lapic *,
                    u_int count, int enable_int);
 static void    lapic_timer_periodic(struct lapic *,
@@ -566,7 +566,7 @@ lapic_enable(void)
 
 /* Reset the local APIC on the BSP during resume. */
 static void
-lapic_resume(struct pic *pic)
+lapic_resume(struct pic *pic, bool suspend_cancelled)
 {
 
        lapic_setup(0);

Modified: head/sys/x86/xen/hvm.c
==============================================================================
--- head/sys/x86/xen/hvm.c      Fri Sep 20 04:35:09 2013        (r255725)
+++ head/sys/x86/xen/hvm.c      Fri Sep 20 05:06:03 2013        (r255726)
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
 #include <xen/interface/vcpu.h>
 
 /*--------------------------- Forward Declarations 
---------------------------*/
+#ifdef SMP
 static driver_filter_t xen_smp_rendezvous_action;
 static driver_filter_t xen_invltlb;
 static driver_filter_t xen_invlpg;
@@ -70,6 +71,7 @@ static driver_filter_t xen_ipi_bitmap_ha
 static driver_filter_t xen_cpustop_handler;
 static driver_filter_t xen_cpususpend_handler;
 static driver_filter_t xen_cpustophard_handler;
+#endif
 
 /*---------------------------- Extern Declarations 
---------------------------*/
 /* Variables used by mp_machdep to perform the MMU related IPIs */
@@ -93,6 +95,12 @@ extern void pmap_lazyfix_action(void);
 #define        IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
 
 /*-------------------------------- Local Types 
-------------------------------*/
+enum xen_hvm_init_type {
+       XEN_HVM_INIT_COLD,
+       XEN_HVM_INIT_CANCELLED_SUSPEND,
+       XEN_HVM_INIT_RESUME
+};
+
 struct xen_ipi_handler
 {
        driver_filter_t *filter;
@@ -104,6 +112,7 @@ enum xen_domain_type xen_domain_type = X
 
 static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
 
+#ifdef SMP
 static struct xen_ipi_handler xen_ipis[] = 
 {
        [IPI_TO_IDX(IPI_RENDEZVOUS)]    = { xen_smp_rendezvous_action,  "r"   },
@@ -119,6 +128,7 @@ static struct xen_ipi_handler xen_ipis[]
        [IPI_TO_IDX(IPI_SUSPEND)]       = { xen_cpususpend_handler,     "sp"  },
        [IPI_TO_IDX(IPI_STOP_HARD)]     = { xen_cpustophard_handler,    "sth" },
 };
+#endif
 
 /**
  * If non-zero, the hypervisor has been configured to use a direct
@@ -129,13 +139,16 @@ int xen_vector_callback_enabled;
 /*------------------------------- Per-CPU Data 
-------------------------------*/
 DPCPU_DEFINE(struct vcpu_info, vcpu_local_info);
 DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
+#ifdef SMP
 DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
+#endif
 
 /*------------------ Hypervisor Access Shared Memory Regions 
-----------------*/
 /** Hypercall table accessed via HYPERVISOR_*_op() methods. */
 char *hypercall_stubs;
 shared_info_t *HYPERVISOR_shared_info;
 
+#ifdef SMP
 /*---------------------------- XEN PV IPI Handlers 
---------------------------*/
 /*
  * This are C clones of the ASM functions found in apic_vector.s
@@ -496,6 +509,7 @@ xen_init_ipis(void)
        /* Set the xen pv ipi ops to replace the native ones */
        cpu_ops.ipi_vectored = xen_ipi_vectored;
 }
+#endif
 
 /*---------------------- XEN Hypervisor Probe and Setup 
----------------------*/
 static uint32_t
@@ -579,6 +593,9 @@ xen_hvm_set_callback(device_t dev)
        struct xen_hvm_param xhp;
        int irq;
 
+       if (xen_vector_callback_enabled)
+               return;
+
        xhp.domid = DOMID_SELF;
        xhp.index = HVM_PARAM_CALLBACK_IRQ;
        if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
@@ -637,41 +654,83 @@ xen_hvm_disable_emulated_devices(void)
        outw(XEN_MAGIC_IOPORT, XMI_UNPLUG_IDE_DISKS|XMI_UNPLUG_NICS);
 }
 
+static void
+xen_hvm_init(enum xen_hvm_init_type init_type)
+{
+       int error;
+       int i;
+
+       if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
+               return;
+
+       error = xen_hvm_init_hypercall_stubs();
+
+       switch (init_type) {
+       case XEN_HVM_INIT_COLD:
+               if (error != 0)
+                       return;
+
+               setup_xen_features();
+               break;
+       case XEN_HVM_INIT_RESUME:
+               if (error != 0)
+                       panic("Unable to init Xen hypercall stubs on resume");
+               break;
+       default:
+               panic("Unsupported HVM initialization type");
+       }
+
+       /* Clear any stale vcpu_info. */
+       CPU_FOREACH(i)
+               DPCPU_ID_SET(i, vcpu_info, NULL);
+
+       xen_vector_callback_enabled = 0;
+       xen_domain_type = XEN_HVM_DOMAIN;
+       xen_hvm_init_shared_info_page();
+       xen_hvm_set_callback(NULL);
+       xen_hvm_disable_emulated_devices();
+} 
+
 void
 xen_hvm_suspend(void)
 {
 }
 
 void
-xen_hvm_resume(void)
+xen_hvm_resume(bool suspend_cancelled)
 {
 
-       xen_hvm_init_hypercall_stubs();
-       xen_hvm_init_shared_info_page();
+       xen_hvm_init(suspend_cancelled ?
+           XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
+
+       /* Register vcpu_info area for CPU#0. */
+       xen_hvm_init_cpu();
 }
  
 static void
-xen_hvm_init(void *dummy __unused)
+xen_hvm_sysinit(void *arg __unused)
 {
+       xen_hvm_init(XEN_HVM_INIT_COLD);
+}
 
-       if (xen_hvm_init_hypercall_stubs() != 0)
-               return;
-
-       xen_domain_type = XEN_HVM_DOMAIN;
-       setup_xen_features();
-       xen_hvm_init_shared_info_page();
-       xen_hvm_set_callback(NULL);
-       xen_hvm_disable_emulated_devices();
-} 
-
-void xen_hvm_init_cpu(void)
+void
+xen_hvm_init_cpu(void)
 {
        struct vcpu_register_vcpu_info info;
        struct vcpu_info *vcpu_info;
        int cpu, rc;
 
-       cpu = PCPU_GET(acpi_id);
+       if (DPCPU_GET(vcpu_info) != NULL) {
+               /*
+                * vcpu_info is already set.  We're resuming
+                * from a failed migration and our pre-suspend
+                * configuration is still valid.
+                */
+               return;
+       }
+
        vcpu_info = DPCPU_PTR(vcpu_local_info);
+       cpu = PCPU_GET(acpi_id);
        info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT;
        info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info));
 
@@ -682,6 +741,8 @@ void xen_hvm_init_cpu(void)
                DPCPU_SET(vcpu_info, vcpu_info);
 }
 
-SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_init, NULL);
+SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
+#ifdef SMP
 SYSINIT(xen_init_ipis, SI_SUB_SMP, SI_ORDER_FIRST, xen_init_ipis, NULL);
+#endif
 SYSINIT(xen_hvm_init_cpu, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_init_cpu, NULL);

Modified: head/sys/x86/xen/xen_intr.c
==============================================================================
--- head/sys/x86/xen/xen_intr.c Fri Sep 20 04:35:09 2013        (r255725)
+++ head/sys/x86/xen/xen_intr.c Fri Sep 20 05:06:03 2013        (r255726)
@@ -120,7 +120,7 @@ struct xenisrc {
 #define ARRAY_SIZE(a)  (sizeof(a) / sizeof(a[0]))
 
 static void    xen_intr_suspend(struct pic *);
-static void    xen_intr_resume(struct pic *);
+static void    xen_intr_resume(struct pic *, bool suspend_cancelled);
 static void    xen_intr_enable_source(struct intsrc *isrc);
 static void    xen_intr_disable_source(struct intsrc *isrc, int eoi);
 static void    xen_intr_eoi_source(struct intsrc *isrc);
@@ -334,7 +334,7 @@ xen_intr_release_isrc(struct xenisrc *is
        evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port);
        evtchn_cpu_unmask_port(0, isrc->xi_port);
 
-       if (isrc->xi_close != 0) {
+       if (isrc->xi_close != 0 && is_valid_evtchn(isrc->xi_port)) {
                struct evtchn_close close = { .port = isrc->xi_port };
                if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
                        panic("EVTCHNOP_close failed");
@@ -408,6 +408,7 @@ xen_intr_bind_isrc(struct xenisrc **isrc
                return (error);
        }
        *isrcp = isrc;
+       evtchn_unmask_port(local_port);
        return (0);
 }
 
@@ -571,6 +572,9 @@ xen_intr_init(void *dummy __unused)
        struct xen_intr_pcpu_data *pcpu;
        int i;
 
+       if (!xen_domain())
+               return (0);
+
        mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF);
 
        /*
@@ -602,20 +606,116 @@ xen_intr_suspend(struct pic *unused)
 {
 }
 
+static void
+xen_rebind_ipi(struct xenisrc *isrc)
+{
+#ifdef SMP
+       int cpu = isrc->xi_cpu;
+       int acpi_id = pcpu_find(cpu)->pc_acpi_id;
+       int error;
+       struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id };
+
+       error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+                                           &bind_ipi);
+       if (error != 0)
+               panic("unable to rebind xen IPI: %d", error);
+
+       isrc->xi_port = bind_ipi.port;
+       isrc->xi_cpu = 0;
+       xen_intr_port_to_isrc[bind_ipi.port] = isrc;
+
+       error = xen_intr_assign_cpu(&isrc->xi_intsrc,
+                                   cpu_apic_ids[cpu]);
+       if (error)
+               panic("unable to bind xen IPI to CPU#%d: %d",
+                     cpu, error);
+
+       evtchn_unmask_port(bind_ipi.port);
+#else
+       panic("Resume IPI event channel on UP");
+#endif
+}
+
+static void
+xen_rebind_virq(struct xenisrc *isrc)
+{
+       int cpu = isrc->xi_cpu;
+       int acpi_id = pcpu_find(cpu)->pc_acpi_id;
+       int error;
+       struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq,
+                                             .vcpu = acpi_id };
+
+       error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+                                           &bind_virq);
+       if (error != 0)
+               panic("unable to rebind xen VIRQ#%d: %d", isrc->xi_virq, error);
+
+       isrc->xi_port = bind_virq.port;
+       isrc->xi_cpu = 0;
+       xen_intr_port_to_isrc[bind_virq.port] = isrc;
+
+#ifdef SMP
+       error = xen_intr_assign_cpu(&isrc->xi_intsrc,
+                                   cpu_apic_ids[cpu]);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to