The branch main has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=6c69c6bb4c7ffde48b130df707799d4eca21ca9f

commit 6c69c6bb4c7ffde48b130df707799d4eca21ca9f
Author:     Adam Fenn <[email protected]>
AuthorDate: 2021-08-04 15:42:48 +0000
Commit:     Konstantin Belousov <[email protected]>
CommitDate: 2021-08-14 12:57:54 +0000

    kvm_clock: KVM paravirtual clock support
    
    Add support for the KVM paravirtual clock device.
    
    Sponsored by:   Juniper Networks, Inc.
    Sponsored by:   Klara, Inc.
    Reviewed by:    kib
    Differential Revision:  https://reviews.freebsd.org/D29733
---
 sys/amd64/conf/GENERIC        |   3 +
 sys/amd64/conf/MINIMAL        |   3 +
 sys/amd64/conf/NOTES          |   3 +
 sys/conf/files.x86            |   3 +-
 sys/dev/kvm_clock/kvm_clock.c | 240 ++++++++++++++++++++++++++++++++++++++++++
 sys/i386/conf/GENERIC         |   3 +
 sys/i386/conf/MINIMAL         |   3 +
 sys/i386/conf/NOTES           |   3 +
 sys/x86/include/kvm.h         |  80 ++++++++++++++
 9 files changed, 340 insertions(+), 1 deletion(-)

diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index f7b41919575d..78b5e0e299f6 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -376,6 +376,9 @@ device              virtio_blk              # VirtIO Block 
device
 device         virtio_scsi             # VirtIO SCSI device
 device         virtio_balloon          # VirtIO Memory Balloon device
 
+# Linux KVM paravirtualization support
+device         kvm_clock               # KVM paravirtual clock driver
+
 # HyperV drivers and enhancement support
 device         hyperv                  # HyperV drivers 
 
diff --git a/sys/amd64/conf/MINIMAL b/sys/amd64/conf/MINIMAL
index 14f91e6c8eaf..f724cbd2e3f1 100644
--- a/sys/amd64/conf/MINIMAL
+++ b/sys/amd64/conf/MINIMAL
@@ -131,6 +131,9 @@ device              ether                   # Ethernet 
support
 # Note that 'bpf' is required for DHCP.
 device         bpf                     # Berkeley packet filter
 
+# Linux KVM paravirtualization support
+device         kvm_clock               # KVM paravirtual clock driver
+
 # Xen HVM Guest Optimizations
 # NOTE: XENHVM depends on xenpci and xentimer.
 # They must be added or removed together.
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
index 501ceaedb222..c1db8ef7512f 100644
--- a/sys/amd64/conf/NOTES
+++ b/sys/amd64/conf/NOTES
@@ -498,6 +498,9 @@ device              virtio_balloon  # VirtIO Memory Balloon 
device
 device         virtio_random   # VirtIO Entropy device
 device         virtio_console  # VirtIO Console device
 
+# Linux KVM paravirtualization support
+device         kvm_clock       # KVM paravirtual clock driver
+
 # Microsoft Hyper-V enhancement support
 device                 hyperv          # HyperV drivers
 
diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
index d1fc234c4182..d0cda2da8580 100644
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -263,6 +263,7 @@ dev/isci/scil/scif_sas_task_request_state_handlers.c        
        optional isci
 dev/isci/scil/scif_sas_task_request_states.c                   optional isci
 dev/isci/scil/scif_sas_timer.c                                 optional isci
 dev/itwd/itwd.c                                                        
optional itwd
+dev/kvm_clock/kvm_clock.c      optional        kvm_clock
 dev/qat/qat.c                  optional qat
 dev/qat/qat_ae.c               optional qat
 dev/qat/qat_c2xxx.c            optional qat
@@ -318,7 +319,7 @@ x86/x86/x86_mem.c           optional        mem
 x86/x86/mp_x86.c               optional        smp
 x86/x86/mp_watchdog.c          optional        mp_watchdog smp
 x86/x86/nexus.c                        standard
-x86/x86/pvclock.c              optional        xenhvm
+x86/x86/pvclock.c              optional        kvm_clock | xenhvm
 x86/x86/stack_machdep.c                optional        ddb | stack
 x86/x86/tsc.c                  standard
 x86/x86/ucode.c                        standard
diff --git a/sys/dev/kvm_clock/kvm_clock.c b/sys/dev/kvm_clock/kvm_clock.c
new file mode 100644
index 000000000000..1a76432e417d
--- /dev/null
+++ b/sys/dev/kvm_clock/kvm_clock.c
@@ -0,0 +1,240 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2014 Bryan Venteicher <[email protected]>
+ * Copyright (c) 2021 Mathieu Chouquet-Stringer
+ * Copyright (c) 2021 Juniper Networks, Inc.
+ * Copyright (c) 2021 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Linux KVM paravirtual clock support
+ *
+ * References:
+ *     - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
+ *     - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/domainset.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+
+#include <machine/pvclock.h>
+#include <x86/kvm.h>
+
+#include "clock_if.h"
+
+#define        KVM_CLOCK_DEVNAME               "kvmclock"
+/*
+ * Note: Chosen to be (1) above HPET's value (always 950), (2) above the TSC's
+ * default value of 800, and (3) below the TSC's value when it supports the
+ * "Invariant TSC" feature and is believed to be synchronized across all CPUs.
+ */
+#define        KVM_CLOCK_TC_QUALITY            975
+
+struct kvm_clock_softc {
+       struct pvclock                   pvc;
+       struct pvclock_wall_clock        wc;
+       struct pvclock_vcpu_time_info   *timeinfos;
+       u_int                            msr_tc;
+       u_int                            msr_wc;
+};
+
+static devclass_t      kvm_clock_devclass;
+
+static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg);
+static void    kvm_clock_system_time_enable(struct kvm_clock_softc *sc);
+static void    kvm_clock_system_time_enable_pcpu(void *arg);
+
+static struct pvclock_wall_clock *
+kvm_clock_get_wallclock(void *arg)
+{
+       struct kvm_clock_softc *sc = arg;
+
+       wrmsr(sc->msr_wc, vtophys(&sc->wc));
+       return (&sc->wc);
+}
+
+static void
+kvm_clock_system_time_enable(struct kvm_clock_softc *sc)
+{
+       smp_rendezvous(NULL, kvm_clock_system_time_enable_pcpu, NULL, sc);
+}
+
+static void
+kvm_clock_system_time_enable_pcpu(void *arg)
+{
+       struct kvm_clock_softc *sc = arg;
+
+       /*
+        * See [2]; the lsb of this MSR is the system time enable bit.
+        */
+       wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1);
+}
+
+static void
+kvm_clock_identify(driver_t *driver, device_t parent)
+{
+       u_int regs[4];
+
+       kvm_cpuid_get_features(regs);
+       if ((regs[0] &
+           (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0)
+               return;
+       if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1))
+               return;
+       BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0);
+}
+
+static int
+kvm_clock_probe(device_t dev)
+{
+       device_set_desc(dev, "KVM paravirtual clock");
+       return (BUS_PROBE_DEFAULT);
+}
+
+static int
+kvm_clock_attach(device_t dev)
+{
+       u_int regs[4];
+       struct kvm_clock_softc *sc = device_get_softc(dev);
+       bool stable_flag_supported;
+
+       /* Process KVM "features" CPUID leaf content: */
+       kvm_cpuid_get_features(regs);
+       if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) {
+               sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW;
+               sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW;
+       } else {
+               KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0,
+                   ("Clocksource feature flags disappeared since "
+                   "kvm_clock_identify: regs[0] %#0x.", regs[0]));
+               sc->msr_tc = KVM_MSR_SYSTEM_TIME;
+               sc->msr_wc = KVM_MSR_WALL_CLOCK;
+       }
+       stable_flag_supported =
+           (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0;
+
+       /* Set up 'struct pvclock_vcpu_time_info' page(s): */
+       sc->timeinfos = (struct pvclock_vcpu_time_info *)kmem_malloc(mp_ncpus *
+           sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO);
+       kvm_clock_system_time_enable(sc);
+
+       /*
+        * Init pvclock; register KVM clock wall clock, register KVM clock
+        * timecounter, and set up the requisite infrastructure for vDSO access
+        * to this timecounter.
+        *     Regarding 'tc_flags': Since the KVM MSR documentation does not
+        *     specifically discuss suspend/resume scenarios, conservatively
+        *     leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system
+        *     time must be re-inited in such cases.
+        */
+       sc->pvc.get_wallclock = kvm_clock_get_wallclock;
+       sc->pvc.get_wallclock_arg = sc;
+       sc->pvc.timeinfos = sc->timeinfos;
+       sc->pvc.stable_flag_supported = stable_flag_supported;
+       pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0);
+       return (0);
+}
+
+static int
+kvm_clock_detach(device_t dev)
+{
+       struct kvm_clock_softc *sc = device_get_softc(dev);
+
+       return (pvclock_destroy(&sc->pvc));
+}
+
+static int
+kvm_clock_suspend(device_t dev)
+{
+       return (0);
+}
+
+static int
+kvm_clock_resume(device_t dev)
+{
+       /*
+        * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE';
+        * conservatively assume that the system time must be re-inited in
+        * suspend/resume scenarios.
+        */
+       kvm_clock_system_time_enable(device_get_softc(dev));
+       pvclock_resume();
+       inittodr(time_second);
+       return (0);
+}
+
+static int
+kvm_clock_gettime(device_t dev, struct timespec *ts)
+{
+       struct kvm_clock_softc *sc = device_get_softc(dev);
+
+       pvclock_gettime(&sc->pvc, ts);
+       return (0);
+}
+
+static int
+kvm_clock_settime(device_t dev, struct timespec *ts)
+{
+       /*
+        * Even though it is not possible to set the KVM clock's wall clock, to
+        * avoid the possibility of periodic benign error messages from
+        * 'settime_task_func()', report success rather than, e.g., 'ENODEV'.
+        */
+       return (0);
+}
+
+static device_method_t kvm_clock_methods[] = {
+       DEVMETHOD(device_identify,      kvm_clock_identify),
+       DEVMETHOD(device_probe,         kvm_clock_probe),
+       DEVMETHOD(device_attach,        kvm_clock_attach),
+       DEVMETHOD(device_detach,        kvm_clock_detach),
+       DEVMETHOD(device_suspend,       kvm_clock_suspend),
+       DEVMETHOD(device_resume,        kvm_clock_resume),
+       /* clock interface */
+       DEVMETHOD(clock_gettime,        kvm_clock_gettime),
+       DEVMETHOD(clock_settime,        kvm_clock_settime),
+
+       DEVMETHOD_END
+};
+
+static driver_t kvm_clock_driver = {
+       KVM_CLOCK_DEVNAME,
+       kvm_clock_methods,
+       sizeof(struct kvm_clock_softc),
+};
+
+DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, kvm_clock_devclass, 0, 0);
diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC
index 86c062effd81..5447c452c4f7 100644
--- a/sys/i386/conf/GENERIC
+++ b/sys/i386/conf/GENERIC
@@ -337,6 +337,9 @@ device              virtio_blk              # VirtIO Block 
device
 device         virtio_scsi             # VirtIO SCSI device
 device         virtio_balloon          # VirtIO Memory Balloon device
 
+# Linux KVM paravirtualization support
+device         kvm_clock               # KVM paravirtual clock driver
+
 # HyperV drivers and enhancement support
 # NOTE: HYPERV depends on hyperv.  They must be added or removed together.
 options        HYPERV                  # Kernel support for HyperV drivers
diff --git a/sys/i386/conf/MINIMAL b/sys/i386/conf/MINIMAL
index 37b8e074ac65..9d735dbb0580 100644
--- a/sys/i386/conf/MINIMAL
+++ b/sys/i386/conf/MINIMAL
@@ -145,6 +145,9 @@ device              gif                     # IPv6 and IPv4 
tunneling
 # Note that 'bpf' is required for DHCP.
 device         bpf                     # Berkeley packet filter
 
+# Linux KVM paravirtualization support
+device         kvm_clock               # KVM paravirtual clock driver
+
 # Xen HVM Guest Optimizations
 # NOTE: XENHVM depends on xenpci.  They must be added or removed together.
 options        XENHVM                  # Xen HVM kernel infrastructure
diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES
index df9ff4fb3aed..27cebf0c48a0 100644
--- a/sys/i386/conf/NOTES
+++ b/sys/i386/conf/NOTES
@@ -719,6 +719,9 @@ device              virtio_balloon  # VirtIO Memory Balloon 
device
 device         virtio_random   # VirtIO Entropy device
 device         virtio_console  # VirtIO Console device
 
+# Linux KVM paravirtualization support
+device                 kvm_clock       # KVM paravirtual clock driver
+
 options        HYPERV
 device         hyperv          # HyperV drivers
 
diff --git a/sys/x86/include/kvm.h b/sys/x86/include/kvm.h
new file mode 100644
index 000000000000..beec6447d7d6
--- /dev/null
+++ b/sys/x86/include/kvm.h
@@ -0,0 +1,80 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2014 Bryan Venteicher <[email protected]>
+ * Copyright (c) 2021 Mathieu Chouquet-Stringer
+ * Copyright (c) 2021 Juniper Networks, Inc.
+ * Copyright (c) 2021 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Linux KVM paravirtualization: common definitions
+ *
+ * References:
+ *     - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
+ *     - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
+ */
+
+#ifndef _X86_KVM_H_
+#define        _X86_KVM_H_
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <machine/md_var.h>
+
+#define        KVM_CPUID_SIGNATURE                     0x40000000
+#define        KVM_CPUID_FEATURES_LEAF                 0x40000001
+
+#define        KVM_FEATURE_CLOCKSOURCE                 0x00000001
+#define        KVM_FEATURE_CLOCKSOURCE2                0x00000008
+#define        KVM_FEATURE_CLOCKSOURCE_STABLE_BIT      0x01000000
+
+/* Deprecated: for the CLOCKSOURCE feature. */
+#define        KVM_MSR_WALL_CLOCK                      0x11
+#define        KVM_MSR_SYSTEM_TIME                     0x12
+
+#define        KVM_MSR_WALL_CLOCK_NEW                  0x4b564d00
+#define        KVM_MSR_SYSTEM_TIME_NEW                 0x4b564d01
+
+static inline bool
+kvm_cpuid_features_leaf_supported(void)
+{
+       return (vm_guest == VM_GUEST_KVM &&
+           KVM_CPUID_FEATURES_LEAF > hv_base &&
+           KVM_CPUID_FEATURES_LEAF <= hv_high);
+}
+
+static inline void
+kvm_cpuid_get_features(u_int *regs)
+{
+       if (!kvm_cpuid_features_leaf_supported())
+               regs[0] = regs[1] = regs[2] = regs[3] = 0;
+       else
+               do_cpuid(KVM_CPUID_FEATURES_LEAF, regs);
+}
+
+#endif /* !_X86_KVM_H_ */
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to