On Fri, Aug 23, 2019 at 5:03 PM Graf (AWS), Alexander <g...@amazon.com> wrote:
>
>
>
> > Am 23.08.2019 um 13:05 schrieb Anup Patel <a...@brainfault.org>:
> >
> >> On Fri, Aug 23, 2019 at 1:23 PM Alexander Graf <g...@amazon.com> wrote:
> >>
> >>> On 22.08.19 10:46, Anup Patel wrote:
> >>> From: Atish Patra <atish.pa...@wdc.com>
> >>>
> >>> The RISC-V hypervisor specification doesn't have any virtual timer
> >>> feature.
> >>>
> >>> Due to this, the guest VCPU timer will be programmed via SBI calls.
> >>> The host will use a separate hrtimer event for each guest VCPU to
> >>> provide timer functionality. We inject a virtual timer interrupt to
> >>> the guest VCPU whenever the guest VCPU hrtimer event expires.
> >>>
> >>> The following features are not supported yet and will be added in
> >>> future:
> >>> 1. A time offset to adjust guest time from host time
> >>> 2. A saved next event in guest vcpu for vm migration
> >>
> >> Implementing these 2 bits right now should be trivial. Why wait?
> >
> > We were waiting for the HTIMEDELTA CSR to be merged, so we
> > deferred these items.
> >
> >>
> >>>
> >>> Signed-off-by: Atish Patra <atish.pa...@wdc.com>
> >>> Signed-off-by: Anup Patel <anup.pa...@wdc.com>
> >>> Acked-by: Paolo Bonzini <pbonz...@redhat.com>
> >>> Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
> >>> ---
> >>>  arch/riscv/include/asm/kvm_host.h       |   4 +
> >>>  arch/riscv/include/asm/kvm_vcpu_timer.h |  32 +++++++
> >>>  arch/riscv/kvm/Makefile                 |   2 +-
> >>>  arch/riscv/kvm/vcpu.c                   |   6 ++
> >>>  arch/riscv/kvm/vcpu_timer.c             | 106 ++++++++++++++++++++++++
> >>>  drivers/clocksource/timer-riscv.c       |   8 ++
> >>>  include/clocksource/timer-riscv.h       |  16 ++++
> >>>  7 files changed, 173 insertions(+), 1 deletion(-)
> >>>  create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h
> >>>  create mode 100644 arch/riscv/kvm/vcpu_timer.c
> >>>  create mode 100644 include/clocksource/timer-riscv.h
> >>>
> >>> diff --git a/arch/riscv/include/asm/kvm_host.h 
> >>> b/arch/riscv/include/asm/kvm_host.h
> >>> index ab33e59a3d88..d2a2e45eefc0 100644
> >>> --- a/arch/riscv/include/asm/kvm_host.h
> >>> +++ b/arch/riscv/include/asm/kvm_host.h
> >>> @@ -12,6 +12,7 @@
> >>>  #include <linux/types.h>
> >>>  #include <linux/kvm.h>
> >>>  #include <linux/kvm_types.h>
> >>> +#include <asm/kvm_vcpu_timer.h>
> >>>
> >>>  #ifdef CONFIG_64BIT
> >>>  #define KVM_MAX_VCPUS                       (1U << 16)
> >>> @@ -167,6 +168,9 @@ struct kvm_vcpu_arch {
> >>>      unsigned long irqs_pending;
> >>>      unsigned long irqs_pending_mask;
> >>>
> >>> +     /* VCPU Timer */
> >>> +     struct kvm_vcpu_timer timer;
> >>> +
> >>>      /* MMIO instruction details */
> >>>      struct kvm_mmio_decode mmio_decode;
> >>>
> >>> diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h 
> >>> b/arch/riscv/include/asm/kvm_vcpu_timer.h
> >>> new file mode 100644
> >>> index 000000000000..df67ea86988e
> >>> --- /dev/null
> >>> +++ b/arch/riscv/include/asm/kvm_vcpu_timer.h
> >>> @@ -0,0 +1,32 @@
> >>> +/* SPDX-License-Identifier: GPL-2.0-only */
> >>> +/*
> >>> + * Copyright (C) 2019 Western Digital Corporation or its affiliates.
> >>> + *
> >>> + * Authors:
> >>> + *   Atish Patra <atish.pa...@wdc.com>
> >>> + */
> >>> +
> >>> +#ifndef __KVM_VCPU_RISCV_TIMER_H
> >>> +#define __KVM_VCPU_RISCV_TIMER_H
> >>> +
> >>> +#include <linux/hrtimer.h>
> >>> +
> >>> +#define VCPU_TIMER_PROGRAM_THRESHOLD_NS 1000
> >>> +
> >>> +struct kvm_vcpu_timer {
> >>> +     bool init_done;
> >>> +     /* Check if the timer is programmed */
> >>> +     bool is_set;
> >>> +     struct hrtimer hrt;
> >>> +     /* Mult & Shift values to get nanosec from cycles */
> >>> +     u32 mult;
> >>> +     u32 shift;
> >>> +};
> >>> +
> >>> +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu);
> >>> +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
> >>> +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
> >>> +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu,
> >>> +                                 unsigned long ncycles);
> >>
> >> This function never gets called?
> >
> > It's called from SBI emulation.
> >
> >>
> >>> +
> >>> +#endif
> >>> diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
> >>> index c0f57f26c13d..3e0c7558320d 100644
> >>> --- a/arch/riscv/kvm/Makefile
> >>> +++ b/arch/riscv/kvm/Makefile
> >>> @@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
> >>>  kvm-objs := $(common-objs-y)
> >>>
> >>>  kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
> >>> -kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o
> >>> +kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o
> >>>
> >>>  obj-$(CONFIG_KVM)   += kvm.o
> >>> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
> >>> index 6124077d154f..018fca436776 100644
> >>> --- a/arch/riscv/kvm/vcpu.c
> >>> +++ b/arch/riscv/kvm/vcpu.c
> >>> @@ -54,6 +54,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
> >>>
> >>>      memcpy(cntx, reset_cntx, sizeof(*cntx));
> >>>
> >>> +     kvm_riscv_vcpu_timer_reset(vcpu);
> >>> +
> >>>      WRITE_ONCE(vcpu->arch.irqs_pending, 0);
> >>>      WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
> >>>  }
> >>> @@ -108,6 +110,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> >>>      cntx->hstatus |= HSTATUS_SP2P;
> >>>      cntx->hstatus |= HSTATUS_SPV;
> >>>
> >>> +     /* Setup VCPU timer */
> >>> +     kvm_riscv_vcpu_timer_init(vcpu);
> >>> +
> >>>      /* Reset VCPU */
> >>>      kvm_riscv_reset_vcpu(vcpu);
> >>>
> >>> @@ -116,6 +121,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> >>>
> >>>  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
> >>>  {
> >>> +     kvm_riscv_vcpu_timer_deinit(vcpu);
> >>>      kvm_riscv_stage2_flush_cache(vcpu);
> >>>      kmem_cache_free(kvm_vcpu_cache, vcpu);
> >>>  }
> >>> diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
> >>> new file mode 100644
> >>> index 000000000000..a45ca06e1aa6
> >>> --- /dev/null
> >>> +++ b/arch/riscv/kvm/vcpu_timer.c
> >>> @@ -0,0 +1,106 @@
> >>> +// SPDX-License-Identifier: GPL-2.0
> >>> +/*
> >>> + * Copyright (C) 2019 Western Digital Corporation or its affiliates.
> >>> + *
> >>> + * Authors:
> >>> + *     Atish Patra <atish.pa...@wdc.com>
> >>> + */
> >>> +
> >>> +#include <linux/errno.h>
> >>> +#include <linux/err.h>
> >>> +#include <linux/kvm_host.h>
> >>> +#include <clocksource/timer-riscv.h>
> >>> +#include <asm/csr.h>
> >>> +#include <asm/kvm_vcpu_timer.h>
> >>> +
> >>> +static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct 
> >>> hrtimer *h)
> >>> +{
> >>> +     struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, 
> >>> hrt);
> >>> +     struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, 
> >>> arch.timer);
> >>> +
> >>> +     t->is_set = false;
> >>> +     kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_TIMER);
> >>> +
> >>> +     return HRTIMER_NORESTART;
> >>> +}
> >>> +
> >>> +static u64 kvm_riscv_delta_cycles2ns(u64 cycles, struct kvm_vcpu_timer 
> >>> *t)
> >>> +{
> >>> +     unsigned long flags;
> >>> +     u64 cycles_now, cycles_delta, delta_ns;
> >>> +
> >>> +     local_irq_save(flags);
> >>> +     cycles_now = get_cycles64();
> >>> +     if (cycles_now < cycles)
> >>> +             cycles_delta = cycles - cycles_now;
> >>> +     else
> >>> +             cycles_delta = 0;
> >>> +     delta_ns = (cycles_delta * t->mult) >> t->shift;
> >>> +     local_irq_restore(flags);
> >>> +
> >>> +     return delta_ns;
> >>> +}
> >>> +
> >>> +static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t)
> >>> +{
> >>> +     if (!t->init_done || !t->is_set)
> >>> +             return -EINVAL;
> >>> +
> >>> +     hrtimer_cancel(&t->hrt);
> >>> +     t->is_set = false;
> >>> +
> >>> +     return 0;
> >>> +}
> >>> +
> >>> +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu,
> >>> +                                 unsigned long ncycles)
> >>> +{
> >>> +     struct kvm_vcpu_timer *t = &vcpu->arch.timer;
> >>> +     u64 delta_ns = kvm_riscv_delta_cycles2ns(ncycles, t);
> >>
> >> ... in fact, I feel like I'm missing something obvious here. How does
> >> the guest trigger the timer event? What is the argument it uses for that
> >> and how does that play with the tbfreq in the earlier patch?
> >
> > We have an SBI call interface between the Hypervisor and the Guest. One
> > of the SBI calls allows the Guest to program a time event. The next event
> > is specified as absolute cycles. The Guest can read time using the TIME
> > CSR, which returns the system timer value (@ tbfreq frequency).
> >
> > Guest Linux will know the tbfreq from the DTB passed by QEMU/KVMTOOL,
> > and it has to be the same as the Host tbfreq.
> >
> > The TBFREQ config register visible to user-space is a read-only CONFIG
> > register which tells user-space tools (QEMU/KVMTOOL) about Host tbfreq.
>
> And it's read-only because you can not trap on TB reads?

There are no TB registers.

The tbfreq can only be known through a DT/ACPI kind of HW description
for both Host and Guest.

The KVM user-space tool needs to know TBFREQ so that it can set the correct
value in the generated DT for Guest Linux.

Regards,
Anup

>
> Alex
>
> >
> > Regards,
> > Anup
> >
> >>
> >>
> >> Alex
> >>

Reply via email to