> Am 23.08.2019 um 13:05 schrieb Anup Patel <a...@brainfault.org>:
> 
>> On Fri, Aug 23, 2019 at 1:23 PM Alexander Graf <g...@amazon.com> wrote:
>> 
>>> On 22.08.19 10:46, Anup Patel wrote:
>>> From: Atish Patra <atish.pa...@wdc.com>
>>> 
>>> The RISC-V hypervisor specification doesn't have any virtual timer
>>> feature.
>>> 
>>> Due to this, the guest VCPU timer will be programmed via SBI calls.
>>> The host will use a separate hrtimer event for each guest VCPU to
>>> provide timer functionality. We inject a virtual timer interrupt to
>>> the guest VCPU whenever the guest VCPU hrtimer event expires.
>>> 
>>> The following features are not supported yet and will be added in
>>> future:
>>> 1. A time offset to adjust guest time from host time
>>> 2. A saved next event in guest vcpu for vm migration
>> 
>> Implementing these 2 bits right now should be trivial. Why wait?
> 
> We were waiting for the HTIMEDELTA CSR to be merged, so we
> deferred these items.
> 
>> 
>>> 
>>> Signed-off-by: Atish Patra <atish.pa...@wdc.com>
>>> Signed-off-by: Anup Patel <anup.pa...@wdc.com>
>>> Acked-by: Paolo Bonzini <pbonz...@redhat.com>
>>> Reviewed-by: Paolo Bonzini <pbonz...@redhat.com>
>>> ---
>>>  arch/riscv/include/asm/kvm_host.h       |   4 +
>>>  arch/riscv/include/asm/kvm_vcpu_timer.h |  32 +++++++
>>>  arch/riscv/kvm/Makefile                 |   2 +-
>>>  arch/riscv/kvm/vcpu.c                   |   6 ++
>>>  arch/riscv/kvm/vcpu_timer.c             | 106 ++++++++++++++++++++++++
>>>  drivers/clocksource/timer-riscv.c       |   8 ++
>>>  include/clocksource/timer-riscv.h       |  16 ++++
>>>  7 files changed, 173 insertions(+), 1 deletion(-)
>>>  create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h
>>>  create mode 100644 arch/riscv/kvm/vcpu_timer.c
>>>  create mode 100644 include/clocksource/timer-riscv.h
>>> 
>>> diff --git a/arch/riscv/include/asm/kvm_host.h 
>>> b/arch/riscv/include/asm/kvm_host.h
>>> index ab33e59a3d88..d2a2e45eefc0 100644
>>> --- a/arch/riscv/include/asm/kvm_host.h
>>> +++ b/arch/riscv/include/asm/kvm_host.h
>>> @@ -12,6 +12,7 @@
>>>  #include <linux/types.h>
>>>  #include <linux/kvm.h>
>>>  #include <linux/kvm_types.h>
>>> +#include <asm/kvm_vcpu_timer.h>
>>> 
>>>  #ifdef CONFIG_64BIT
>>>  #define KVM_MAX_VCPUS                       (1U << 16)
>>> @@ -167,6 +168,9 @@ struct kvm_vcpu_arch {
>>>      unsigned long irqs_pending;
>>>      unsigned long irqs_pending_mask;
>>> 
>>> +     /* VCPU Timer */
>>> +     struct kvm_vcpu_timer timer;
>>> +
>>>      /* MMIO instruction details */
>>>      struct kvm_mmio_decode mmio_decode;
>>> 
>>> diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h 
>>> b/arch/riscv/include/asm/kvm_vcpu_timer.h
>>> new file mode 100644
>>> index 000000000000..df67ea86988e
>>> --- /dev/null
>>> +++ b/arch/riscv/include/asm/kvm_vcpu_timer.h
>>> @@ -0,0 +1,32 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>> +/*
>>> + * Copyright (C) 2019 Western Digital Corporation or its affiliates.
>>> + *
>>> + * Authors:
>>> + *   Atish Patra <atish.pa...@wdc.com>
>>> + */
>>> +
>>> +#ifndef __KVM_VCPU_RISCV_TIMER_H
>>> +#define __KVM_VCPU_RISCV_TIMER_H
>>> +
>>> +#include <linux/hrtimer.h>
>>> +
>>> +#define VCPU_TIMER_PROGRAM_THRESHOLD_NS 1000
>>> +
>>> +struct kvm_vcpu_timer {
>>> +     bool init_done;
>>> +     /* Check if the timer is programmed */
>>> +     bool is_set;
>>> +     struct hrtimer hrt;
>>> +     /* Mult & Shift values to get nanosec from cycles */
>>> +     u32 mult;
>>> +     u32 shift;
>>> +};
>>> +
>>> +int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu);
>>> +int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
>>> +int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
>>> +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu,
>>> +                                 unsigned long ncycles);
>> 
>> This function never gets called?
> 
> It's called from SBI emulation.
> 
>> 
>>> +
>>> +#endif
>>> diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
>>> index c0f57f26c13d..3e0c7558320d 100644
>>> --- a/arch/riscv/kvm/Makefile
>>> +++ b/arch/riscv/kvm/Makefile
>>> @@ -9,6 +9,6 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm
>>>  kvm-objs := $(common-objs-y)
>>> 
>>>  kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
>>> -kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o
>>> +kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o vcpu_timer.o
>>> 
>>>  obj-$(CONFIG_KVM)   += kvm.o
>>> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
>>> index 6124077d154f..018fca436776 100644
>>> --- a/arch/riscv/kvm/vcpu.c
>>> +++ b/arch/riscv/kvm/vcpu.c
>>> @@ -54,6 +54,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
>>> 
>>>      memcpy(cntx, reset_cntx, sizeof(*cntx));
>>> 
>>> +     kvm_riscv_vcpu_timer_reset(vcpu);
>>> +
>>>      WRITE_ONCE(vcpu->arch.irqs_pending, 0);
>>>      WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
>>>  }
>>> @@ -108,6 +110,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>>>      cntx->hstatus |= HSTATUS_SP2P;
>>>      cntx->hstatus |= HSTATUS_SPV;
>>> 
>>> +     /* Setup VCPU timer */
>>> +     kvm_riscv_vcpu_timer_init(vcpu);
>>> +
>>>      /* Reset VCPU */
>>>      kvm_riscv_reset_vcpu(vcpu);
>>> 
>>> @@ -116,6 +121,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>>> 
>>>  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>>>  {
>>> +     kvm_riscv_vcpu_timer_deinit(vcpu);
>>>      kvm_riscv_stage2_flush_cache(vcpu);
>>>      kmem_cache_free(kvm_vcpu_cache, vcpu);
>>>  }
>>> diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
>>> new file mode 100644
>>> index 000000000000..a45ca06e1aa6
>>> --- /dev/null
>>> +++ b/arch/riscv/kvm/vcpu_timer.c
>>> @@ -0,0 +1,106 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Copyright (C) 2019 Western Digital Corporation or its affiliates.
>>> + *
>>> + * Authors:
>>> + *     Atish Patra <atish.pa...@wdc.com>
>>> + */
>>> +
>>> +#include <linux/errno.h>
>>> +#include <linux/err.h>
>>> +#include <linux/kvm_host.h>
>>> +#include <clocksource/timer-riscv.h>
>>> +#include <asm/csr.h>
>>> +#include <asm/kvm_vcpu_timer.h>
>>> +
>>> +static enum hrtimer_restart kvm_riscv_vcpu_hrtimer_expired(struct hrtimer 
>>> *h)
>>> +{
>>> +     struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, 
>>> hrt);
>>> +     struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer);
>>> +
>>> +     t->is_set = false;
>>> +     kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_TIMER);
>>> +
>>> +     return HRTIMER_NORESTART;
>>> +}
>>> +
>>> +static u64 kvm_riscv_delta_cycles2ns(u64 cycles, struct kvm_vcpu_timer *t)
>>> +{
>>> +     unsigned long flags;
>>> +     u64 cycles_now, cycles_delta, delta_ns;
>>> +
>>> +     local_irq_save(flags);
>>> +     cycles_now = get_cycles64();
>>> +     if (cycles_now < cycles)
>>> +             cycles_delta = cycles - cycles_now;
>>> +     else
>>> +             cycles_delta = 0;
>>> +     delta_ns = (cycles_delta * t->mult) >> t->shift;
>>> +     local_irq_restore(flags);
>>> +
>>> +     return delta_ns;
>>> +}
>>> +
>>> +static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t)
>>> +{
>>> +     if (!t->init_done || !t->is_set)
>>> +             return -EINVAL;
>>> +
>>> +     hrtimer_cancel(&t->hrt);
>>> +     t->is_set = false;
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu,
>>> +                                 unsigned long ncycles)
>>> +{
>>> +     struct kvm_vcpu_timer *t = &vcpu->arch.timer;
>>> +     u64 delta_ns = kvm_riscv_delta_cycles2ns(ncycles, t);
>> 
>> ... in fact, I feel like I'm missing something obvious here. How does
>> the guest trigger the timer event? What is the argument it uses for that
>> and how does that play with the tbfreq in the earlier patch?
> 
> We have an SBI call interface between the Hypervisor and the Guest. One of
> the SBI calls allows the Guest to program a timer event. The next event is
> specified as absolute cycles. The Guest can read time using the TIME CSR,
> which returns the system timer value (at tbfreq frequency).
> 
> Guest Linux will know the tbfreq from the DTB passed by QEMU/KVMTOOL,
> and it has to be the same as the Host tbfreq.
> 
> The TBFREQ config register visible to user-space is a read-only CONFIG
> register which tells user-space tools (QEMU/KVMTOOL) about Host tbfreq.

And it's read-only because you cannot trap on TB reads?

Alex

> 
> Regards,
> Anup
> 
>> 
>> 
>> Alex
>> 

Reply via email to