Re: [PATCH 9/9] arm64: Retrieve stolen time as paravirtualized guest

2019-08-12 Thread Zenghui Yu

On 2019/8/12 18:39, Steven Price wrote:

On 09/08/2019 14:51, Zenghui Yu wrote:
[...]

Hi Steven,

Since userspace is not involved yet (right?), no one will create the
PV_TIME device for guest (and no one will specify the IPA of the shared
stolen time region), and I guess we will get a "not supported" error
here.

So what should we do if we want to test this series now?  Any userspace
tools?  If no, do you have any plans for userspace developing? ;-)


At the moment I have the following patch to kvmtool which creates the
PV_TIME device - this isn't in a state to go upstream, and Marc has
asked that I rework the memory allocation, so this will need to change.

It's a little ugly as it simply reserves the first page of RAM to use
for the PV time structures.


Thanks for sharing the code. It's good enough to show what is required
in user-space.

(I'm not familiar with kvmtool. I will first take some time to move the
steal time part to Qemu and see what will happen.)


Thanks,
zenghui


8<
diff --git a/Makefile b/Makefile
index 3862112..a79956b 100644
--- a/Makefile
+++ b/Makefile
@@ -158,7 +158,7 @@ endif
  # ARM
  OBJS_ARM_COMMON   := arm/fdt.o arm/gic.o arm/gicv2m.o 
arm/ioport.o \
   arm/kvm.o arm/kvm-cpu.o arm/pci.o arm/timer.o \
-  arm/pmu.o
+  arm/pmu.o arm/pvtime.o
  HDRS_ARM_COMMON   := arm/include
  ifeq ($(ARCH), arm)
DEFINES += -DCONFIG_ARM
diff --git a/arm/fdt.c b/arm/fdt.c
index c80e6da..19eccbc 100644
--- a/arm/fdt.c
+++ b/arm/fdt.c
@@ -119,6 +119,7 @@ static int setup_fdt(struct kvm *kvm)
  
  	/* Create new tree without a reserve map */

_FDT(fdt_create(fdt, FDT_MAX_SIZE));
+   _FDT(fdt_add_reservemap_entry(fdt, kvm->arch.memory_guest_start, 4096));
_FDT(fdt_finish_reservemap(fdt));
  
  	/* Header */

diff --git a/arm/kvm.c b/arm/kvm.c
index 1f85fc6..8bbfef1 100644
--- a/arm/kvm.c
+++ b/arm/kvm.c
@@ -11,6 +11,8 @@
  #include 
  #include 
  
+int pvtime_create(struct kvm *kvm);

+
  struct kvm_ext kvm_req_ext[] = {
{ DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) },
{ DEFINE_KVM_EXT(KVM_CAP_ONE_REG) },
@@ -86,6 +88,10 @@ void kvm__arch_init(struct kvm *kvm, const char 
*hugetlbfs_path, u64 ram_size)
/* Create the virtual GIC. */
if (gic__create(kvm, kvm->cfg.arch.irqchip))
die("Failed to create virtual GIC");
+
+   /* Setup PV time */
+   if (pvtime_create(kvm))
+   die("Failed to initialise PV time");
  }
  
  #define FDT_ALIGN	SZ_2M

diff --git a/arm/pvtime.c b/arm/pvtime.c
new file mode 100644
index 000..abcaab3
--- /dev/null
+++ b/arm/pvtime.c
@@ -0,0 +1,77 @@
+#include "kvm/kvm.h"
+
+#define KVM_DEV_TYPE_ARM_PV_TIME (KVM_DEV_TYPE_ARM_VGIC_ITS+2)
+
+/* Device Control API: PV_TIME */
+#define KVM_DEV_ARM_PV_TIME_PADDR  0
+#define KVM_DEV_ARM_PV_TIME_FREQUENCY  3
+
+#define KVM_DEV_ARM_PV_TIME_ST 0
+#define KVM_DEV_ARM_PV_TIME_LPT1
+
+static int pvtime_fd;
+
+int pvtime_create(struct kvm *kvm);
+
+int pvtime_create(struct kvm *kvm)
+{
+   int err;
+   u64 lpt_paddr = 0x1000;
+   u64 st_paddr = lpt_paddr + 4096;
+   u32 frequency = 100 * 1000 * 1000;
+
+   printf("lpt_paddr=%llx\n", lpt_paddr);
+
+   struct kvm_create_device pvtime_device = {
+   .type = KVM_DEV_TYPE_ARM_PV_TIME,
+   .flags = 0,
+   };
+
+   err = ioctl(kvm->vm_fd, KVM_CREATE_DEVICE, &pvtime_device);
+   if (err) {
+   printf("Failed to create PV device\n");
+   return 0;
+   }
+
+   pvtime_fd = pvtime_device.fd;
+
+   struct kvm_device_attr lpt_base = {
+   .group = KVM_DEV_ARM_PV_TIME_PADDR,
+   .attr = KVM_DEV_ARM_PV_TIME_LPT,
+   .addr = (u64)(unsigned long)&lpt_paddr
+   };
+   struct kvm_device_attr st_base = {
+   .group = KVM_DEV_ARM_PV_TIME_PADDR,
+   .attr = KVM_DEV_ARM_PV_TIME_ST,
+   .addr = (u64)(unsigned long)&st_paddr
+   };
+
+   struct kvm_device_attr lpt_freq = {
+   .group = KVM_DEV_ARM_PV_TIME_FREQUENCY,
+   .attr = KVM_DEV_ARM_PV_TIME_LPT,
+   .addr = (u64)(unsigned long)&frequency
+   };
+
+   err = ioctl(pvtime_fd, KVM_SET_DEVICE_ATTR, &lpt_base);
+   if (err) {
+   perror("ioctl lpt_base failed");
+   printf("Ignoring LPT...\n");
+   }
+   err = ioctl(pvtime_fd, KVM_SET_DEVICE_ATTR, &st_base);
+   if (err) {
+   perror("ioctl st_base failed");
+   goto out_err;
+   }
+   err = ioctl(pvtime_fd, KVM_SET_DEVICE_ATTR, &lpt_freq);
+   if (err) {
+   perror("ioctl lpt_freq failed");
+   printf("Ignoring LPT...\n");
+   }
+
+   printf("PV time setup\n");
+
+   return 0;
+out_err:
+   close(pvtime_fd);
+   return err;
+}



Re: [PATCH 9/9] arm64: Retrieve stolen time as paravirtualized guest

2019-08-12 Thread Steven Price
On 09/08/2019 14:51, Zenghui Yu wrote:
[...]
> Hi Steven,
> 
> Since userspace is not involved yet (right?), no one will create the
> PV_TIME device for guest (and no one will specify the IPA of the shared
> stolen time region), and I guess we will get a "not supported" error
> here.
> 
> So what should we do if we want to test this series now?  Any userspace
> tools?  If no, do you have any plans for userspace developing? ;-)

At the moment I have the following patch to kvmtool which creates the
PV_TIME device - this isn't in a state to go upstream, and Marc has
asked that I rework the memory allocation, so this will need to change.

It's a little ugly as it simply reserves the first page of RAM to use
for the PV time structures.

8<
diff --git a/Makefile b/Makefile
index 3862112..a79956b 100644
--- a/Makefile
+++ b/Makefile
@@ -158,7 +158,7 @@ endif
 # ARM
 OBJS_ARM_COMMON:= arm/fdt.o arm/gic.o arm/gicv2m.o 
arm/ioport.o \
   arm/kvm.o arm/kvm-cpu.o arm/pci.o arm/timer.o \
-  arm/pmu.o
+  arm/pmu.o arm/pvtime.o
 HDRS_ARM_COMMON:= arm/include
 ifeq ($(ARCH), arm)
DEFINES += -DCONFIG_ARM
diff --git a/arm/fdt.c b/arm/fdt.c
index c80e6da..19eccbc 100644
--- a/arm/fdt.c
+++ b/arm/fdt.c
@@ -119,6 +119,7 @@ static int setup_fdt(struct kvm *kvm)
 
/* Create new tree without a reserve map */
_FDT(fdt_create(fdt, FDT_MAX_SIZE));
+   _FDT(fdt_add_reservemap_entry(fdt, kvm->arch.memory_guest_start, 4096));
_FDT(fdt_finish_reservemap(fdt));
 
/* Header */
diff --git a/arm/kvm.c b/arm/kvm.c
index 1f85fc6..8bbfef1 100644
--- a/arm/kvm.c
+++ b/arm/kvm.c
@@ -11,6 +11,8 @@
 #include 
 #include 
 
+int pvtime_create(struct kvm *kvm);
+
 struct kvm_ext kvm_req_ext[] = {
{ DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) },
{ DEFINE_KVM_EXT(KVM_CAP_ONE_REG) },
@@ -86,6 +88,10 @@ void kvm__arch_init(struct kvm *kvm, const char 
*hugetlbfs_path, u64 ram_size)
/* Create the virtual GIC. */
if (gic__create(kvm, kvm->cfg.arch.irqchip))
die("Failed to create virtual GIC");
+
+   /* Setup PV time */
+   if (pvtime_create(kvm))
+   die("Failed to initialise PV time");
 }
 
 #define FDT_ALIGN  SZ_2M
diff --git a/arm/pvtime.c b/arm/pvtime.c
new file mode 100644
index 000..abcaab3
--- /dev/null
+++ b/arm/pvtime.c
@@ -0,0 +1,77 @@
+#include "kvm/kvm.h"
+
+#define KVM_DEV_TYPE_ARM_PV_TIME (KVM_DEV_TYPE_ARM_VGIC_ITS+2)
+
+/* Device Control API: PV_TIME */
+#define KVM_DEV_ARM_PV_TIME_PADDR  0
+#define KVM_DEV_ARM_PV_TIME_FREQUENCY  3
+
+#define KVM_DEV_ARM_PV_TIME_ST 0
+#define KVM_DEV_ARM_PV_TIME_LPT1
+
+static int pvtime_fd;
+
+int pvtime_create(struct kvm *kvm);
+
+int pvtime_create(struct kvm *kvm)
+{
+   int err;
+   u64 lpt_paddr = 0x1000;
+   u64 st_paddr = lpt_paddr + 4096;
+   u32 frequency = 100 * 1000 * 1000;
+
+   printf("lpt_paddr=%llx\n", lpt_paddr);
+
+   struct kvm_create_device pvtime_device = {
+   .type = KVM_DEV_TYPE_ARM_PV_TIME,
+   .flags = 0,
+   };
+
+   err = ioctl(kvm->vm_fd, KVM_CREATE_DEVICE, &pvtime_device);
+   if (err) {
+   printf("Failed to create PV device\n");
+   return 0;
+   }
+
+   pvtime_fd = pvtime_device.fd;
+
+   struct kvm_device_attr lpt_base = {
+   .group = KVM_DEV_ARM_PV_TIME_PADDR,
+   .attr = KVM_DEV_ARM_PV_TIME_LPT,
+   .addr = (u64)(unsigned long)&lpt_paddr
+   };
+   struct kvm_device_attr st_base = {
+   .group = KVM_DEV_ARM_PV_TIME_PADDR,
+   .attr = KVM_DEV_ARM_PV_TIME_ST,
+   .addr = (u64)(unsigned long)&st_paddr
+   };
+
+   struct kvm_device_attr lpt_freq = {
+   .group = KVM_DEV_ARM_PV_TIME_FREQUENCY,
+   .attr = KVM_DEV_ARM_PV_TIME_LPT,
+   .addr = (u64)(unsigned long)&frequency
+   };
+
+   err = ioctl(pvtime_fd, KVM_SET_DEVICE_ATTR, &lpt_base);
+   if (err) {
+   perror("ioctl lpt_base failed");
+   printf("Ignoring LPT...\n");
+   }
+   err = ioctl(pvtime_fd, KVM_SET_DEVICE_ATTR, &st_base);
+   if (err) {
+   perror("ioctl st_base failed");
+   goto out_err;
+   }
+   err = ioctl(pvtime_fd, KVM_SET_DEVICE_ATTR, &lpt_freq);
+   if (err) {
+   perror("ioctl lpt_freq failed");
+   printf("Ignoring LPT...\n");
+   }
+
+   printf("PV time setup\n");
+
+   return 0;
+out_err:
+   close(pvtime_fd);
+   return err;
+}


Re: [PATCH 9/9] arm64: Retrieve stolen time as paravirtualized guest

2019-08-09 Thread Zenghui Yu

On 2019/8/2 22:50, Steven Price wrote:

Enable paravirtualization features when running under a hypervisor
supporting the PV_TIME_ST hypercall.

For each (v)CPU, we ask the hypervisor for the location of a shared
page which the hypervisor will use to report stolen time to us. We set
pv_time_ops to the stolen time function which simply reads the stolen
value from the shared page for a VCPU. We guarantee single-copy
atomicity using READ_ONCE which means we can also read the stolen
time for another VCPU than the currently running one while it is
potentially being updated by the hypervisor.

Signed-off-by: Steven Price 
---
  arch/arm64/kernel/Makefile |   1 +
  arch/arm64/kernel/kvm.c| 155 +
  include/linux/cpuhotplug.h |   1 +
  3 files changed, 157 insertions(+)
  create mode 100644 arch/arm64/kernel/kvm.c

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 478491f07b4f..eb36edf9b930 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE)  += crash_core.o
  obj-$(CONFIG_ARM_SDE_INTERFACE)   += sdei.o
  obj-$(CONFIG_ARM64_SSBD)  += ssbd.o
  obj-$(CONFIG_ARM64_PTR_AUTH)  += pointer_auth.o
+obj-$(CONFIG_PARAVIRT) += kvm.o
  
  obj-y	+= vdso/ probes/

  obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/kvm.c b/arch/arm64/kernel/kvm.c
new file mode 100644
index ..245398c79dae
--- /dev/null
+++ b/arch/arm64/kernel/kvm.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Arm Ltd.
+
+#define pr_fmt(fmt) "kvmarm-pv: " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+struct kvmarm_stolen_time_region {
+   struct pvclock_vcpu_stolen_time_info *kaddr;
+};
+
+static DEFINE_PER_CPU(struct kvmarm_stolen_time_region, stolen_time_region);
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+   steal_acc = false;
+   return 0;
+}
+early_param("no-steal-acc", parse_no_stealacc);
+
+/* return stolen time in ns by asking the hypervisor */
+static u64 kvm_steal_clock(int cpu)
+{
+   struct kvmarm_stolen_time_region *reg;
+
+   reg = per_cpu_ptr(&stolen_time_region, cpu);
+   if (!reg->kaddr) {
+   pr_warn_once("stolen time enabled but not configured for cpu 
%d\n",
+cpu);
+   return 0;
+   }
+
+   return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+}
+
+static int disable_stolen_time_current_cpu(void)
+{
+   struct kvmarm_stolen_time_region *reg;
+
+   reg = this_cpu_ptr(&stolen_time_region);
+   if (!reg->kaddr)
+   return 0;
+
+   memunmap(reg->kaddr);
+   memset(reg, 0, sizeof(*reg));
+
+   return 0;
+}
+
+static int stolen_time_dying_cpu(unsigned int cpu)
+{
+   return disable_stolen_time_current_cpu();
+}
+
+static int init_stolen_time_cpu(unsigned int cpu)
+{
+   struct kvmarm_stolen_time_region *reg;
+   struct arm_smccc_res res;
+
+   reg = this_cpu_ptr(&stolen_time_region);
+
+   if (reg->kaddr)
+   return 0;
+
+   arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+   if ((long)res.a0 < 0)
+   return -EINVAL;


Hi Steven,

Since userspace is not involved yet (right?), no one will create the
PV_TIME device for guest (and no one will specify the IPA of the shared
stolen time region), and I guess we will get a "not supported" error
here.

So what should we do if we want to test this series now?  Any userspace
tools?  If no, do you have any plans for userspace developing? ;-)


Thanks,
zenghui


+
+   reg->kaddr = memremap(res.a0,
+   sizeof(struct pvclock_vcpu_stolen_time_info),
+   MEMREMAP_WB);
+
+   if (reg->kaddr == NULL) {
+   pr_warn("Failed to map stolen time data structure\n");
+   return -EINVAL;
+   }
+
+   if (le32_to_cpu(reg->kaddr->revision) != 0 ||
+   le32_to_cpu(reg->kaddr->attributes) != 0) {
+   pr_warn("Unexpected revision or attributes in stolen time 
data\n");
+   return -ENXIO;
+   }
+
+   return 0;
+}
+
+static int kvm_arm_init_stolen_time(void)
+{
+   int ret;
+
+   ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING,
+   "hypervisor/kvmarm/pv:starting",
+   init_stolen_time_cpu, stolen_time_dying_cpu);
+   if (ret < 0)
+   return ret;
+   return 0;
+}
+
+static bool has_kvm_steal_clock(void)
+{
+   struct arm_smccc_res res;
+
+   /* To detect the presence of PV time support we require SMCCC 1.1+ */
+   if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+   return false;
+
+   arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,

Re: [PATCH 9/9] arm64: Retrieve stolen time as paravirtualized guest

2019-08-08 Thread Marc Zyngier
On 08/08/2019 16:29, Steven Price wrote:
> On 04/08/2019 10:53, Marc Zyngier wrote:
>> On Fri,  2 Aug 2019 15:50:17 +0100
>> Steven Price  wrote:
>>
>>> Enable paravirtualization features when running under a hypervisor
>>> supporting the PV_TIME_ST hypercall.
>>>
>>> For each (v)CPU, we ask the hypervisor for the location of a shared
>>> page which the hypervisor will use to report stolen time to us. We set
>>> pv_time_ops to the stolen time function which simply reads the stolen
>>> value from the shared page for a VCPU. We guarantee single-copy
>>> atomicity using READ_ONCE which means we can also read the stolen
>>> time for another VCPU than the currently running one while it is
>>> potentially being updated by the hypervisor.
>>>
>>> Signed-off-by: Steven Price 
>>> ---
>>>  arch/arm64/kernel/Makefile |   1 +
>>>  arch/arm64/kernel/kvm.c| 155 +

[...]

>>> +static int __init kvm_guest_init(void)
>>> +{
>>> +   int ret = 0;
>>> +
>>> +   if (!has_kvm_steal_clock())
>>> +   return 0;
>>> +
>>> +   ret = kvm_arm_init_stolen_time();
>>> +   if (ret)
>>> +   return ret;
>>> +
>>> +   pv_ops.time.steal_clock = kvm_steal_clock;
>>> +
>>> +   static_key_slow_inc(¶virt_steal_enabled);
>>> +   if (steal_acc)
>>> +   static_key_slow_inc(¶virt_steal_rq_enabled);
>>> +
>>> +   pr_info("using stolen time PV\n");
>>> +
>>> +   return 0;
>>> +}
>>> +early_initcall(kvm_guest_init);
>>
>> Is there any reason why we wouldn't directly call into this rather than
>> using an initcall?
> 
> I'm not sure where the direct call would go - any pointers?

I'd be temped to say arch/arm64/kernel/time.c:time_init(), provided that
there is no issue with the CPU hotplug lock (I remember hitting that a
while ago).

M.
-- 
Jazz is not dead, it just smells funny...


Re: [PATCH 9/9] arm64: Retrieve stolen time as paravirtualized guest

2019-08-08 Thread Steven Price
On 04/08/2019 10:53, Marc Zyngier wrote:
> On Fri,  2 Aug 2019 15:50:17 +0100
> Steven Price  wrote:
> 
>> Enable paravirtualization features when running under a hypervisor
>> supporting the PV_TIME_ST hypercall.
>>
>> For each (v)CPU, we ask the hypervisor for the location of a shared
>> page which the hypervisor will use to report stolen time to us. We set
>> pv_time_ops to the stolen time function which simply reads the stolen
>> value from the shared page for a VCPU. We guarantee single-copy
>> atomicity using READ_ONCE which means we can also read the stolen
>> time for another VCPU than the currently running one while it is
>> potentially being updated by the hypervisor.
>>
>> Signed-off-by: Steven Price 
>> ---
>>  arch/arm64/kernel/Makefile |   1 +
>>  arch/arm64/kernel/kvm.c| 155 +
> 
> nit: Why not using paravirt.c, which clearly states what it does? The
> alternative would be to name it kvm-pv.c.

I can move it to paravirt.c - seems reasonable.

>>  include/linux/cpuhotplug.h |   1 +
>>  3 files changed, 157 insertions(+)
>>  create mode 100644 arch/arm64/kernel/kvm.c
>>
>> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
>> index 478491f07b4f..eb36edf9b930 100644
>> --- a/arch/arm64/kernel/Makefile
>> +++ b/arch/arm64/kernel/Makefile
>> @@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE)   += crash_core.o
>>  obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
>>  obj-$(CONFIG_ARM64_SSBD)+= ssbd.o
>>  obj-$(CONFIG_ARM64_PTR_AUTH)+= pointer_auth.o
>> +obj-$(CONFIG_PARAVIRT)  += kvm.o
>>  
>>  obj-y   += vdso/ probes/
>>  obj-$(CONFIG_COMPAT_VDSO)   += vdso32/
>> diff --git a/arch/arm64/kernel/kvm.c b/arch/arm64/kernel/kvm.c
>> new file mode 100644
>> index ..245398c79dae
>> --- /dev/null
>> +++ b/arch/arm64/kernel/kvm.c
>> @@ -0,0 +1,155 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +// Copyright (C) 2019 Arm Ltd.
>> +
>> +#define pr_fmt(fmt) "kvmarm-pv: " fmt
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include 
>> +#include 
>> +#include 
>> +
>> +struct kvmarm_stolen_time_region {
>> +struct pvclock_vcpu_stolen_time_info *kaddr;
>> +};
>> +
>> +static DEFINE_PER_CPU(struct kvmarm_stolen_time_region, stolen_time_region);
>> +
>> +static bool steal_acc = true;
>> +static int __init parse_no_stealacc(char *arg)
>> +{
>> +steal_acc = false;
>> +return 0;
>> +}
>> +early_param("no-steal-acc", parse_no_stealacc);
>> +
>> +/* return stolen time in ns by asking the hypervisor */
>> +static u64 kvm_steal_clock(int cpu)
>> +{
>> +struct kvmarm_stolen_time_region *reg;
>> +
>> +reg = per_cpu_ptr(&stolen_time_region, cpu);
>> +if (!reg->kaddr) {
>> +pr_warn_once("stolen time enabled but not configured for cpu 
>> %d\n",
>> + cpu);
>> +return 0;
>> +}
>> +
>> +return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
>> +}
>> +
>> +static int disable_stolen_time_current_cpu(void)
>> +{
>> +struct kvmarm_stolen_time_region *reg;
>> +
>> +reg = this_cpu_ptr(&stolen_time_region);
>> +if (!reg->kaddr)
>> +return 0;
>> +
>> +memunmap(reg->kaddr);
>> +memset(reg, 0, sizeof(*reg));
>> +
>> +return 0;
>> +}
>> +
>> +static int stolen_time_dying_cpu(unsigned int cpu)
>> +{
>> +return disable_stolen_time_current_cpu();
>> +}
>> +
>> +static int init_stolen_time_cpu(unsigned int cpu)
>> +{
>> +struct kvmarm_stolen_time_region *reg;
>> +struct arm_smccc_res res;
>> +
>> +reg = this_cpu_ptr(&stolen_time_region);
>> +
>> +if (reg->kaddr)
>> +return 0;
> 
> Can this actually happen? It'd take two CPU_UP calls from the HP
> notifiers to get in that situation...

Yes, something would have to be very broken for that to happen - I'll
remove this check.

>> +
>> +arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
>> +
>> +if ((long)res.a0 < 0)
>> +return -EINVAL;
>> +
>> +reg->kaddr = memremap(res.a0,
>> +sizeof(struct pvclock_vcpu_stolen_time_info),
>> +MEMREMAP_WB);
>> +
>> +if (reg->kaddr == NULL) {
>> +pr_warn("Failed to map stolen time data structure\n");
>> +return -EINVAL;
> 
> -ENOMEM is the expected return code.

Ok

>> +}
>> +
>> +if (le32_to_cpu(reg->kaddr->revision) != 0 ||
>> +le32_to_cpu(reg->kaddr->attributes) != 0) {
>> +pr_warn("Unexpected revision or attributes in stolen time 
>> data\n");
>> +return -ENXIO;
>> +}
>> +
>> +return 0;
>> +}
>> +
>> +static int kvm_arm_init_stolen_time(void)
>> +{
>> +int ret;
>> +
>> +ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING,
>> +"hypervisor/kvmarm/pv:starting",
>> +init_stolen_

Re: [PATCH 9/9] arm64: Retrieve stolen time as paravirtualized guest

2019-08-04 Thread Marc Zyngier
On Fri,  2 Aug 2019 15:50:17 +0100
Steven Price  wrote:

> Enable paravirtualization features when running under a hypervisor
> supporting the PV_TIME_ST hypercall.
> 
> For each (v)CPU, we ask the hypervisor for the location of a shared
> page which the hypervisor will use to report stolen time to us. We set
> pv_time_ops to the stolen time function which simply reads the stolen
> value from the shared page for a VCPU. We guarantee single-copy
> atomicity using READ_ONCE which means we can also read the stolen
> time for another VCPU than the currently running one while it is
> potentially being updated by the hypervisor.
> 
> Signed-off-by: Steven Price 
> ---
>  arch/arm64/kernel/Makefile |   1 +
>  arch/arm64/kernel/kvm.c| 155 +

nit: Why not using paravirt.c, which clearly states what it does? The
alternative would be to name it kvm-pv.c.

>  include/linux/cpuhotplug.h |   1 +
>  3 files changed, 157 insertions(+)
>  create mode 100644 arch/arm64/kernel/kvm.c
> 
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 478491f07b4f..eb36edf9b930 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE)+= crash_core.o
>  obj-$(CONFIG_ARM_SDE_INTERFACE)  += sdei.o
>  obj-$(CONFIG_ARM64_SSBD) += ssbd.o
>  obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
> +obj-$(CONFIG_PARAVIRT)   += kvm.o
>  
>  obj-y+= vdso/ probes/
>  obj-$(CONFIG_COMPAT_VDSO)+= vdso32/
> diff --git a/arch/arm64/kernel/kvm.c b/arch/arm64/kernel/kvm.c
> new file mode 100644
> index ..245398c79dae
> --- /dev/null
> +++ b/arch/arm64/kernel/kvm.c
> @@ -0,0 +1,155 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (C) 2019 Arm Ltd.
> +
> +#define pr_fmt(fmt) "kvmarm-pv: " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +
> +struct kvmarm_stolen_time_region {
> + struct pvclock_vcpu_stolen_time_info *kaddr;
> +};
> +
> +static DEFINE_PER_CPU(struct kvmarm_stolen_time_region, stolen_time_region);
> +
> +static bool steal_acc = true;
> +static int __init parse_no_stealacc(char *arg)
> +{
> + steal_acc = false;
> + return 0;
> +}
> +early_param("no-steal-acc", parse_no_stealacc);
> +
> +/* return stolen time in ns by asking the hypervisor */
> +static u64 kvm_steal_clock(int cpu)
> +{
> + struct kvmarm_stolen_time_region *reg;
> +
> + reg = per_cpu_ptr(&stolen_time_region, cpu);
> + if (!reg->kaddr) {
> + pr_warn_once("stolen time enabled but not configured for cpu 
> %d\n",
> +  cpu);
> + return 0;
> + }
> +
> + return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
> +}
> +
> +static int disable_stolen_time_current_cpu(void)
> +{
> + struct kvmarm_stolen_time_region *reg;
> +
> + reg = this_cpu_ptr(&stolen_time_region);
> + if (!reg->kaddr)
> + return 0;
> +
> + memunmap(reg->kaddr);
> + memset(reg, 0, sizeof(*reg));
> +
> + return 0;
> +}
> +
> +static int stolen_time_dying_cpu(unsigned int cpu)
> +{
> + return disable_stolen_time_current_cpu();
> +}
> +
> +static int init_stolen_time_cpu(unsigned int cpu)
> +{
> + struct kvmarm_stolen_time_region *reg;
> + struct arm_smccc_res res;
> +
> + reg = this_cpu_ptr(&stolen_time_region);
> +
> + if (reg->kaddr)
> + return 0;

Can this actually happen? It'd take two CPU_UP calls from the HP
notifiers to get in that situation...

> +
> + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
> +
> + if ((long)res.a0 < 0)
> + return -EINVAL;
> +
> + reg->kaddr = memremap(res.a0,
> + sizeof(struct pvclock_vcpu_stolen_time_info),
> + MEMREMAP_WB);
> +
> + if (reg->kaddr == NULL) {
> + pr_warn("Failed to map stolen time data structure\n");
> + return -EINVAL;

-ENOMEM is the expected return code.

> + }
> +
> + if (le32_to_cpu(reg->kaddr->revision) != 0 ||
> + le32_to_cpu(reg->kaddr->attributes) != 0) {
> + pr_warn("Unexpected revision or attributes in stolen time 
> data\n");
> + return -ENXIO;
> + }
> +
> + return 0;
> +}
> +
> +static int kvm_arm_init_stolen_time(void)
> +{
> + int ret;
> +
> + ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING,
> + "hypervisor/kvmarm/pv:starting",
> + init_stolen_time_cpu, stolen_time_dying_cpu);
> + if (ret < 0)
> + return ret;
> + return 0;
> +}
> +
> +static bool has_kvm_steal_clock(void)
> +{
> + struct arm_smccc_res res;
> +
> + /* To detect the presence of PV time support we require SMCCC 1.1+ */
> + if (psci_ops.smccc_versi