[RFC QEMU v2 1/2] arm/virt: Initialize generic timer scale factor dynamically

2018-11-27 Thread Bijan Mottahedeh
Initialize the generic timer scale factor based on the counter frequency
register cntfrq_el0, and default to the current static value if necessary.
Always use the default value for TCG.

Signed-off-by: Bijan Mottahedeh 
---
 hw/arm/virt.c  | 17 +
 target/arm/helper.c| 19 ---
 target/arm/internals.h |  8 ++--
 target/arm/kvm64.c |  1 +
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 281ddcd..792d223 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -59,6 +59,7 @@
 #include "qapi/visitor.h"
 #include "standard-headers/linux/input.h"
 #include "hw/arm/smmuv3.h"
+#include "target/arm/internals.h"
 
 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
 static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
@@ -1710,6 +1711,21 @@ static HotplugHandler 
*virt_machine_get_hotplug_handler(MachineState *machine,
 return NULL;
 }
 
+static void set_system_clock_scale(void)
+{
+unsigned long cntfrq_el0 = 0;
+
+#ifdef  CONFIG_KVM
+asm volatile("mrs %0, cntfrq_el0" : "=r"(cntfrq_el0));
+#endif
+
+if (cntfrq_el0 == 0) {
+cntfrq_el0 = GTIMER_SCALE_DEF;
+}
+
+system_clock_scale = NANOSECONDS_PER_SECOND / (int)cntfrq_el0;
+}
+
 static void virt_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -1736,6 +1752,7 @@ static void virt_machine_class_init(ObjectClass *oc, void 
*data)
 assert(!mc->get_hotplug_handler);
 mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
 hc->plug = virt_machine_device_plug_cb;
+set_system_clock_scale();
 }
 
 static const TypeInfo virt_machine_info = {
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 66afb08..6330586 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -18,6 +18,7 @@
 #include "sysemu/kvm.h"
 #include "fpu/softfloat.h"
 #include "qemu/range.h"
+#include "hw/arm/arm.h"
 
 #define ARM_CPU_FREQ 10 /* FIXME: 1 GHz, should be configurable */
 
@@ -1614,6 +1615,18 @@ static CPAccessResult gt_cntfrq_access(CPUARMState *env, 
const ARMCPRegInfo *ri,
 return CP_ACCESS_OK;
 }
 
+static void gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+assert(GTIMER_SCALE);
+assert(ri->fieldoffset);
+
+if (cpreg_field_is_64bit(ri)) {
+CPREG_FIELD64(env, ri) = NANOSECONDS_PER_SECOND / GTIMER_SCALE;
+} else {
+CPREG_FIELD32(env, ri) = NANOSECONDS_PER_SECOND / GTIMER_SCALE;
+}
+}
+
 static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx,
 bool isread)
 {
@@ -1709,7 +1722,7 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
 }
 }
 
-static uint64_t gt_get_countervalue(CPUARMState *env)
+uint64_t gt_get_countervalue(CPUARMState *env)
 {
 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / GTIMER_SCALE;
 }
@@ -1996,7 +2009,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0,
   .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access,
   .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
-  .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE,
+  .resetfn = gt_cntfrq_reset,
 },
 /* overall control: mostly access permissions */
 { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH,
@@ -2187,7 +2200,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0,
   .type = ARM_CP_CONST, .access = PL0_R /* no PL1_RW in linux-user */,
   .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
-  .resetvalue = NANOSECONDS_PER_SECOND / GTIMER_SCALE,
+  .resetfn = gt_cntfrq_reset,
 },
 { .name = "CNTVCT_EL0", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 2,
diff --git a/target/arm/internals.h b/target/arm/internals.h
index dc93577..b66a1fa 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -52,9 +52,13 @@ static inline bool excp_is_internal(int excp)
 }
 
 /* Scale factor for generic timers, ie number of ns per tick.
- * This gives a 62.5MHz timer.
+ * Calculated dynamically based on CNTFRQ with a default value
+ * that gives a 62.5MHZ timer.
  */
-#define GTIMER_SCALE 16
+#define GTIMER_SCALEsystem_clock_scale
+#define GTIMER_SCALE_DEF16
+
+uint64_t gt_get_countervalue(CPUARMState *);
 
 /* Bit definitions for the v7M CONTROL register */
 FIELD(V7M_CONTROL, NPRIV, 0, 1)
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index e0b8246..5d1c394 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -485,6 +485,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
 set_feature(, ARM_FEATURE_NEON);
 set_feature(, ARM_FEATURE_AARCH64);

[RFC QEMU v2 0/2] arm/virt: Account for guest pause time

2018-11-27 Thread Bijan Mottahedeh
whether modifying the virtual counter offset
breaks any assumptions, e.g., see the kvm_timer_vcpu_put() comment above.


hwclock vs. date


The hwclock on the ends up in drivers/rtc/rtc-pl031.c

Real Time Clock interface for ARM AMBA PrimeCell 031 RTC

ioctl("/dev/rtc", RTC_RD_TIME, ...)
-> rtc_dev_ioctl()
   -> rtc_read_time()
  -> __rtc_read_time()
 -> pl031_read_time()


The date command reads time from from a vdso page updated as follows:

irq_enter()
-> tick_irq_enter()
   -> tick_nohz_irq_enter()
  -> tick_nohz_update_jiffies()
 -> tick_do_update_jiffies64()
-> tick_do_update_jiffies64()
   -> update_wall_time()
  -> timekeeping_advance()
 -> timekeeping_update()
-> update_vsyscall(struct timekeepr *tk)

The struct timekeeper uses the Arm platform clocksource_counter noted above:

(gdb) p tk->tkr_mono
$4 = {clock = 0x097ddad0 ,
  mask = 72057594037927935, cycle_last = 271809294296, mult = 335544320,
  shift = 24, xtime_nsec = 345610649600, base = 5435795172160,
  base_real = 15391264550}

This would explain why without any pause time accounting, the both
hwclock and date command show the same time after the guest is resume
from a pause, e.g. with the following sequence:

virsh suspend 
sleep 
virsh resume 

Bijan Mottahedeh (2):
  arm/virt: Initialize generic timer scale factor dynamically
  arm/virt: Account for guest pause time

 hw/arm/virt.c   | 17 +
 hw/intc/arm_gicv3_kvm.c | 39 +++
 target/arm/cpu.h|  3 +++
 target/arm/helper.c | 19 ---
 target/arm/internals.h  |  8 ++--
 target/arm/kvm64.c  |  1 +
 6 files changed, 82 insertions(+), 5 deletions(-)

-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC QEMU v2 2/2] arm/virt: Account for guest pause time

2018-11-27 Thread Bijan Mottahedeh
Accumulate the total guest pause time and update the virtual counter
offset register accordingly in order to account for that time before
resuming the guest.

Signed-off-by: Bijan Mottahedeh 
---
 hw/intc/arm_gicv3_kvm.c | 39 +++
 target/arm/cpu.h|  3 +++
 2 files changed, 42 insertions(+)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 1e11200..aabd508 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -30,6 +30,7 @@
 #include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/blocker.h"
+#include "target/arm/internals.h"
 
 #ifdef DEBUG_GICV3_KVM
 #define DPRINTF(fmt, ...) \
@@ -745,9 +746,38 @@ static void vm_change_state_handler(void *opaque, int 
running,
 {
 GICv3State *s = (GICv3State *)opaque;
 Error *err = NULL;
+CPUState *cpu;
+CPUARMState *env;
+struct kvm_one_reg reg;
 int ret;
+uint64_t cnt;
 
 if (running) {
+CPU_FOREACH(cpu) {
+
+env = (CPUARMState *)(cpu->env_ptr);
+
+if (!env->pause_start) {
+continue;
+}
+
+/*
+ * Accumulate the total pause time and set the
+ * counter virtual offset accordingly.
+ */
+cnt = gt_get_countervalue(env);
+env->pause_total += (cnt - env->pause_start);
+env->cp15.cntvoff_el2 = cnt - env->pause_total;
+
+env->pause_start = 0;   /* clear for next pause */
+reg.id = KVM_REG_ARM_TIMER_CNT;
+reg.addr = (uintptr_t) >cp15.cntvoff_el2;
+ret = kvm_vcpu_ioctl(cpu, KVM_SET_ONE_REG, );
+if (ret) {
+error_report("Set virtual counter offset failed: %d", ret);
+abort();
+}
+}
 return;
 }
 
@@ -760,6 +790,15 @@ static void vm_change_state_handler(void *opaque, int 
running,
 if (ret < 0 && ret != -EFAULT) {
 abort();
 }
+
+CPU_FOREACH(cpu) {
+/*
+ * Record the current pause start time.
+ */
+env = (CPUARMState *)(cpu->env_ptr);
+cnt = gt_get_countervalue(env);
+env->pause_start = cnt;
+}
 }
 
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e310ffc..bd0a56e 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -602,6 +602,9 @@ typedef struct CPUARMState {
 struct CPUBreakpoint *cpu_breakpoint[16];
 struct CPUWatchpoint *cpu_watchpoint[16];
 
+uint64_t pause_start;   /* start time of last pause */
+uint64_t pause_total;   /* total pause time */
+
 /* Fields up to this point are cleared by a CPU reset */
 struct {} end_reset_fields;
 
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [Qemu-devel] [RFC QEMU 1/2] arm/virt: Initialize generic timer scale factor dynamically

2018-11-09 Thread Bijan Mottahedeh

On 11/8/2018 6:21 AM, Richard Henderson wrote:

On 11/7/18 7:48 PM, Bijan Mottahedeh wrote:
  
+static void set_system_clock_scale(void)

+{
+unsigned long cntfrq_el0;
+
+asm volatile("mrs %0, cntfrq_el0" : "=r"(cntfrq_el0));
+
+if (cntfrq_el0 == 0) {
+cntfrq_el0 = GTIMER_SCALE_DEF;
+}
+
+system_clock_scale = NANOSECONDS_PER_SECOND / (int)cntfrq_el0;
+}

This only works for kvm.

For TCG you need to use the default always.  In particular, it won't even
compile for an x86 host.


r~
Is it ok to ifdef the asm statement with CONFIG_KVM, or what would be 
the correct way to account for TCG vs. KVM?


Thanks.

--bijan
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC QEMU 0/2] arm/virt: Account for guest pause time

2018-11-07 Thread Bijan Mottahedeh
ment above.


hwclock vs. date


The hwclock on the ends up in drivers/rtc/rtc-pl031.c

Real Time Clock interface for ARM AMBA PrimeCell 031 RTC

ioctl("/dev/rtc", RTC_RD_TIME, ...)
-> rtc_dev_ioctl()
   -> rtc_read_time()
  -> __rtc_read_time()
 -> pl031_read_time()


The date command reads time from from a vdso page updated as follows:

irq_enter()
-> tick_irq_enter()
   -> tick_nohz_irq_enter()
  -> tick_nohz_update_jiffies()
 -> tick_do_update_jiffies64()
-> tick_do_update_jiffies64()
   -> update_wall_time()
  -> timekeeping_advance()
 -> timekeeping_update()
-> update_vsyscall(struct timekeepr *tk)

The struct timekeeper uses the Arm platform clocksource_counter noted above:

(gdb) p tk->tkr_mono
$4 = {clock = 0x097ddad0 ,
  mask = 72057594037927935, cycle_last = 271809294296, mult = 335544320,
  shift = 24, xtime_nsec = 345610649600, base = 5435795172160,
  base_real = 15391264550}

This would explain why without any pause time accounting, the both
hwclock and date command show the same time after the guest is resume
from a pause, e.g. with the following sequence:

virsh suspend 
sleep 
virsh resume 

Bijan Mottahedeh (2):
  arm/virt: Initialize generic timer scale factor dynamically
  arm/virt: Account for guest pause time

 hw/arm/virt.c   | 15 +++
 hw/intc/arm_gicv3_kvm.c | 39 +++
 target/arm/cpu.h|  3 +++
 target/arm/helper.c | 19 ---
 target/arm/internals.h  |  8 ++--
 target/arm/kvm64.c  |  1 +
 6 files changed, 80 insertions(+), 5 deletions(-)

-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC QEMU 2/2] arm/virt: Account for guest pause time

2018-11-07 Thread Bijan Mottahedeh
Accumulate the total guest pause time and update the virtual counter
offset register accordingly in order to account for that time before
resuming the guest.

Signed-off-by: Bijan Mottahedeh 
---
 hw/intc/arm_gicv3_kvm.c | 39 +++
 target/arm/cpu.h|  3 +++
 2 files changed, 42 insertions(+)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 1e11200..aabd508 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -30,6 +30,7 @@
 #include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/blocker.h"
+#include "target/arm/internals.h"
 
 #ifdef DEBUG_GICV3_KVM
 #define DPRINTF(fmt, ...) \
@@ -745,9 +746,38 @@ static void vm_change_state_handler(void *opaque, int 
running,
 {
 GICv3State *s = (GICv3State *)opaque;
 Error *err = NULL;
+CPUState *cpu;
+CPUARMState *env;
+struct kvm_one_reg reg;
 int ret;
+uint64_t cnt;
 
 if (running) {
+CPU_FOREACH(cpu) {
+
+env = (CPUARMState *)(cpu->env_ptr);
+
+if (!env->pause_start) {
+continue;
+}
+
+/*
+ * Accumulate the total pause time and set the
+ * counter virtual offset accordingly.
+ */
+cnt = gt_get_countervalue(env);
+env->pause_total += (cnt - env->pause_start);
+env->cp15.cntvoff_el2 = cnt - env->pause_total;
+
+env->pause_start = 0;   /* clear for next pause */
+reg.id = KVM_REG_ARM_TIMER_CNT;
+reg.addr = (uintptr_t) >cp15.cntvoff_el2;
+ret = kvm_vcpu_ioctl(cpu, KVM_SET_ONE_REG, );
+if (ret) {
+error_report("Set virtual counter offset failed: %d", ret);
+abort();
+}
+}
 return;
 }
 
@@ -760,6 +790,15 @@ static void vm_change_state_handler(void *opaque, int 
running,
 if (ret < 0 && ret != -EFAULT) {
 abort();
 }
+
+CPU_FOREACH(cpu) {
+/*
+ * Record the current pause start time.
+ */
+env = (CPUARMState *)(cpu->env_ptr);
+cnt = gt_get_countervalue(env);
+env->pause_start = cnt;
+}
 }
 
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e310ffc..bd0a56e 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -602,6 +602,9 @@ typedef struct CPUARMState {
 struct CPUBreakpoint *cpu_breakpoint[16];
 struct CPUWatchpoint *cpu_watchpoint[16];
 
+uint64_t pause_start;   /* start time of last pause */
+uint64_t pause_total;   /* total pause time */
+
 /* Fields up to this point are cleared by a CPU reset */
 struct {} end_reset_fields;
 
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC QEMU 1/2] arm/virt: Initialize generic timer scale factor dynamically

2018-11-07 Thread Bijan Mottahedeh
Initialize the generic timer scale factor based on the counter frequency
register cntfrq_el0, and default to the current static value if necessary.

Signed-off-by: Bijan Mottahedeh 
---
 hw/arm/virt.c  | 15 +++
 target/arm/helper.c| 19 ---
 target/arm/internals.h |  8 ++--
 target/arm/kvm64.c |  1 +
 4 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 281ddcd..200a601 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -59,6 +59,7 @@
 #include "qapi/visitor.h"
 #include "standard-headers/linux/input.h"
 #include "hw/arm/smmuv3.h"
+#include "target/arm/internals.h"
 
 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
 static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
@@ -1710,6 +1711,19 @@ static HotplugHandler 
*virt_machine_get_hotplug_handler(MachineState *machine,
 return NULL;
 }
 
+static void set_system_clock_scale(void)
+{
+unsigned long cntfrq_el0;
+
+asm volatile("mrs %0, cntfrq_el0" : "=r"(cntfrq_el0));
+
+if (cntfrq_el0 == 0) {
+cntfrq_el0 = GTIMER_SCALE_DEF;
+}
+
+system_clock_scale = NANOSECONDS_PER_SECOND / (int)cntfrq_el0;
+}
+
 static void virt_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -1736,6 +1750,7 @@ static void virt_machine_class_init(ObjectClass *oc, void 
*data)
 assert(!mc->get_hotplug_handler);
 mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
 hc->plug = virt_machine_device_plug_cb;
+set_system_clock_scale();
 }
 
 static const TypeInfo virt_machine_info = {
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 66afb08..6330586 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -18,6 +18,7 @@
 #include "sysemu/kvm.h"
 #include "fpu/softfloat.h"
 #include "qemu/range.h"
+#include "hw/arm/arm.h"
 
 #define ARM_CPU_FREQ 10 /* FIXME: 1 GHz, should be configurable */
 
@@ -1614,6 +1615,18 @@ static CPAccessResult gt_cntfrq_access(CPUARMState *env, 
const ARMCPRegInfo *ri,
 return CP_ACCESS_OK;
 }
 
+static void gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+assert(GTIMER_SCALE);
+assert(ri->fieldoffset);
+
+if (cpreg_field_is_64bit(ri)) {
+CPREG_FIELD64(env, ri) = NANOSECONDS_PER_SECOND / GTIMER_SCALE;
+} else {
+CPREG_FIELD32(env, ri) = NANOSECONDS_PER_SECOND / GTIMER_SCALE;
+}
+}
+
 static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx,
 bool isread)
 {
@@ -1709,7 +1722,7 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
 }
 }
 
-static uint64_t gt_get_countervalue(CPUARMState *env)
+uint64_t gt_get_countervalue(CPUARMState *env)
 {
 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / GTIMER_SCALE;
 }
@@ -1996,7 +2009,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0,
   .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access,
   .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
-  .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE,
+  .resetfn = gt_cntfrq_reset,
 },
 /* overall control: mostly access permissions */
 { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH,
@@ -2187,7 +2200,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0,
   .type = ARM_CP_CONST, .access = PL0_R /* no PL1_RW in linux-user */,
   .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
-  .resetvalue = NANOSECONDS_PER_SECOND / GTIMER_SCALE,
+  .resetfn = gt_cntfrq_reset,
 },
 { .name = "CNTVCT_EL0", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 2,
diff --git a/target/arm/internals.h b/target/arm/internals.h
index dc93577..b66a1fa 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -52,9 +52,13 @@ static inline bool excp_is_internal(int excp)
 }
 
 /* Scale factor for generic timers, ie number of ns per tick.
- * This gives a 62.5MHz timer.
+ * Calculated dynamically based on CNTFRQ with a default value
+ * that gives a 62.5MHZ timer.
  */
-#define GTIMER_SCALE 16
+#define GTIMER_SCALEsystem_clock_scale
+#define GTIMER_SCALE_DEF16
+
+uint64_t gt_get_countervalue(CPUARMState *);
 
 /* Bit definitions for the v7M CONTROL register */
 FIELD(V7M_CONTROL, NPRIV, 0, 1)
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index e0b8246..5d1c394 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -485,6 +485,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
 set_feature(, ARM_FEATURE_NEON);
 set_feature(, ARM_FEATURE_AARCH64);
 set_feature(, ARM_FEATURE_PMU);
+set_feature(, ARM_FEATURE_GENERIC

[RFC QEMU 0/2] arm/virt: Account for guest pause time

2018-11-06 Thread Bijan Mottahedeh
ment above.


hwclock vs. date


The hwclock on the ends up in drivers/rtc/rtc-pl031.c

Real Time Clock interface for ARM AMBA PrimeCell 031 RTC

ioctl("/dev/rtc", RTC_RD_TIME, ...)
-> rtc_dev_ioctl()
   -> rtc_read_time()
  -> __rtc_read_time()
 -> pl031_read_time()


The date command reads time from from a vdso page updated as follows:

irq_enter()
-> tick_irq_enter()
   -> tick_nohz_irq_enter()
  -> tick_nohz_update_jiffies()
 -> tick_do_update_jiffies64()
-> tick_do_update_jiffies64()
   -> update_wall_time()
  -> timekeeping_advance()
 -> timekeeping_update()
-> update_vsyscall(struct timekeepr *tk)

The struct timekeeper uses the Arm platform clocksource_counter noted above:

(gdb) p tk->tkr_mono
$4 = {clock = 0x097ddad0 ,
  mask = 72057594037927935, cycle_last = 271809294296, mult = 335544320,
  shift = 24, xtime_nsec = 345610649600, base = 5435795172160,
  base_real = 15391264550}

This would explain why without any pause time accounting, the both
hwclock and date command show the same time after the guest is resume
from a pause, e.g. with the following sequence:

virsh suspend 
sleep 
virsh resume 

Bijan Mottahedeh (2):
  arm/virt: Initialize generic timer scale factor dynamically
  arm/virt: Account for guest pause time

 hw/arm/virt.c   | 15 +++
 hw/intc/arm_gicv3_kvm.c | 39 +++
 target/arm/cpu.h|  3 +++
 target/arm/helper.c | 19 ---
 target/arm/internals.h  |  8 ++--
 target/arm/kvm64.c  |  1 +
 6 files changed, 80 insertions(+), 5 deletions(-)

-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC QEMU 1/2] arm/virt: Initialize generic timer scale factor dynamically

2018-11-06 Thread Bijan Mottahedeh
Initialize the generic timer scale factor based on the counter frequency
register cntfrq_el0, and default to the current static value if necessary.

Signed-off-by: Bijan Mottahedeh 
---
 hw/arm/virt.c  | 15 +++
 target/arm/helper.c| 19 ---
 target/arm/internals.h |  8 ++--
 target/arm/kvm64.c |  1 +
 4 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 281ddcd..200a601 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -59,6 +59,7 @@
 #include "qapi/visitor.h"
 #include "standard-headers/linux/input.h"
 #include "hw/arm/smmuv3.h"
+#include "target/arm/internals.h"
 
 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
 static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
@@ -1710,6 +1711,19 @@ static HotplugHandler 
*virt_machine_get_hotplug_handler(MachineState *machine,
 return NULL;
 }
 
+static void set_system_clock_scale(void)
+{
+unsigned long cntfrq_el0;
+
+asm volatile("mrs %0, cntfrq_el0" : "=r"(cntfrq_el0));
+
+if (cntfrq_el0 == 0) {
+cntfrq_el0 = GTIMER_SCALE_DEF;
+}
+
+system_clock_scale = NANOSECONDS_PER_SECOND / (int)cntfrq_el0;
+}
+
 static void virt_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -1736,6 +1750,7 @@ static void virt_machine_class_init(ObjectClass *oc, void 
*data)
 assert(!mc->get_hotplug_handler);
 mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
 hc->plug = virt_machine_device_plug_cb;
+set_system_clock_scale();
 }
 
 static const TypeInfo virt_machine_info = {
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 66afb08..6330586 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -18,6 +18,7 @@
 #include "sysemu/kvm.h"
 #include "fpu/softfloat.h"
 #include "qemu/range.h"
+#include "hw/arm/arm.h"
 
 #define ARM_CPU_FREQ 10 /* FIXME: 1 GHz, should be configurable */
 
@@ -1614,6 +1615,18 @@ static CPAccessResult gt_cntfrq_access(CPUARMState *env, 
const ARMCPRegInfo *ri,
 return CP_ACCESS_OK;
 }
 
+static void gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+assert(GTIMER_SCALE);
+assert(ri->fieldoffset);
+
+if (cpreg_field_is_64bit(ri)) {
+CPREG_FIELD64(env, ri) = NANOSECONDS_PER_SECOND / GTIMER_SCALE;
+} else {
+CPREG_FIELD32(env, ri) = NANOSECONDS_PER_SECOND / GTIMER_SCALE;
+}
+}
+
 static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx,
 bool isread)
 {
@@ -1709,7 +1722,7 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
 }
 }
 
-static uint64_t gt_get_countervalue(CPUARMState *env)
+uint64_t gt_get_countervalue(CPUARMState *env)
 {
 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / GTIMER_SCALE;
 }
@@ -1996,7 +2009,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0,
   .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access,
   .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
-  .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE,
+  .resetfn = gt_cntfrq_reset,
 },
 /* overall control: mostly access permissions */
 { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH,
@@ -2187,7 +2200,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0,
   .type = ARM_CP_CONST, .access = PL0_R /* no PL1_RW in linux-user */,
   .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
-  .resetvalue = NANOSECONDS_PER_SECOND / GTIMER_SCALE,
+  .resetfn = gt_cntfrq_reset,
 },
 { .name = "CNTVCT_EL0", .state = ARM_CP_STATE_AA64,
   .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 2,
diff --git a/target/arm/internals.h b/target/arm/internals.h
index dc93577..b66a1fa 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -52,9 +52,13 @@ static inline bool excp_is_internal(int excp)
 }
 
 /* Scale factor for generic timers, ie number of ns per tick.
- * This gives a 62.5MHz timer.
+ * Calculated dynamically based on CNTFRQ with a default value
+ * that gives a 62.5MHZ timer.
  */
-#define GTIMER_SCALE 16
+#define GTIMER_SCALEsystem_clock_scale
+#define GTIMER_SCALE_DEF16
+
+uint64_t gt_get_countervalue(CPUARMState *);
 
 /* Bit definitions for the v7M CONTROL register */
 FIELD(V7M_CONTROL, NPRIV, 0, 1)
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index e0b8246..5d1c394 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -485,6 +485,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
 set_feature(, ARM_FEATURE_NEON);
 set_feature(, ARM_FEATURE_AARCH64);
 set_feature(, ARM_FEATURE_PMU);
+set_feature(, ARM_FEATURE_GENERIC

[RFC QEMU 2/2] arm/virt: Account for guest pause time

2018-11-06 Thread Bijan Mottahedeh
Accumulate the total guest pause time and update the virtual counter
offset register accordingly in order to account for that time before
resuming the guest.

Signed-off-by: Bijan Mottahedeh 
---
 hw/intc/arm_gicv3_kvm.c | 39 +++
 target/arm/cpu.h|  3 +++
 2 files changed, 42 insertions(+)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 1e11200..aabd508 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -30,6 +30,7 @@
 #include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/blocker.h"
+#include "target/arm/internals.h"
 
 #ifdef DEBUG_GICV3_KVM
 #define DPRINTF(fmt, ...) \
@@ -745,9 +746,38 @@ static void vm_change_state_handler(void *opaque, int 
running,
 {
 GICv3State *s = (GICv3State *)opaque;
 Error *err = NULL;
+CPUState *cpu;
+CPUARMState *env;
+struct kvm_one_reg reg;
 int ret;
+uint64_t cnt;
 
 if (running) {
+CPU_FOREACH(cpu) {
+
+env = (CPUARMState *)(cpu->env_ptr);
+
+if (!env->pause_start) {
+continue;
+}
+
+/*
+ * Accumulate the total pause time and set the
+ * counter virtual offset accordingly.
+ */
+cnt = gt_get_countervalue(env);
+env->pause_total += (cnt - env->pause_start);
+env->cp15.cntvoff_el2 = cnt - env->pause_total;
+
+env->pause_start = 0;   /* clear for next pause */
+reg.id = KVM_REG_ARM_TIMER_CNT;
+reg.addr = (uintptr_t) >cp15.cntvoff_el2;
+ret = kvm_vcpu_ioctl(cpu, KVM_SET_ONE_REG, );
+if (ret) {
+error_report("Set virtual counter offset failed: %d", ret);
+abort();
+}
+}
 return;
 }
 
@@ -760,6 +790,15 @@ static void vm_change_state_handler(void *opaque, int 
running,
 if (ret < 0 && ret != -EFAULT) {
 abort();
 }
+
+CPU_FOREACH(cpu) {
+/*
+ * Record the current pause start time.
+ */
+env = (CPUARMState *)(cpu->env_ptr);
+cnt = gt_get_countervalue(env);
+env->pause_start = cnt;
+}
 }
 
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e310ffc..bd0a56e 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -602,6 +602,9 @@ typedef struct CPUARMState {
 struct CPUBreakpoint *cpu_breakpoint[16];
 struct CPUWatchpoint *cpu_watchpoint[16];
 
+uint64_t pause_start;   /* start time of last pause */
+uint64_t pause_total;   /* total pause time */
+
 /* Fields up to this point are cleared by a CPU reset */
 struct {} end_reset_fields;
 
-- 
1.8.3.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: Timekeeping on ARM guests/hosts

2018-11-02 Thread Bijan Mottahedeh

On 11/2/2018 7:34 AM, Christoffer Dall wrote:

The key is whether the userspace program that controls the KVM VM
(kvmtool, QEMU, crosvm) uses the KVM_REG_ARM_TIMER_CNT ioctl to save the
VM view of virtual time, and to retore that at a later time.

KVM adjusts the CNTVOFF_EL2 for the VM on which the ioctl is executed to
represent the value written by userspace to the VM when the VM reads
CNTVCT_EL0.


The Arm ARM doesn't say a great deal about power saving modes so I
wouldn't be surprised if there's differing behaviour as to whether the
system clock is stopped during suspend modes. Indeed I wonder if the
clock can even go backwards during a suspend-to-disk/resume cycle? I
don't have hardware handy to test this.

The specific hardware I have in front of me is a Samsung Chromebook
Plus (board codename "kevin"), which I believe has an RK3399
processor. (More info at
https://www.chromium.org/chromium-os/developer-information-for-chrome-os-devices)


For the purpose of timekeeping in KVM, we need an architecturally
meaningful solution, not something specific to any device.


I've been working on a QEMU patch to address a problem observed when an 
active guest is paused and resumed after a certain delay with virsh or 
the QEMU monitor.


A simple test to reproduce the problem executes one or more instances of 
the following command in the guest:


dd if=/dev/zero of=/dev/null &

and then pauses and resumes the guest after a certain delay:

virsh suspend         # pauses the guest
sleep 120
virsh resume 

After the guest is resumed, there are soft lockup warning messages 
displayed on the console.


A comparison with x86 shows that hwclock and date values diverge after 
the above pause and resume sequence for x86 but remain the same for Arm.


The patch accumulates the total guest pause time in QEMU and adjusts the 
virtual offset counter accordingly with the KVM_REG_ARM_TIMER_CNT ioctl 
before the guest is resumed.  With the patch the time behavior is the 
same as x86 and the soft lockup messages go away.


I've tested the patch on an Ampere eMag server but I'm not sure how 
complete, generic, and backward compatible of a solution the patch is in 
terms of other Arm platforms.


Also, I'm not sure if and when this patch would be superseded by the 
proposal from your KVM Forum 2018 presentation:


Paravirtualized Time for Arm-based Systems
https://developer.arm.com/docs/den0057/a

Would it make sense to send the patch as an RFC for evaluation at this 
point or do you suggest any other considerations?


Thanks.

--bijan
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm