On 18/05/16 18:14, Juergen Gross wrote:
> On 18/05/16 18:09, Tony S wrote:
>> On Wed, May 18, 2016 at 8:57 AM, Dario Faggioli
>> <dario.faggi...@citrix.com> wrote:
>>> On Wed, 2016-05-18 at 14:24 +0200, Juergen Gross wrote:
>>>> On 17/05/16 11:33, George Dunlap wrote:
>>>>>> Looks like CONFIG_PARAVIRT_TIME_ACCOUNTING is used for adjusting
>>>>>> process
>>>>>> times. KVM uses it but Xen doesn't.
>>>>> Is someone on the Linux side going to put this on their to-do list
>>>>> then? :-)
>>>>
>>>> Patch sent.
>>>>
>>> Yep, seen it, thanks.
>>>
>>>> Support was already existing for arm.
>>>>
>>> Yes!! I remember Stefano talking about introducing it, and that was
>>> also why I thought we had it already since long time on x86.
>>>
>>> Well, anyway... :-)
>>>
>>>> What is missing is support for
>>>> paravirt_steal_rq_enabled which requires to be able to read the
>>>> stolen
>>>> time of another cpu. This can't work today as accessing another cpu's
>>>> vcpu_runstate_info isn't possible without risking inconsistent data.
>>>> I plan to add support for this, too, but this will require adding
>>>> another hypercall to map a modified vcpu_runstate_info containing an
>>>> indicator for an ongoing update of the data.
>>>>
>>> Understood.
>>>
>>> So, Tony, up for trying again your workload with this patch applied to
>>> Linux?
>>>
>>> Most likely, it _won't_ fix all the problems you're seeing, but I'm
>>> curious to see if it helps.
>>
>> Hi Dario,
>>
>> I did not see the patch. Can you please send me the patch and I will
>> try to test it later.
> 
> Here is an updated version.

Tony, would you be interested to test a complete solution?

This would require to use a Xen 4.7 hypervisor with some patches applied
and some patches for the Linux kernel (I've done some basic tests with
kernel 4.6). I've attached the patches in case you want to try them. :-)
You should set CONFIG_PARAVIRT_TIME_ACCOUNTING=y in the kernel .config


Juergen

From 689b4ba8c13be73ed51e485a7f7baea593d0ce6e Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Tue, 17 May 2016 14:03:02 +0200
Subject: [PATCH v4] xen: add steal_clock support on x86

The pv_time_ops structure contains a function pointer for the
"steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86
uses its own mechanism to account for the "stolen" time a thread wasn't
able to run due to hypervisor scheduling.

Add support in Xen arch independent time handling for this feature by
moving it out of the arm arch into drivers/xen and remove the x86 Xen
hack.

Signed-off-by: Juergen Gross <jgr...@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrov...@oracle.com>
---
V4: minor adjustments as requested by Stefano Stabellini (remove
    no longer needed #include, remove __init from header)
V3: add #include <asm/paravirt.h> to avoid build error on arm
V2: remove the x86 do_stolen_accounting() hack
---
 arch/arm/xen/enlighten.c    | 18 ++----------------
 arch/x86/xen/time.c         | 44 ++------------------------------------------
 drivers/xen/time.c          | 20 ++++++++++++++++++++
 include/linux/kernel_stat.h |  1 -
 include/xen/xen-ops.h       |  1 +
 kernel/sched/cputime.c      | 10 ----------
 6 files changed, 25 insertions(+), 69 deletions(-)

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 75cd734..71db30c 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -12,7 +12,6 @@
 #include <xen/page.h>
 #include <xen/interface/sched.h>
 #include <xen/xen-ops.h>
-#include <asm/paravirt.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 #include <asm/system_misc.h>
@@ -84,19 +83,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
 }
 EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
 
-static unsigned long long xen_stolen_accounting(int cpu)
-{
-       struct vcpu_runstate_info state;
-
-       BUG_ON(cpu != smp_processor_id());
-
-       xen_get_runstate_snapshot(&state);
-
-       WARN_ON(state.state != RUNSTATE_running);
-
-       return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
-}
-
 static void xen_read_wallclock(struct timespec64 *ts)
 {
        u32 version;
@@ -355,8 +341,8 @@ static int __init xen_guest_init(void)
 
        register_cpu_notifier(&xen_cpu_notifier);
 
-       pv_time_ops.steal_clock = xen_stolen_accounting;
-       static_key_slow_inc(&paravirt_steal_enabled);
+       xen_time_setup_guest();
+
        if (xen_initial_domain())
                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a0a4e55..6be31df 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -11,8 +11,6 @@
 #include <linux/interrupt.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
-#include <linux/kernel_stat.h>
-#include <linux/math64.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/pvclock_gtod.h>
@@ -31,44 +29,6 @@
 
 /* Xen may fire a timer up to this many ns early */
 #define TIMER_SLOP     100000
-#define NS_PER_TICK    (1000000000LL / HZ)
-
-/* snapshots of runstate info */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
-
-/* unused ns of stolen time */
-static DEFINE_PER_CPU(u64, xen_residual_stolen);
-
-static void do_stolen_accounting(void)
-{
-       struct vcpu_runstate_info state;
-       struct vcpu_runstate_info *snap;
-       s64 runnable, offline, stolen;
-       cputime_t ticks;
-
-       xen_get_runstate_snapshot(&state);
-
-       WARN_ON(state.state != RUNSTATE_running);
-
-       snap = this_cpu_ptr(&xen_runstate_snapshot);
-
-       /* work out how much time the VCPU has not been runn*ing*  */
-       runnable = state.time[RUNSTATE_runnable] - 
snap->time[RUNSTATE_runnable];
-       offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
-
-       *snap = state;
-
-       /* Add the appropriate number of ticks of stolen time,
-          including any left-overs from last time. */
-       stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
-
-       if (stolen < 0)
-               stolen = 0;
-
-       ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
-       __this_cpu_write(xen_residual_stolen, stolen);
-       account_steal_ticks(ticks);
-}
 
 /* Get the TSC speed from Xen */
 static unsigned long xen_tsc_khz(void)
@@ -335,8 +295,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void 
*dev_id)
                ret = IRQ_HANDLED;
        }
 
-       do_stolen_accounting();
-
        return ret;
 }
 
@@ -431,6 +389,8 @@ static void __init xen_time_init(void)
        xen_setup_timer(cpu);
        xen_setup_cpu_clockevents();
 
+       xen_time_setup_guest();
+
        if (xen_initial_domain())
                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 }
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 7107842..2257b66 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -6,6 +6,7 @@
 #include <linux/math64.h>
 #include <linux/gfp.h>
 
+#include <asm/paravirt.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
@@ -75,6 +76,15 @@ bool xen_vcpu_stolen(int vcpu)
        return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
 }
 
+static u64 xen_steal_clock(int cpu)
+{
+       struct vcpu_runstate_info state;
+
+       BUG_ON(cpu != smp_processor_id());
+       xen_get_runstate_snapshot(&state);
+       return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
+}
+
 void xen_setup_runstate_info(int cpu)
 {
        struct vcpu_register_runstate_memory_area area;
@@ -86,3 +96,13 @@ void xen_setup_runstate_info(int cpu)
                BUG();
 }
 
+void __init xen_time_setup_guest(void)
+{
+       pv_time_ops.steal_clock = xen_steal_clock;
+
+       static_key_slow_inc(&paravirt_steal_enabled);
+       /*
+        * We can't set paravirt_steal_rq_enabled as this would require the
+        * capability to read another cpu's runstate info.
+        */
+}
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 25a822f..44fda64 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct 
*tsk, int user)
 extern void account_process_tick(struct task_struct *, int user);
 #endif
 
-extern void account_steal_ticks(unsigned long ticks);
 extern void account_idle_ticks(unsigned long ticks);
 
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 86abe07..77bf9d1 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block 
*nb);
 
 bool xen_vcpu_stolen(int vcpu);
 void xen_setup_runstate_info(int cpu);
+void xen_time_setup_guest(void);
 void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
 
 int xen_setup_shutdown_event(void);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 75f98c5..8c4c6dc 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -490,16 +490,6 @@ void account_process_tick(struct task_struct *p, int 
user_tick)
 }
 
 /*
- * Account multiple ticks of steal time.
- * @p: the process from which the cpu time has been stolen
- * @ticks: number of stolen ticks
- */
-void account_steal_ticks(unsigned long ticks)
-{
-       account_steal_time(jiffies_to_cputime(ticks));
-}
-
-/*
  * Account multiple ticks of idle time.
  * @ticks: number of stolen ticks
  */
-- 
2.6.6

From 4073bb301aed18981ec69c3cf5f0df4fae567d7c Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Fri, 20 May 2016 09:32:30 +0200
Subject: [PATCH 1/3] xen: update xen headers

Update some Xen headers to be able to use new functionality.

Signed-off-by: Juergen Gross <jgr...@suse.com>
---
 include/xen/interface/vcpu.h | 24 +++++++++++++++---------
 include/xen/interface/xen.h  | 17 ++++++++++++++++-
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index b05288c..98188c8 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -75,15 +75,21 @@
  */
 #define VCPUOP_get_runstate_info        4
 struct vcpu_runstate_info {
-               /* VCPU's current state (RUNSTATE_*). */
-               int              state;
-               /* When was current state entered (system time, ns)? */
-               uint64_t state_entry_time;
-               /*
-                * Time spent in each RUNSTATE_* (ns). The sum of these times is
-                * guaranteed not to drift from system time.
-                */
-               uint64_t time[4];
+       /* VCPU's current state (RUNSTATE_*). */
+       int              state;
+       /* When was current state entered (system time, ns)? */
+       uint64_t state_entry_time;
+       /*
+        * Update indicator set in state_entry_time:
+        * When activated via VMASST_TYPE_runstate_update_flag, set during
+        * updates in guest memory mapped copy of vcpu_runstate_info.
+        */
+#define XEN_RUNSTATE_UPDATE    (1ULL << 63)
+       /*
+        * Time spent in each RUNSTATE_* (ns). The sum of these times is
+        * guaranteed not to drift from system time.
+        */
+       uint64_t time[4];
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info);
 
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index d133112..1b0d189 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -413,7 +413,22 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
 /* x86/PAE guests: support PDPTs above 4GB. */
 #define VMASST_TYPE_pae_extended_cr3     3
 
-#define MAX_VMASST_TYPE 3
+/*
+ * x86 guests: Sane behaviour for virtual iopl
+ *  - virtual iopl updated from do_iret() hypercalls.
+ *  - virtual iopl reported in bounce frames.
+ *  - guest kernels assumed to be level 0 for the purpose of iopl checks.
+ */
+#define VMASST_TYPE_architectural_iopl   4
+
+/*
+ * All guests: activate update indicator in vcpu_runstate_info
+ * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped
+ * vcpu_runstate_info during updates of the runstate information.
+ */
+#define VMASST_TYPE_runstate_update_flag 5
+
+#define MAX_VMASST_TYPE 5
 
 #ifndef __ASSEMBLY__
 
-- 
2.6.6

From ab457b88c03a66c6051ac022b51bc5c218f48842 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Fri, 20 May 2016 12:08:21 +0200
Subject: [PATCH 2/3] arm/xen: add support for vm_assist hypercall

Add support for the Xen HYPERVISOR_vm_assist hypercall.

Signed-off-by: Juergen Gross <jgr...@suse.com>
---
 arch/arm/include/asm/xen/hypercall.h | 1 +
 arch/arm/xen/enlighten.c             | 1 +
 arch/arm/xen/hypercall.S             | 1 +
 arch/arm64/xen/hypercall.S           | 1 +
 4 files changed, 4 insertions(+)

diff --git a/arch/arm/include/asm/xen/hypercall.h 
b/arch/arm/include/asm/xen/hypercall.h
index b6b962d..9d874db 100644
--- a/arch/arm/include/asm/xen/hypercall.h
+++ b/arch/arm/include/asm/xen/hypercall.h
@@ -52,6 +52,7 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg);
 int HYPERVISOR_physdev_op(int cmd, void *arg);
 int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);
 int HYPERVISOR_tmem_op(void *arg);
+int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type);
 int HYPERVISOR_platform_op_raw(void *arg);
 static inline int HYPERVISOR_platform_op(struct xen_platform_op *op)
 {
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 71db30c..0f3aa12 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -389,4 +389,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
+EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);
 EXPORT_SYMBOL_GPL(privcmd_call);
diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S
index 9a36f4f..a648dfc 100644
--- a/arch/arm/xen/hypercall.S
+++ b/arch/arm/xen/hypercall.S
@@ -91,6 +91,7 @@ HYPERCALL3(vcpu_op);
 HYPERCALL1(tmem_op);
 HYPERCALL1(platform_op_raw);
 HYPERCALL2(multicall);
+HYPERCALL2(vm_assist);
 
 ENTRY(privcmd_call)
        stmdb sp!, {r4}
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 70df80e..329c802 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -82,6 +82,7 @@ HYPERCALL3(vcpu_op);
 HYPERCALL1(tmem_op);
 HYPERCALL1(platform_op_raw);
 HYPERCALL2(multicall);
+HYPERCALL2(vm_assist);
 
 ENTRY(privcmd_call)
        mov x16, x0
-- 
2.6.6

From f27da1aba6c9c92add4f88b4dcec517e5e321caa Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Fri, 20 May 2016 12:25:58 +0200
Subject: [PATCH 3/3] xen: support runqueue steal time on xen

Up to now reading the stolen time of a remote cpu was not possible in a
performant way under Xen. This made support of runqueue steal time via
paravirt_steal_rq_enabled impossible.

With the addition of an appropriate hypervisor interface this is now
possible, so add the support.

Signed-off-by: Juergen Gross <jgr...@suse.com>
---
 drivers/xen/time.c | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 2257b66..04b6cb7 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -19,6 +19,9 @@
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
 
+/* runstate info of remote cpu accessible */
+static bool xen_runstate_remote;
+
 /* return an consistent snapshot of 64-bit time/counter value */
 static u64 get64(const u64 *p)
 {
@@ -47,27 +50,31 @@ static u64 get64(const u64 *p)
        return ret;
 }
 
-/*
- * Runstate accounting
- */
-void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
+static void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res,
+                                          unsigned cpu)
 {
        u64 state_time;
        struct vcpu_runstate_info *state;
 
        BUG_ON(preemptible());
 
-       state = this_cpu_ptr(&xen_runstate);
+       state = per_cpu_ptr(&xen_runstate, cpu);
 
-       /*
-        * The runstate info is always updated by the hypervisor on
-        * the current CPU, so there's no need to use anything
-        * stronger than a compiler barrier when fetching it.
-        */
        do {
                state_time = get64(&state->state_entry_time);
+               rmb();
                *res = READ_ONCE(*state);
-       } while (get64(&state->state_entry_time) != state_time);
+               rmb();
+       } while (get64(&state->state_entry_time) != state_time ||
+                (state_time & XEN_RUNSTATE_UPDATE));
+}
+
+/*
+ * Runstate accounting
+ */
+void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
+{
+       xen_get_runstate_snapshot_cpu(res, smp_processor_id());
 }
 
 /* return true when a vcpu could run but has no real cpu to run on */
@@ -80,8 +87,8 @@ static u64 xen_steal_clock(int cpu)
 {
        struct vcpu_runstate_info state;
 
-       BUG_ON(cpu != smp_processor_id());
-       xen_get_runstate_snapshot(&state);
+       BUG_ON(!xen_runstate_remote && cpu != smp_processor_id());
+       xen_get_runstate_snapshot_cpu(&state, cpu);
        return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
 }
 
@@ -98,11 +105,12 @@ void xen_setup_runstate_info(int cpu)
 
 void __init xen_time_setup_guest(void)
 {
+       xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable,
+                                       VMASST_TYPE_runstate_update_flag);
+
        pv_time_ops.steal_clock = xen_steal_clock;
 
        static_key_slow_inc(&paravirt_steal_enabled);
-       /*
-        * We can't set paravirt_steal_rq_enabled as this would require the
-        * capability to read another cpu's runstate info.
-        */
+       if (xen_runstate_remote)
+               static_key_slow_inc(&paravirt_steal_rq_enabled);
 }
-- 
2.6.6

From 39c8fea3b4fdcf634e4858748f77fc4ee2ddc4cd Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Thu, 19 May 2016 18:19:42 +0200
Subject: [PATCH 1/2] xen/arm: add support for vm_assist hypercall

Up to now the vm_assist hypercall hasn't been supported on ARM, as
there are only x86 specific features to switch. Add support of
vm_assist on ARM for future use.

Signed-off-by: Juergen Gross <jgr...@suse.com>
---
 xen/arch/arm/traps.c         | 1 +
 xen/common/domain.c          | 2 --
 xen/common/kernel.c          | 2 --
 xen/include/asm-arm/config.h | 2 ++
 4 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 1828ea1..ccc6351 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1284,6 +1284,7 @@ static arm_hypercall_t arm_hypercall_table[] = {
     HYPERCALL(multicall, 2),
     HYPERCALL(platform_op, 1),
     HYPERCALL_ARM(vcpu_op, 3),
+    HYPERCALL(vm_assist, 2),
 };
 
 #ifndef NDEBUG
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 45273d4..0afb1ee 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -1408,7 +1408,6 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, 
XEN_GUEST_HANDLE_PARAM(void) arg)
     return rc;
 }
 
-#ifdef VM_ASSIST_VALID
 long vm_assist(struct domain *p, unsigned int cmd, unsigned int type,
                unsigned long valid)
 {
@@ -1427,7 +1426,6 @@ long vm_assist(struct domain *p, unsigned int cmd, 
unsigned int type,
 
     return -ENOSYS;
 }
-#endif
 
 struct pirq *pirq_get_info(struct domain *d, int pirq)
 {
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index 1a6823a..74b6e1f 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -441,12 +441,10 @@ DO(nmi_op)(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) 
arg)
     return rc;
 }
 
-#ifdef VM_ASSIST_VALID
 DO(vm_assist)(unsigned int cmd, unsigned int type)
 {
     return vm_assist(current->domain, cmd, type, VM_ASSIST_VALID);
 }
-#endif
 
 DO(ni_hypercall)(void)
 {
diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
index 2d11b62..563f49b 100644
--- a/xen/include/asm-arm/config.h
+++ b/xen/include/asm-arm/config.h
@@ -199,6 +199,8 @@ extern unsigned long frametable_virt_end;
 #define watchdog_disable() ((void)0)
 #define watchdog_enable()  ((void)0)
 
+#define VM_ASSIST_VALID          (0)
+
 #endif /* __ARM_CONFIG_H__ */
 /*
  * Local variables:
-- 
2.6.6

From f1b30599d330fcb4086b92125cc36cfc1c95fe63 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgr...@suse.com>
Date: Fri, 20 May 2016 06:32:12 +0200
Subject: [PATCH 2/2] xen: add update indicator to vcpu_runstate_info

In order to support reading another vcpu's mapped vcpu_runstate_info
an indicator for an occurring update of the runstate information is
needed.

Add the possibility to activate setting this indicator in the highest
bit of state_entry_time via a vm_assist hypercall. When activated the
update indicator will be set before the runstate information is
modified in guest memory and it will be reset after modification is
done. As state_entry_time is guaranteed to be different after each
update the guest can detect any update (either in progress or while
reading the runstate data) by comparing state_entry_time before and
after reading runstate data: in case the values differ or the update
indicator was set the data might be inconsistent and should be reread.

Signed-off-by: Juergen Gross <jgr...@suse.com>
---
 xen/arch/arm/domain.c        | 22 ++++++++++++++++++++++
 xen/arch/x86/domain.c        | 31 +++++++++++++++++++++++++++++++
 xen/include/asm-arm/config.h |  2 +-
 xen/include/asm-x86/config.h |  1 +
 xen/include/public/vcpu.h    |  6 ++++++
 xen/include/public/xen.h     |  7 +++++++
 6 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 1365b4a..91f256b 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -239,10 +239,32 @@ static void ctxt_switch_to(struct vcpu *n)
 /* Update per-VCPU guest runstate shared memory area (if registered). */
 static void update_runstate_area(struct vcpu *v)
 {
+    bool_t update_flag;
+    void __user *guest_handle = NULL;
+    unsigned off = 0;
+
     if ( guest_handle_is_null(runstate_guest(v)) )
         return;
 
+    update_flag = VM_ASSIST(v->domain, runstate_update_flag);
+    if ( update_flag )
+    {
+        off = offsetof(struct vcpu_runstate_info, state_entry_time) + 7;
+        guest_handle = v->runstate_guest.p;
+        guest_handle += off;
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+        __raw_copy_to_guest(guest_handle, (void *)&v->runstate + off, 1);
+        wmb();
+    }
+
     __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+
+    if ( update_flag )
+    {
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        wmb();
+        __raw_copy_to_guest(guest_handle, (void *)&v->runstate + off, 1);
+    }
 }
 
 static void schedule_tail(struct vcpu *prev)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 5af2cc5..dfaee5d 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1925,13 +1925,37 @@ static void paravirt_ctxt_switch_to(struct vcpu *v)
 bool_t update_runstate_area(struct vcpu *v)
 {
     bool_t rc;
+    bool_t update_flag;
     smap_check_policy_t smap_policy;
+    void __user *guest_handle = NULL;
+    unsigned off = 0;
 
     if ( guest_handle_is_null(runstate_guest(v)) )
         return 1;
 
+    update_flag = VM_ASSIST(v->domain, runstate_update_flag);
+
     smap_policy = smap_policy_change(v, SMAP_CHECK_ENABLED);
 
+    if ( update_flag )
+    {
+        off = offsetof(struct vcpu_runstate_info, state_entry_time) + 7;
+        if ( has_32bit_shinfo(v->domain) )
+        {
+            guest_handle = v->runstate_guest.compat.p;
+            guest_handle += offsetof(struct compat_vcpu_runstate_info,
+                                     state_entry_time) + 7;
+        }
+        else
+        {
+            guest_handle = v->runstate_guest.native.p;
+            guest_handle += off;
+        }
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+        __raw_copy_to_guest(guest_handle, (void *)&v->runstate + off, 1);
+        wmb();
+    }
+
     if ( has_32bit_shinfo(v->domain) )
     {
         struct compat_vcpu_runstate_info info;
@@ -1944,6 +1968,13 @@ bool_t update_runstate_area(struct vcpu *v)
         rc = __copy_to_guest(runstate_guest(v), &v->runstate, 1) !=
              sizeof(v->runstate);
 
+    if ( update_flag )
+    {
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        wmb();
+        __raw_copy_to_guest(guest_handle, (void *)&v->runstate + off, 1);
+    }
+
     smap_policy_change(v, smap_policy);
 
     return rc;
diff --git a/xen/include/asm-arm/config.h b/xen/include/asm-arm/config.h
index 563f49b..ce3edc2 100644
--- a/xen/include/asm-arm/config.h
+++ b/xen/include/asm-arm/config.h
@@ -199,7 +199,7 @@ extern unsigned long frametable_virt_end;
 #define watchdog_disable() ((void)0)
 #define watchdog_enable()  ((void)0)
 
-#define VM_ASSIST_VALID          (0)
+#define VM_ASSIST_VALID          (1UL << VMASST_TYPE_runstate_update_flag)
 
 #endif /* __ARM_CONFIG_H__ */
 /*
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index c10129d..6fd84e7 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -332,6 +332,7 @@ extern unsigned long xen_phys_start;
                                   (1UL << VMASST_TYPE_writable_pagetables) | \
                                   (1UL << VMASST_TYPE_pae_extended_cr3)    | \
                                   (1UL << VMASST_TYPE_architectural_iopl)  | \
+                                  (1UL << VMASST_TYPE_runstate_update_flag)| \
                                   (1UL << VMASST_TYPE_m2p_strict))
 #define VM_ASSIST_VALID          NATIVE_VM_ASSIST_VALID
 #define COMPAT_VM_ASSIST_VALID   (NATIVE_VM_ASSIST_VALID & \
diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
index 692b87a..2aa230d 100644
--- a/xen/include/public/vcpu.h
+++ b/xen/include/public/vcpu.h
@@ -84,6 +84,12 @@ struct vcpu_runstate_info {
     /* When was current state entered (system time, ns)? */
     uint64_t state_entry_time;
     /*
+     * Update indicator set in state_entry_time:
+     * When activated via VMASST_TYPE_runstate_update_flag, set during
+     * updates in guest memory mapped copy of vcpu_runstate_info.
+     */
+#define XEN_RUNSTATE_UPDATE          (1ULL << 63)
+    /*
      * Time spent in each RUNSTATE_* (ns). The sum of these times is
      * guaranteed not to drift from system time.
      */
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 37bbb22..b9e5e0f 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -510,6 +510,13 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
 #define VMASST_TYPE_architectural_iopl   4
 
 /*
+ * All guests: activate update indicator in vcpu_runstate_info
+ * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped
+ * vcpu_runstate_info during updates of the runstate information.
+ */
+#define VMASST_TYPE_runstate_update_flag 5
+
+/*
  * x86/64 guests: strictly hide M2P from user mode.
  * This allows the guest to control respective hypervisor behavior:
  * - when not set, L4 tables get created with the respective slot blank,
-- 
2.6.6

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Reply via email to