Re: [Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests

2015-06-16 Thread Boris Ostrovsky

On 06/16/2015 03:45 AM, Jan Beulich wrote:

On 15.06.15 at 19:17, boris.ostrov...@oracle.com wrote:

+for ( i = 0; i < num_counters; i++ )
+{
+if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
+{
+/*
+ * Not necessary to re-init context since we should never load
+ * it until guest provides valid values. But just to be safe.
+ */
+amd_vpmu_init_regs(ctxt);
+return -EINVAL;
+}
+
+if ( is_pmu_enabled(ctrl_regs[i]) )
+num_enabled++;
+}
+
+if ( num_enabled )

Looks like a boolean flag would do - the exact count doesn't seem to
be of interest here or in later patches?

So the reason why I use a counter here is that keeping track of
VPMU_RUNNING state is currently broken on AMD; I noticed it when I was
updating this patch. amd_vpmu_do_wrmsr() will reset VPMU_RUNNING if the
MSR write is disabling the current counter, even though there may still be
other counters running. This may be related to the HVM brokenness of AMD
counters that I mentioned a while ago, where the guest, when running with
multiple counters, sometimes gets unexpected NMIs. (This goes all the
way back to 4.1.)

I don't want to fix this in this series, but I will likely need to count
the number of active counters when I do (just like I do for Intel).

I can use a boolean now though since I am not dealing with this problem
here.

If another rev is needed, I'd prefer if you did so. But if we can have
this version go in (provided we get all the necessary acks), I wouldn't
insist on you doing another round just because of this.


I think there are a couple of (fairly cosmetic) changes that need to be 
done, so there will be another rev.


OTOH I just tried a quick-and-dirty fix for this problem and it doesn't 
resolve it, so presumably there is more to this. It's not particularly 
invasive, but I think it would be rather pointless to put it in as it 
still doesn't allow multiple counters on AMD, and I suspect the final fix 
will be touching the same code again.


-boris



Re: [Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests

2015-06-16 Thread Jan Beulich
 On 15.06.15 at 19:17, boris.ostrov...@oracle.com wrote:
 On 06/15/2015 11:50 AM, Jan Beulich wrote:
 On 10.06.15 at 17:04, boris.ostrov...@oracle.com wrote:
 +{
 +unsigned int num_enabled = 0;
 +struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
 +
 +ASSERT(!is_hvm_vcpu(v));
 +
 +ctxt = vpmu->context;
 +ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 
 +memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
 So that's the live structure, not any staging area iiuc. What state
 is the guest going to be in when validation fails (and you can't
 restore the original state)? And what guarantees that nothing
 elsewhere in the hypervisor uses the data _before_ the
 validation below succeeds?
 
 
 We don't load this data into hardware until we have validated it. On 
 failed validation the guest will receive a hypercall error --- it's up 
 to the guest to decide what to do.
 
 The hypervisor will not use this data as it will still be flagged as 
 PMU_CACHED, i.e. invalid/stale. (That's why I say in the comment below 
 that re-initializing it is really not necessary.)

Okay, thanks.

 +for ( i = 0; i < num_counters; i++ )
 +{
 +if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
 +{
 +/*
 + * Not necessary to re-init context since we should never load
 + * it until guest provides valid values. But just to be safe.
 + */
 +amd_vpmu_init_regs(ctxt);
 +return -EINVAL;
 +}
 +
 +if ( is_pmu_enabled(ctrl_regs[i]) )
 +num_enabled++;
 +}
 +
 +if ( num_enabled )
 Looks like a boolean flag would do - the exact count doesn't seem to
 be of interest here or in later patches?
 
 So the reason why I use a counter here is that keeping track of 
 VPMU_RUNNING state is currently broken on AMD; I noticed it when I was 
 updating this patch. amd_vpmu_do_wrmsr() will reset VPMU_RUNNING if the 
 MSR write is disabling the current counter, even though there may still 
 be other counters running. This may be related to the HVM brokenness of 
 AMD counters that I mentioned a while ago, where the guest, when running 
 with multiple counters, sometimes gets unexpected NMIs. (This goes all 
 the way back to 4.1.)
 
 I don't want to fix this in this series, but I will likely need to count 
 the number of active counters when I do (just like I do for Intel).
 
 I can use a boolean now though since I am not dealing with this problem 
 here.

If another rev is needed, I'd prefer if you did so. But if we can have
this version go in (provided we get all the necessary acks), I wouldn't
insist on you doing another round just because of this.

Jan




Re: [Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests

2015-06-15 Thread Jan Beulich
 On 10.06.15 at 17:04, boris.ostrov...@oracle.com wrote:
 @@ -211,27 +214,65 @@ static inline void context_load(struct vcpu *v)
  }
  }
  
 -static void amd_vpmu_load(struct vcpu *v)
 +static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
  {
  struct vpmu_struct *vpmu = vcpu_vpmu(v);
 -struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
 -uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 +struct xen_pmu_amd_ctxt *ctxt;
 +uint64_t *ctrl_regs;
 +unsigned int i;
  
  vpmu_reset(vpmu, VPMU_FROZEN);
  
 -if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
 +if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
  {
 -unsigned int i;
 +ctxt = vpmu->context;
 +ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
  
   for ( i = 0; i < num_counters; i++ )
  wrmsrl(ctrls[i], ctrl_regs[i]);
  
 -return;
 +return 0;
 +}
 +
 +if ( from_guest )

Generally I dislike such redundancy (
    if ( cond1 && cond2 )
        return;
    if ( !cond1 )
        ...
which can be written without checking cond1 twice) - do you really
think it is beneficial for overall readability to have it that way?
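
I.e. the function could be shaped along these lines (just a sketch of
the structure, not tested), so that from_guest is evaluated only once:

    if ( !from_guest )
    {
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        {
            ctxt = vpmu->context;
            ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);

            /* Fast path on context switch: just restart the counters. */
            for ( i = 0; i < num_counters; i++ )
                wrmsrl(ctrls[i], ctrl_regs[i]);

            return 0;
        }
    }
    else
    {
        /* Full load: validate and copy in the guest-supplied context. */
        ...
    }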

 +{
 +unsigned int num_enabled = 0;
 +struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
 +
 +ASSERT(!is_hvm_vcpu(v));
 +
 +ctxt = vpmu->context;
 +ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 +
 +memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);

So that's the live structure, not any staging area iiuc. What state
is the guest going to be in when validation fails (and you can't
restore the original state)? And what guarantees that nothing
elsewhere in the hypervisor uses the data _before_ the
validation below succeeds?

 +for ( i = 0; i < num_counters; i++ )
 +{
 +if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
 +{
 +/*
 + * Not necessary to re-init context since we should never load
 + * it until guest provides valid values. But just to be safe.
 + */
 +amd_vpmu_init_regs(ctxt);
 +return -EINVAL;
 +}
 +
 +if ( is_pmu_enabled(ctrl_regs[i]) )
 +num_enabled++;
 +}
 +
 +if ( num_enabled )

Looks like a boolean flag would do - the exact count doesn't seem to
be of interest here or in later patches?

 @@ -246,22 +287,17 @@ static inline void context_save(struct vcpu *v)
  rdmsrl(counters[i], counter_regs[i]);
  }
  
 -static int amd_vpmu_save(struct vcpu *v)
 +static int amd_vpmu_save(struct vcpu *v, bool_t to_guest)
  {
  struct vpmu_struct *vpmu = vcpu_vpmu(v);
  unsigned int i;
  
 -/*
 - * Stop the counters. If we came here via vpmu_save_force (i.e.
 - * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
 - */
 +for ( i = 0; i < num_counters; i++ )
 +wrmsrl(ctrls[i], 0);

Wouldn't it make sense to retain the first sentence of the comment?

 @@ -478,6 +523,13 @@ int svm_vpmu_initialise(struct vcpu *v)
  vpmu->context = ctxt;
  vpmu->priv_context = NULL;
  
 +if ( !is_hvm_vcpu(v) )
 +{
 +/* Copy register offsets to shared area */
 +ASSERT(vpmu->xenpmu_data);
 +memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt, sizeof(*ctxt));

At first glance the comment looks as if it wasn't in line with
the sizeof() used - offsetof() would be more obvious here (or a
file scope constant like you use on the Intel side).
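
I.e. something like (sketch):

    /* Copy register offsets (the fixed part of the layout) to shared area */
    memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt,
           offsetof(struct xen_pmu_amd_ctxt, regs));

which would make it explicit that only the offsets, and none of the
register values, get copied.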

 @@ -552,6 +738,30 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
  pvpmu_finish(current->domain, &pmu_params);
  break;
  
 +case XENPMU_lvtpc_set:
 +xenpmu_data = current->arch.vpmu.xenpmu_data;
 +if ( xenpmu_data == NULL )
 +return -EINVAL;
 +vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
 +break;
 +
 +case XENPMU_flush:
 +curr = current;
 +vpmu = vcpu_vpmu(curr);
 +xenpmu_data = curr->arch.vpmu.xenpmu_data;
 +if ( xenpmu_data == NULL )
 +return -EINVAL;
 +xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
 +vpmu_reset(vpmu, VPMU_CACHED);
 +vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
 +if ( vpmu_load(curr, 1) )
 +{
 +xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
 +vpmu_set(vpmu, VPMU_CACHED);
 +return -EIO;
 +}
 +break;
 +
  default:
  ret = -EINVAL;

Considering how the default case gets handled, can't at least the 1st
and 3rd return-s above become ret = -E..., avoiding needlessly
many return points in the function?
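
I.e. something like (sketch):

    case XENPMU_lvtpc_set:
        xenpmu_data = current->arch.vpmu.xenpmu_data;
        if ( xenpmu_data == NULL )
        {
            ret = -EINVAL;
            break;
        }
        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
        break;

with the -EIO exit in XENPMU_flush handled the same way.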

Jan



Re: [Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests

2015-06-15 Thread Boris Ostrovsky

On 06/15/2015 11:50 AM, Jan Beulich wrote:

On 10.06.15 at 17:04, boris.ostrov...@oracle.com wrote:

@@ -211,27 +214,65 @@ static inline void context_load(struct vcpu *v)
  }
  }
  
-static void amd_vpmu_load(struct vcpu *v)

+static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
  {
  struct vpmu_struct *vpmu = vcpu_vpmu(v);
-struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
-uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+struct xen_pmu_amd_ctxt *ctxt;
+uint64_t *ctrl_regs;
+unsigned int i;
  
  vpmu_reset(vpmu, VPMU_FROZEN);
  
-if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )

+if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
  {
-unsigned int i;
+ctxt = vpmu->context;
+ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
  
  for ( i = 0; i < num_counters; i++ )

  wrmsrl(ctrls[i], ctrl_regs[i]);
  
-return;

+return 0;
+}
+
+if ( from_guest )

Generally I dislike such redundancy (
    if ( cond1 && cond2 )
        return;
    if ( !cond1 )
        ...
which can be written without checking cond1 twice) - do you really
think it is beneficial for overall readability to have it that way?


I thought it was more readable: the first clause means that we are in 
a quick VPMU load from a context switch (and so we return from it), 
while the second is part of a full VPMU load.

+{
+unsigned int num_enabled = 0;
+struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
+
+ASSERT(!is_hvm_vcpu(v));
+
+ctxt = vpmu->context;
+ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);

So that's the live structure, not any staging area iiuc. What state
is the guest going to be in when validation fails (and you can't
restore the original state)? And what guarantees that nothing
elsewhere in the hypervisor uses the data _before_ the
validation below succeeds?

We don't load this data into hardware until we have validated it. On 
failed validation the guest will receive a hypercall error --- it's up to 
the guest to decide what to do.

The hypervisor will not use this data as it will still be flagged as 
PMU_CACHED, i.e. invalid/stale. (That's why I say in the comment below 
that re-initializing it is really not necessary.)
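
To illustrate the intended flow on the guest side (a rough sketch; the
hypercall wrapper below is the Linux-style name and is illustrative
only):

    /*
     * While PMU_CACHED is set, the PMU MSR state lives in the shared
     * page and can be read there without trapping into the hypervisor.
     */
    if ( xenpmu_data->pmu.pmu_flags & PMU_CACHED )
    {
        /* ... consume/update xenpmu_data->pmu.c.amd ... */

        /*
         * Ask Xen to validate the context and reload it into hardware.
         * On failure (e.g. -EINVAL from failed validation) the context
         * stays flagged PMU_CACHED.
         */
        if ( HYPERVISOR_xenpmu_op(XENPMU_flush, NULL) )
            /* handle the error */;
    }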

+for ( i = 0; i < num_counters; i++ )
+{
+if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
+{
+/*
+ * Not necessary to re-init context since we should never load
+ * it until guest provides valid values. But just to be safe.
+ */
+amd_vpmu_init_regs(ctxt);
+return -EINVAL;
+}
+
+if ( is_pmu_enabled(ctrl_regs[i]) )
+num_enabled++;
+}
+
+if ( num_enabled )

Looks like a boolean flag would do - the exact count doesn't seem to
be of interest here or in later patches?


So the reason why I use a counter here is that keeping track of 
VPMU_RUNNING state is currently broken on AMD; I noticed it when I was 
updating this patch. amd_vpmu_do_wrmsr() will reset VPMU_RUNNING if the 
MSR write is disabling the current counter, even though there may still be 
other counters running. This may be related to the HVM brokenness of AMD 
counters that I mentioned a while ago, where the guest, when running with 
multiple counters, sometimes gets unexpected NMIs. (This goes all the 
way back to 4.1.)


I don't want to fix this in this series, but I will likely need to count 
the number of active counters when I do (just like I do for Intel).


I can use a boolean now though since I am not dealing with this problem 
here.
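
For reference, what I have in mind for that eventual fix is roughly the
following (a sketch only; enabled_counters() is a made-up helper name
and none of this is part of this series):

    /*
     * Derive VPMU_RUNNING from all control registers rather than from
     * the single MSR being written in amd_vpmu_do_wrmsr().
     */
    static unsigned int enabled_counters(struct xen_pmu_amd_ctxt *ctxt)
    {
        uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
        unsigned int i, n = 0;

        for ( i = 0; i < num_counters; i++ )
            if ( is_pmu_enabled(ctrl_regs[i]) )
                n++;

        return n;
    }

    /* ... and then, after updating the saved control MSR value: */
    if ( enabled_counters(vpmu->context) )
        vpmu_set(vpmu, VPMU_RUNNING);
    else
        vpmu_reset(vpmu, VPMU_RUNNING);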

-boris



Re: [Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests

2015-06-11 Thread Tian, Kevin
 From: Jan Beulich [mailto:jbeul...@suse.com]
 Sent: Thursday, June 11, 2015 5:33 PM
 
  On 11.06.15 at 10:38, kevin.t...@intel.com wrote:
   From: Boris Ostrovsky [mailto:boris.ostrov...@oracle.com]
  Sent: Wednesday, June 10, 2015 11:04 PM
 
  Add support for handling PMU interrupts for PV(H) guests.
 
  VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
  hypercall. This allows the guest to access PMU MSR values that are stored in
  VPMU context which is shared between hypervisor and domain, thus avoiding
  traps to hypervisor.
 
  Since the interrupt handler may now force VPMU context save (i.e. set
  VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
  until now expected this flag to be set only when the counters were stopped.
 
  Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com
  Acked-by: Daniel De Graaf dgde...@tycho.nsa.gov
 
  I may need more time to understand the whole interrupt stuff for PV(H)
  guest. But regarding to VMX specific changes I think they are clear:
 
  Signed-off-by: Kevin Tian kevin.t...@intel.com
 
 I don't think you really meant S-o-b here?
 

My bad when doing batch reviews.

Acked-by: Kevin Tian kevin.t...@intel.com

Thanks
Kevin



Re: [Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests

2015-06-11 Thread Jan Beulich
 On 11.06.15 at 10:38, kevin.t...@intel.com wrote:
  From: Boris Ostrovsky [mailto:boris.ostrov...@oracle.com]
 Sent: Wednesday, June 10, 2015 11:04 PM
 
 Add support for handling PMU interrupts for PV(H) guests.
 
 VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
 hypercall. This allows the guest to access PMU MSR values that are stored in
 VPMU context which is shared between hypervisor and domain, thus avoiding
 traps to hypervisor.
 
 Since the interrupt handler may now force VPMU context save (i.e. set
 VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
 until now expected this flag to be set only when the counters were stopped.
 
 Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com
 Acked-by: Daniel De Graaf dgde...@tycho.nsa.gov
 
 I may need more time to understand the whole interrupt stuff for PV(H)
 guest. But regarding to VMX specific changes I think they are clear:
 
 Signed-off-by: Kevin Tian kevin.t...@intel.com

I don't think you really meant S-o-b here?

Jan




Re: [Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests

2015-06-11 Thread Tian, Kevin
 From: Boris Ostrovsky [mailto:boris.ostrov...@oracle.com]
 Sent: Wednesday, June 10, 2015 11:04 PM
 
 Add support for handling PMU interrupts for PV(H) guests.
 
 VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
 hypercall. This allows the guest to access PMU MSR values that are stored in
 VPMU context which is shared between hypervisor and domain, thus avoiding
 traps to hypervisor.
 
 Since the interrupt handler may now force VPMU context save (i.e. set
 VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
 until now expected this flag to be set only when the counters were stopped.
 
 Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com
 Acked-by: Daniel De Graaf dgde...@tycho.nsa.gov

I may need more time to understand the whole interrupt stuff for PV(H)
guest. But regarding to VMX specific changes I think they are clear:

Signed-off-by: Kevin Tian kevin.t...@intel.com

 ---
 Changes in v24:
 * For both AMD and Intel copy guest's MSRs first into context and then verify
   it (to keep things as read-once by hypervisor)
 * To make sure that guest did not alter offsets to registers don't copy these
   values. Store them into shared area during VPMU initialization. Clarify in
   public header file that they are RO by the guest
 * Make vpmu_load return arch_vpmu_load()'s error code, not 1.
 
  xen/arch/x86/hvm/svm/vpmu.c   |  90 ++---
  xen/arch/x86/hvm/vmx/vpmu_core2.c | 108 ++-
  xen/arch/x86/hvm/vpmu.c   | 268 +-
  xen/include/asm-x86/hvm/vpmu.h|  10 +-
  xen/include/public/arch-x86/pmu.h |  41 +-
  xen/include/public/pmu.h  |   2 +
  xen/include/xsm/dummy.h   |   4 +-
  xen/xsm/flask/hooks.c |   2 +
  8 files changed, 464 insertions(+), 61 deletions(-)
 
 diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
 index 934f1b7..b93d31d 100644
 --- a/xen/arch/x86/hvm/svm/vpmu.c
 +++ b/xen/arch/x86/hvm/svm/vpmu.c
 @@ -46,6 +46,9 @@ static const u32 __read_mostly *counters;
  static const u32 __read_mostly *ctrls;
  static bool_t __read_mostly k7_counters_mirrored;
 
 +/* Total size of PMU registers block (copied to/from PV(H) guest) */
 +static unsigned int __read_mostly regs_sz;
 +
  #define F10H_NUM_COUNTERS   4
  #define F15H_NUM_COUNTERS   6
  #define MAX_NUM_COUNTERSF15H_NUM_COUNTERS
 @@ -158,7 +161,7 @@ static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt *ctxt)
  unsigned i;
  uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 
 -memset(&ctxt->regs[0], 0, 2 * sizeof(uint64_t) * num_counters);
 +memset(&ctxt->regs[0], 0, regs_sz);
  for ( i = 0; i < num_counters; i++ )
  ctrl_regs[i] = ctrl_rsvd[i];
  }
 @@ -211,27 +214,65 @@ static inline void context_load(struct vcpu *v)
  }
  }
 
 -static void amd_vpmu_load(struct vcpu *v)
 +static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
  {
  struct vpmu_struct *vpmu = vcpu_vpmu(v);
 -struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
 -uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 +struct xen_pmu_amd_ctxt *ctxt;
 +uint64_t *ctrl_regs;
 +unsigned int i;
 
  vpmu_reset(vpmu, VPMU_FROZEN);
 
 -if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
 +if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
  {
 -unsigned int i;
 +ctxt = vpmu->context;
 +ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 
   for ( i = 0; i < num_counters; i++ )
  wrmsrl(ctrls[i], ctrl_regs[i]);
 
 -return;
 +return 0;
 +}
 +
 +if ( from_guest )
 +{
 +unsigned int num_enabled = 0;
 +struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
 +
 +ASSERT(!is_hvm_vcpu(v));
 +
 +ctxt = vpmu->context;
 +ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 +
 +memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
 +
 +for ( i = 0; i < num_counters; i++ )
 +{
 +if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
 +{
 +/*
 + * Not necessary to re-init context since we should never load
 + * it until guest provides valid values. But just to be safe.
 + */
 +amd_vpmu_init_regs(ctxt);
 +return -EINVAL;
 +}
 +
 +if ( is_pmu_enabled(ctrl_regs[i]) )
 +num_enabled++;
 +}
 +
 +if ( num_enabled )
 +vpmu_set(vpmu, VPMU_RUNNING);
 +else
 +vpmu_reset(vpmu, VPMU_RUNNING);
  }
 
  vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
 
  context_load(v);
 +
 +return 0;
  }
 
  static inline void context_save(struct vcpu *v)
 @@ -246,22 +287,17 @@ static inline void context_save(struct vcpu *v)
  rdmsrl(counters[i], counter_regs[i]);
  }
 
 -static int amd_vpmu_save(struct vcpu *v)
 +static int amd_vpmu_save(struct vcpu 

[Xen-devel] [PATCH v24 12/15] x86/VPMU: Handle PMU interrupts for PV(H) guests

2015-06-10 Thread Boris Ostrovsky
Add support for handling PMU interrupts for PV(H) guests.

VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
hypercall. This allows the guest to access PMU MSR values that are stored in
VPMU context which is shared between hypervisor and domain, thus avoiding
traps to hypervisor.

Since the interrupt handler may now force VPMU context save (i.e. set
VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
until now expected this flag to be set only when the counters were stopped.

Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com
Acked-by: Daniel De Graaf dgde...@tycho.nsa.gov
---
Changes in v24:
* For both AMD and Intel copy guest's MSRs first into context and then verify
  it (to keep things as read-once by hypervisor)
* To make sure that guest did not alter offsets to registers don't copy these
  values. Store them into shared area during VPMU initialization. Clarify in 
  public header file that they are RO by the guest
* Make vpmu_load return arch_vpmu_load()'s error code, not 1.

 xen/arch/x86/hvm/svm/vpmu.c   |  90 ++---
 xen/arch/x86/hvm/vmx/vpmu_core2.c | 108 ++-
 xen/arch/x86/hvm/vpmu.c   | 268 +-
 xen/include/asm-x86/hvm/vpmu.h|  10 +-
 xen/include/public/arch-x86/pmu.h |  41 +-
 xen/include/public/pmu.h  |   2 +
 xen/include/xsm/dummy.h   |   4 +-
 xen/xsm/flask/hooks.c |   2 +
 8 files changed, 464 insertions(+), 61 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 934f1b7..b93d31d 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -46,6 +46,9 @@ static const u32 __read_mostly *counters;
 static const u32 __read_mostly *ctrls;
 static bool_t __read_mostly k7_counters_mirrored;
 
+/* Total size of PMU registers block (copied to/from PV(H) guest) */
+static unsigned int __read_mostly regs_sz;
+
 #define F10H_NUM_COUNTERS   4
 #define F15H_NUM_COUNTERS   6
 #define MAX_NUM_COUNTERSF15H_NUM_COUNTERS
@@ -158,7 +161,7 @@ static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt *ctxt)
 unsigned i;
 uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 
-memset(&ctxt->regs[0], 0, 2 * sizeof(uint64_t) * num_counters);
+memset(&ctxt->regs[0], 0, regs_sz);
 for ( i = 0; i < num_counters; i++ )
 ctrl_regs[i] = ctrl_rsvd[i];
 }
@@ -211,27 +214,65 @@ static inline void context_load(struct vcpu *v)
 }
 }
 
-static void amd_vpmu_load(struct vcpu *v)
+static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
 {
 struct vpmu_struct *vpmu = vcpu_vpmu(v);
-struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
-uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+struct xen_pmu_amd_ctxt *ctxt;
+uint64_t *ctrl_regs;
+unsigned int i;
 
 vpmu_reset(vpmu, VPMU_FROZEN);
 
-if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
 {
-unsigned int i;
+ctxt = vpmu->context;
+ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
 
 for ( i = 0; i < num_counters; i++ )
 wrmsrl(ctrls[i], ctrl_regs[i]);
 
-return;
+return 0;
+}
+
+if ( from_guest )
+{
+unsigned int num_enabled = 0;
+struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
+
+ASSERT(!is_hvm_vcpu(v));
+
+ctxt = vpmu->context;
+ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
+
+for ( i = 0; i < num_counters; i++ )
+{
+if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
+{
+/*
+ * Not necessary to re-init context since we should never load
+ * it until guest provides valid values. But just to be safe.
+ */
+amd_vpmu_init_regs(ctxt);
+return -EINVAL;
+}
+
+if ( is_pmu_enabled(ctrl_regs[i]) )
+num_enabled++;
+}
+
+if ( num_enabled )
+vpmu_set(vpmu, VPMU_RUNNING);
+else
+vpmu_reset(vpmu, VPMU_RUNNING);
 }
 
 vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
 
 context_load(v);
+
+return 0;
 }
 
 static inline void context_save(struct vcpu *v)
@@ -246,22 +287,17 @@ static inline void context_save(struct vcpu *v)
 rdmsrl(counters[i], counter_regs[i]);
 }
 
-static int amd_vpmu_save(struct vcpu *v)
+static int amd_vpmu_save(struct vcpu *v, bool_t to_guest)
 {
 struct vpmu_struct *vpmu = vcpu_vpmu(v);
 unsigned int i;
 
-/*
- * Stop the counters. If we came here via vpmu_save_force (i.e.
- * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
- */
+for ( i = 0; i < num_counters; i++ )
+wrmsrl(ctrls[i], 0);
+
 if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
 {
 vpmu_set(vpmu, VPMU_FROZEN);