Re: [Xen-devel] [PATCH v18 05/16] x86/VPMU: Interface for setting PMU mode and flags

2015-02-20 Thread Boris Ostrovsky


On 02/20/2015 11:23 AM, Jan Beulich wrote:

On 20.02.15 at 17:04,  wrote:

On 02/20/2015 08:59 AM, Jan Beulich wrote:

On 16.02.15 at 23:26,  wrote:

--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -253,6 +253,26 @@ static int amd_vpmu_save(struct vpmu_struct *vpmu)
   return 1;
   }
   
+static void amd_vpmu_unload(struct vpmu_struct *vpmu)

+{
+struct vcpu *v;
+
+if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED | VPMU_FROZEN) )
+{
+unsigned int i;
+
+for ( i = 0; i < num_counters; i++ )
+wrmsrl(ctrls[i], 0);
+context_save(vpmu);

This assumes vpmu_vcpu(vpmu) == current, yet it looks like you're
also calling it under different circumstances now.

This doesn't make this assumption. PMU MSRs may be loaded with values
that belong to a VCPU that is not currently running if, for example,
current VCPU is not using PMU. And flags test guarantees that we won't
stomp on someone else's registers.

I could make a test here and write zeroes to the MSRs only if vpmu is
current but I don't like leaving these MSRs with what is essentially
garbage values. This routine is executed very rarely so overhead is
negligible.

The question wasn't on the wrmsr() - writing zero here
unconditionally is fine - but on the actions context_save() takes.


Same reasoning for context_save() (which is a bunch of RDMSRs) --- if 
flags indicate that the context is LOADED|FROZEN then contents of those 
MSRs belong to this vpmu, regardless of whether it is current.





--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1183,11 +1183,10 @@ int vmx_read_guest_msr(u32 msr, u64 *val)
   return -ESRCH;
   }
   
-int vmx_write_guest_msr(u32 msr, u64 val)

+int vmx_write_guest_msr_vcpu(struct vcpu *v, u32 msr, u64 val)
   {
-struct vcpu *curr = current;
-unsigned int i, msr_count = curr->arch.hvm_vmx.msr_count;
-struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
+unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
   
   for ( i = 0; i < msr_count; i++ )

   {

Same here - while the code itself is only accessing memory, it
remains unclear whether correct behavior results when the subject
vCPU is actually executing.

Not sure what you mean here. The changes are specifically for updating
MSR values (cached in VMCS) of possibly non-current VCPU.

But non-current doesn't mean not-running. I.e. what if the vCPU
you deal with here is active on another pCPU?


I see. With VPMU (which is the only place that calls this routine) this 
will never happen but the interface itself allows it. I'll add an ASSERT 
or some other test to make sure.



-boris

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v18 05/16] x86/VPMU: Interface for setting PMU mode and flags

2015-02-20 Thread Jan Beulich
>>> On 20.02.15 at 17:04,  wrote:
> On 02/20/2015 08:59 AM, Jan Beulich wrote:
> On 16.02.15 at 23:26,  wrote:
>>> --- a/xen/arch/x86/hvm/svm/vpmu.c
>>> +++ b/xen/arch/x86/hvm/svm/vpmu.c
>>> @@ -253,6 +253,26 @@ static int amd_vpmu_save(struct vpmu_struct *vpmu)
>>>   return 1;
>>>   }
>>>   
>>> +static void amd_vpmu_unload(struct vpmu_struct *vpmu)
>>> +{
>>> +struct vcpu *v;
>>> +
>>> +if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED | VPMU_FROZEN) )
>>> +{
>>> +unsigned int i;
>>> +
>>> +for ( i = 0; i < num_counters; i++ )
>>> +wrmsrl(ctrls[i], 0);
>>> +context_save(vpmu);
>> This assumes vpmu_vcpu(vpmu) == current, yet it looks like you're
>> also calling it under different circumstances now.
> 
> This doesn't make this assumption. PMU MSRs may be loaded with values 
> that belong to a VCPU that is not currently running if, for example, 
> current VCPU is not using PMU. And flags test guarantees that we won't 
> stomp on someone else's registers.
> 
> I could make a test here and write zeroes to the MSRs only if vpmu is 
> current but I don't like leaving these MSRs with what is essentially 
> garbage values. This routine is executed very rarely so overhead is 
> negligible.

The question wasn't on the wrmsr() - writing zero here
unconditionally is fine - but on the actions context_save() takes.

>>> --- a/xen/arch/x86/hvm/vmx/vmcs.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
>>> @@ -1183,11 +1183,10 @@ int vmx_read_guest_msr(u32 msr, u64 *val)
>>>   return -ESRCH;
>>>   }
>>>   
>>> -int vmx_write_guest_msr(u32 msr, u64 val)
>>> +int vmx_write_guest_msr_vcpu(struct vcpu *v, u32 msr, u64 val)
>>>   {
>>> -struct vcpu *curr = current;
>>> -unsigned int i, msr_count = curr->arch.hvm_vmx.msr_count;
>>> -struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
>>> +unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
>>> +struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
>>>   
>>>   for ( i = 0; i < msr_count; i++ )
>>>   {
>> Same here - while the code itself is only accessing memory, it
>> remains unclear whether correct behavior results when the subject
>> vCPU is actually executing.
> 
> Not sure what you mean here. The changes are specifically for updating 
> MSR values (cached in VMCS) of possibly non-current VCPU.

But non-current doesn't mean not-running. I.e. what if the vCPU
you deal with here is active on another pCPU?

>>> +long do_xenpmu_op(unsigned int op, 
>>> XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>>> +{
>>> +int ret;
>>> +struct xen_pmu_params pmu_params;
>>> +
>>> +if ( vpmu_disabled )
>>> +return -EINVAL;
>> EOPNOTSUPP perhaps?
>>
>> And I agree with Dietmar that keeping opt_vpmu_enabled instead
>> of introducing vpmu_disabled would seem more natural, the more
>> that the default is disabled.
> 
> I can switch polarity back to enabled since both of you think it makes 
> more sense but I don't think I should keep "opt_" prefix since this flag 
> can be modified independently of what boot option says (e.g. when 
> watchdog is on).

Grep through the sources, and if you don't find other examples
doing so (I think you will), I'm fine with you dropping the opt_prefix
(in a separate patch).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v18 05/16] x86/VPMU: Interface for setting PMU mode and flags

2015-02-20 Thread Boris Ostrovsky


On 02/20/2015 08:59 AM, Jan Beulich wrote:

On 16.02.15 at 23:26,  wrote:

--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -253,6 +253,26 @@ static int amd_vpmu_save(struct vpmu_struct *vpmu)
  return 1;
  }
  
+static void amd_vpmu_unload(struct vpmu_struct *vpmu)

+{
+struct vcpu *v;
+
+if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED | VPMU_FROZEN) )
+{
+unsigned int i;
+
+for ( i = 0; i < num_counters; i++ )
+wrmsrl(ctrls[i], 0);
+context_save(vpmu);

This assumes vpmu_vcpu(vpmu) == current, yet it looks like you're
also calling it under different circumstances now.


This doesn't make this assumption. PMU MSRs may be loaded with values 
that belong to a VCPU that is not currently running if, for example, 
current VCPU is not using PMU. And flags test guarantees that we won't 
stomp on someone else's registers.


I could make a test here and write zeroes to the MSRs only if vpmu is 
current but I don't like leaving these MSRs with what is essentially 
garbage values. This routine is executed very rarely so overhead is 
negligible.







--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1183,11 +1183,10 @@ int vmx_read_guest_msr(u32 msr, u64 *val)
  return -ESRCH;
  }
  
-int vmx_write_guest_msr(u32 msr, u64 val)

+int vmx_write_guest_msr_vcpu(struct vcpu *v, u32 msr, u64 val)
  {
-struct vcpu *curr = current;
-unsigned int i, msr_count = curr->arch.hvm_vmx.msr_count;
-struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
+unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
  
  for ( i = 0; i < msr_count; i++ )

  {

Same here - while the code itself is only accessing memory, it
remains unclear whether correct behavior results when the subject
vCPU is actually executing.


Not sure what you mean here. The changes are specifically for updating 
MSR values (cached in VMCS) of possibly non-current VCPU.




In both cases, if there are restrictions on the conditions under
which unload can validly be done, I think you should ASSERT()
those conditions (at once making them explicit).


+long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) 
arg)
+{
+int ret;
+struct xen_pmu_params pmu_params;
+
+if ( vpmu_disabled )
+return -EINVAL;

EOPNOTSUPP perhaps?

And I agree with Dietmar that keeping opt_vpmu_enabled instead
of introducing vpmu_disabled would seem more natural, the more
that the default is disabled.


I can switch polarity back to enabled since both of you think it makes 
more sense but I don't think I should keep "opt_" prefix since this flag 
can be modified independently of what boot option says (e.g. when 
watchdog is on).





+/* Context switch */
+static inline void vpmu_switch_from(struct vpmu_struct *prev,
+struct vpmu_struct *next)
+{
+if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
+vpmu_save(prev);
+}
+
+static inline void vpmu_switch_to(struct vpmu_struct *prev,
+  struct vpmu_struct *next)
+{
+if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
+vpmu_load(next);
+else if ( vpmu_is_set(next, VPMU_CONTEXT_LOADED | VPMU_RUNNING) )
+vpmu_unload(next);

I don't recall there having been an unload path here, and I don't
see this being explained anywhere either. Wouldn't that more
naturally be done in the switch-from function?


I had this for a couple of revisions now. And I had a very specific 
reason for (1) having it and (2) having it here (it had something to do 
with the fact that 'next' may still be LOADED after we've changed 
vpmu_mode to, say, OFF) . But I can't remember what it was now. If I 
remember why I'll add a comment, otherwise I'll drop this.




Apart from that it's also not clear why both prev and next get
passed to both of these functions. Iirc a later patch may make
use of that, but then that later patch should add the second
function parameter.


Right, I was thinking of using both parameters at some point and left 
them here. Only one is needed.


-boris

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v18 05/16] x86/VPMU: Interface for setting PMU mode and flags

2015-02-20 Thread Boris Ostrovsky


On 02/20/2015 09:31 AM, Jan Beulich wrote:

On 16.02.15 at 23:26,  wrote:

+long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) 
arg)
+{
+int ret;
+struct xen_pmu_params pmu_params;
+
+if ( vpmu_disabled )
+return -EINVAL;
+
+ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
+if ( ret )
+return ret;
+
+/* Check major version when parameters are specified */
+switch ( op )
+{
+case XENPMU_mode_set:
+case XENPMU_feature_set:
+if ( copy_from_guest(&pmu_params, arg, 1) )
+return -EFAULT;
+
+if ( pmu_params.version.maj != XENPMU_VER_MAJ )
+return -EINVAL;
+}
+
+switch ( op )
+{
+case XENPMU_mode_set:
+{
+unsigned int old_mode;
+static DEFINE_SPINLOCK(xenpmu_mode_lock);
+
+if ( pmu_params.val & ~(XENPMU_MODE_SELF | XENPMU_MODE_HV) )

I btw also highly doubt that all compiler versions can properly track
the pmu_params is not used uninitialized here.


I'll add a zero initializer.

-boris

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v18 05/16] x86/VPMU: Interface for setting PMU mode and flags

2015-02-20 Thread Jan Beulich
>>> On 16.02.15 at 23:26,  wrote:
> +long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) 
> arg)
> +{
> +int ret;
> +struct xen_pmu_params pmu_params;
> +
> +if ( vpmu_disabled )
> +return -EINVAL;
> +
> +ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
> +if ( ret )
> +return ret;
> +
> +/* Check major version when parameters are specified */
> +switch ( op )
> +{
> +case XENPMU_mode_set:
> +case XENPMU_feature_set:
> +if ( copy_from_guest(&pmu_params, arg, 1) )
> +return -EFAULT;
> +
> +if ( pmu_params.version.maj != XENPMU_VER_MAJ )
> +return -EINVAL;
> +}
> +
> +switch ( op )
> +{
> +case XENPMU_mode_set:
> +{
> +unsigned int old_mode;
> +static DEFINE_SPINLOCK(xenpmu_mode_lock);
> +
> +if ( pmu_params.val & ~(XENPMU_MODE_SELF | XENPMU_MODE_HV) )

I btw also highly doubt that all compiler versions can properly track
the pmu_params is not used uninitialized here.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v18 05/16] x86/VPMU: Interface for setting PMU mode and flags

2015-02-20 Thread Jan Beulich
>>> On 16.02.15 at 23:26,  wrote:
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -253,6 +253,26 @@ static int amd_vpmu_save(struct vpmu_struct *vpmu)
>  return 1;
>  }
>  
> +static void amd_vpmu_unload(struct vpmu_struct *vpmu)
> +{
> +struct vcpu *v;
> +
> +if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED | VPMU_FROZEN) )
> +{
> +unsigned int i;
> +
> +for ( i = 0; i < num_counters; i++ )
> +wrmsrl(ctrls[i], 0);
> +context_save(vpmu);

This assumes vpmu_vcpu(vpmu) == current, yet it looks like you're
also calling it under different circumstances now.

> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -1183,11 +1183,10 @@ int vmx_read_guest_msr(u32 msr, u64 *val)
>  return -ESRCH;
>  }
>  
> -int vmx_write_guest_msr(u32 msr, u64 val)
> +int vmx_write_guest_msr_vcpu(struct vcpu *v, u32 msr, u64 val)
>  {
> -struct vcpu *curr = current;
> -unsigned int i, msr_count = curr->arch.hvm_vmx.msr_count;
> -struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
> +unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
> +struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
>  
>  for ( i = 0; i < msr_count; i++ )
>  {

Same here - while the code itself is only accessing memory, it
remains unclear whether correct behavior results when the subject
vCPU is actually executing.

In both cases, if there are restrictions on the conditions under
which unload can validly be done, I think you should ASSERT()
those conditions (at once making them explicit).

> +long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) 
> arg)
> +{
> +int ret;
> +struct xen_pmu_params pmu_params;
> +
> +if ( vpmu_disabled )
> +return -EINVAL;

EOPNOTSUPP perhaps?

And I agree with Dietmar that keeping opt_vpmu_enabled instead
of introducing vpmu_disabled would seem more natural, the more
that the default is disabled.

> +/* Context switch */
> +static inline void vpmu_switch_from(struct vpmu_struct *prev,
> +struct vpmu_struct *next)
> +{
> +if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
> +vpmu_save(prev);
> +}
> +
> +static inline void vpmu_switch_to(struct vpmu_struct *prev,
> +  struct vpmu_struct *next)
> +{
> +if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
> +vpmu_load(next);
> +else if ( vpmu_is_set(next, VPMU_CONTEXT_LOADED | VPMU_RUNNING) )
> +vpmu_unload(next);

I don't recall there having been an unload path here, and I don't
see this being explained anywhere either. Wouldn't that more
naturally be done in the switch-from function?

Apart from that it's also not clear why both prev and next get
passed to both of these functions. Iirc a later patch may make
use of that, but then that later patch should add the second
function parameter.

> --- a/xen/include/xlat.lst
> +++ b/xen/include/xlat.lst
> @@ -109,6 +109,7 @@
>  ?pmu_cntr_pair   arch-x86/pmu.h
>  ?pmu_intel_ctxt  arch-x86/pmu.h
>  ?pmu_regsarch-x86/pmu.h
> +?pmu_params  pmu.h

Apparently I overlooked this in earlier patches and revision: The
entries in this file are (mostly) alphabetically sorted (by filename,
then by structure name). Please don't make the situation worse
than it already is.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v18 05/16] x86/VPMU: Interface for setting PMU mode and flags

2015-02-18 Thread Dietmar Hahn
Am Montag 16 Februar 2015, 17:26:48 schrieb Boris Ostrovsky:
> Add runtime interface for setting PMU mode and flags. Three main modes are
> provided:
> * XENPMU_MODE_OFF:  PMU is not virtualized
> * XENPMU_MODE_SELF: Guests can access PMU MSRs and receive PMU interrupts.
> * XENPMU_MODE_HV: Same as XENPMU_MODE_SELF for non-proviledged guests, dom0
>   can profile itself and the hypervisor.
> 
> Note that PMU modes are different from what can be provided at Xen's boot line
> with 'vpmu' argument. An 'off' (or '0') value is equivalent to 
> XENPMU_MODE_OFF.
> Any other value, on the other hand, will cause VPMU mode to be set to
> XENPMU_MODE_SELF during boot.
> 
> For feature flags only Intel's BTS is currently supported.
> 
> Mode and flags are set via HYPERVISOR_xenpmu_op hypercall.
> 
> Signed-off-by: Boris Ostrovsky 
> Acked-by: Daniel De Graaf 
> ---
>  tools/flask/policy/policy/modules/xen/xen.te |   3 +
>  xen/arch/x86/domain.c|   6 +-
>  xen/arch/x86/hvm/svm/vpmu.c  |  25 ++-
>  xen/arch/x86/hvm/vmx/vmcs.c  |   7 +-
>  xen/arch/x86/hvm/vmx/vpmu_core2.c|  27 ++-
>  xen/arch/x86/hvm/vpmu.c  | 240 
> +--
>  xen/arch/x86/oprofile/nmi_int.c  |   3 +-
>  xen/arch/x86/x86_64/compat/entry.S   |   4 +
>  xen/arch/x86/x86_64/entry.S  |   4 +
>  xen/include/asm-x86/hvm/vmx/vmcs.h   |   7 +-
>  xen/include/asm-x86/hvm/vpmu.h   |  33 +++-
>  xen/include/public/pmu.h |  45 +
>  xen/include/public/xen.h |   1 +
>  xen/include/xen/hypercall.h  |   4 +
>  xen/include/xlat.lst |   1 +
>  xen/include/xsm/dummy.h  |  15 ++
>  xen/include/xsm/xsm.h|   6 +
>  xen/xsm/dummy.c  |   1 +
>  xen/xsm/flask/hooks.c|  18 ++
>  xen/xsm/flask/policy/access_vectors  |   2 +
>  20 files changed, 417 insertions(+), 35 deletions(-)
> 
> diff --git a/tools/flask/policy/policy/modules/xen/xen.te 
> b/tools/flask/policy/policy/modules/xen/xen.te
> index c0128aa..870ff81 100644
> --- a/tools/flask/policy/policy/modules/xen/xen.te
> +++ b/tools/flask/policy/policy/modules/xen/xen.te
> @@ -68,6 +68,9 @@ allow dom0_t xen_t:xen2 {
>  resource_op
>  psr_cmt_op
>  };
> +allow dom0_t xen_t:xen2 {
> +pmu_ctrl
> +};
>  allow dom0_t xen_t:mmu memorymap;
>  
>  # Allow dom0 to use these domctls on itself. For domctls acting on other
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index eb8ac3a..b0e3c3d 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1536,7 +1536,7 @@ void context_switch(struct vcpu *prev, struct vcpu 
> *next)
>  if ( is_hvm_vcpu(prev) )
>  {
>  if (prev != next)
> -vpmu_save(vcpu_vpmu(prev));
> +vpmu_switch_from(vcpu_vpmu(prev), vcpu_vpmu(next));
>  
>  if ( !list_empty(&prev->arch.hvm_vcpu.tm_list) )
>  pt_save_timer(prev);
> @@ -1579,9 +1579,9 @@ void context_switch(struct vcpu *prev, struct vcpu 
> *next)
> !is_hardware_domain(next->domain));
>  }
>  
> -if (is_hvm_vcpu(next) && (prev != next) )
> +if ( is_hvm_vcpu(next) && (prev != next) )
>  /* Must be done with interrupts enabled */
> -vpmu_load(vcpu_vpmu(next));
> +vpmu_switch_to(vcpu_vpmu(prev), vcpu_vpmu(next));
>  
>  context_saved(prev);
>  
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 72e2561..2cfdf08 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -253,6 +253,26 @@ static int amd_vpmu_save(struct vpmu_struct *vpmu)
>  return 1;
>  }
>  
> +static void amd_vpmu_unload(struct vpmu_struct *vpmu)
> +{
> +struct vcpu *v;
> +
> +if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED | VPMU_FROZEN) )
> +{
> +unsigned int i;
> +
> +for ( i = 0; i < num_counters; i++ )
> +wrmsrl(ctrls[i], 0);
> +context_save(vpmu);
> +}
> +
> +v = vpmu_vcpu(vpmu);
> +if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
> +amd_vpmu_unset_msr_bitmap(v);
> +
> +vpmu_reset(vpmu, VPMU_FROZEN);
> +}
> +
>  static void context_update(unsigned int msr, u64 msr_content)
>  {
>  unsigned int i;
> @@ -471,17 +491,18 @@ struct arch_vpmu_ops amd_vpmu_ops = {
>  .arch_vpmu_destroy = amd_vpmu_destroy,
>  .arch_vpmu_save = amd_vpmu_save,
>  .arch_vpmu_load = amd_vpmu_load,
> +.arch_vpmu_unload = amd_vpmu_unload,
>  .arch_vpmu_dump = amd_vpmu_dump
>  };
>  
> -int svm_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
> +int svm_vpmu_initialise(struct vcpu *v)
>  {
>  struct vpmu_struct *vpmu = vcpu_vpmu(v);
>  uint8_t family = current_cpu_data.x86;
>  int ret = 0;
>  
>  /* vpmu enabled? */
> -if 

[Xen-devel] [PATCH v18 05/16] x86/VPMU: Interface for setting PMU mode and flags

2015-02-16 Thread Boris Ostrovsky
Add runtime interface for setting PMU mode and flags. Three main modes are
provided:
* XENPMU_MODE_OFF:  PMU is not virtualized
* XENPMU_MODE_SELF: Guests can access PMU MSRs and receive PMU interrupts.
* XENPMU_MODE_HV: Same as XENPMU_MODE_SELF for non-proviledged guests, dom0
  can profile itself and the hypervisor.

Note that PMU modes are different from what can be provided at Xen's boot line
with 'vpmu' argument. An 'off' (or '0') value is equivalent to XENPMU_MODE_OFF.
Any other value, on the other hand, will cause VPMU mode to be set to
XENPMU_MODE_SELF during boot.

For feature flags only Intel's BTS is currently supported.

Mode and flags are set via HYPERVISOR_xenpmu_op hypercall.

Signed-off-by: Boris Ostrovsky 
Acked-by: Daniel De Graaf 
---
 tools/flask/policy/policy/modules/xen/xen.te |   3 +
 xen/arch/x86/domain.c|   6 +-
 xen/arch/x86/hvm/svm/vpmu.c  |  25 ++-
 xen/arch/x86/hvm/vmx/vmcs.c  |   7 +-
 xen/arch/x86/hvm/vmx/vpmu_core2.c|  27 ++-
 xen/arch/x86/hvm/vpmu.c  | 240 +--
 xen/arch/x86/oprofile/nmi_int.c  |   3 +-
 xen/arch/x86/x86_64/compat/entry.S   |   4 +
 xen/arch/x86/x86_64/entry.S  |   4 +
 xen/include/asm-x86/hvm/vmx/vmcs.h   |   7 +-
 xen/include/asm-x86/hvm/vpmu.h   |  33 +++-
 xen/include/public/pmu.h |  45 +
 xen/include/public/xen.h |   1 +
 xen/include/xen/hypercall.h  |   4 +
 xen/include/xlat.lst |   1 +
 xen/include/xsm/dummy.h  |  15 ++
 xen/include/xsm/xsm.h|   6 +
 xen/xsm/dummy.c  |   1 +
 xen/xsm/flask/hooks.c|  18 ++
 xen/xsm/flask/policy/access_vectors  |   2 +
 20 files changed, 417 insertions(+), 35 deletions(-)

diff --git a/tools/flask/policy/policy/modules/xen/xen.te 
b/tools/flask/policy/policy/modules/xen/xen.te
index c0128aa..870ff81 100644
--- a/tools/flask/policy/policy/modules/xen/xen.te
+++ b/tools/flask/policy/policy/modules/xen/xen.te
@@ -68,6 +68,9 @@ allow dom0_t xen_t:xen2 {
 resource_op
 psr_cmt_op
 };
+allow dom0_t xen_t:xen2 {
+pmu_ctrl
+};
 allow dom0_t xen_t:mmu memorymap;
 
 # Allow dom0 to use these domctls on itself. For domctls acting on other
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index eb8ac3a..b0e3c3d 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1536,7 +1536,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
 if ( is_hvm_vcpu(prev) )
 {
 if (prev != next)
-vpmu_save(vcpu_vpmu(prev));
+vpmu_switch_from(vcpu_vpmu(prev), vcpu_vpmu(next));
 
 if ( !list_empty(&prev->arch.hvm_vcpu.tm_list) )
 pt_save_timer(prev);
@@ -1579,9 +1579,9 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
!is_hardware_domain(next->domain));
 }
 
-if (is_hvm_vcpu(next) && (prev != next) )
+if ( is_hvm_vcpu(next) && (prev != next) )
 /* Must be done with interrupts enabled */
-vpmu_load(vcpu_vpmu(next));
+vpmu_switch_to(vcpu_vpmu(prev), vcpu_vpmu(next));
 
 context_saved(prev);
 
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 72e2561..2cfdf08 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -253,6 +253,26 @@ static int amd_vpmu_save(struct vpmu_struct *vpmu)
 return 1;
 }
 
+static void amd_vpmu_unload(struct vpmu_struct *vpmu)
+{
+struct vcpu *v;
+
+if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED | VPMU_FROZEN) )
+{
+unsigned int i;
+
+for ( i = 0; i < num_counters; i++ )
+wrmsrl(ctrls[i], 0);
+context_save(vpmu);
+}
+
+v = vpmu_vcpu(vpmu);
+if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
+amd_vpmu_unset_msr_bitmap(v);
+
+vpmu_reset(vpmu, VPMU_FROZEN);
+}
+
 static void context_update(unsigned int msr, u64 msr_content)
 {
 unsigned int i;
@@ -471,17 +491,18 @@ struct arch_vpmu_ops amd_vpmu_ops = {
 .arch_vpmu_destroy = amd_vpmu_destroy,
 .arch_vpmu_save = amd_vpmu_save,
 .arch_vpmu_load = amd_vpmu_load,
+.arch_vpmu_unload = amd_vpmu_unload,
 .arch_vpmu_dump = amd_vpmu_dump
 };
 
-int svm_vpmu_initialise(struct vcpu *v, unsigned int vpmu_flags)
+int svm_vpmu_initialise(struct vcpu *v)
 {
 struct vpmu_struct *vpmu = vcpu_vpmu(v);
 uint8_t family = current_cpu_data.x86;
 int ret = 0;
 
 /* vpmu enabled? */
-if ( !vpmu_flags )
+if ( vpmu_mode == XENPMU_MODE_OFF )
 return 0;
 
 switch ( family )
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index d614638..0183222 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1183,11 +1183,10 @@ int vmx_read_guest_msr(u32 msr, u64 *