Re: [PATCH v1] kvm: Make vcpu->requests as 64 bit bitmap

2015-12-24 Thread Roman Kagan
On Thu, Dec 24, 2015 at 12:30:26PM +0300, Andrey Smetanin wrote:
> Currently on x86 arch we has already 32 requests defined
> so the newer request bits can't be placed inside
> vcpu->requests(unsigned long) inside x86 32 bit system.
> But we are going to add a new request in x86 arch
> for Hyper-V tsc page support.
> 
> To solve the problem the patch replaces vcpu->requests by
> bitmap with 64 bit length and uses bitmap API.
> 
> The patch consists of:
> * announce kvm_vcpu_has_requests() to check whether vcpu has
> requests
> * announce kvm_vcpu_requests() to get vcpu requests pointer

I think that if abstracting out the implementation of the request
container is what you're after, you'd better not define this function;
accesses to the request map should be through your accessor functions.

> * announce kvm_clear_request() to clear particular vcpu request
> * replace if (vcpu->requests) by if (kvm_vcpu_has_requests(vcpu))
> * replace clear_bit(req, vcpu->requests) by
>  kvm_clear_request(req, vcpu)

Apparently one accessor is missing: test the presence of a request
without clearing it from the bitmap (because kvm_check_request() clears
it).

> diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
> index 2e0e67e..4015b88 100644
> --- a/arch/powerpc/kvm/trace.h
> +++ b/arch/powerpc/kvm/trace.h
> @@ -109,7 +109,7 @@ TRACE_EVENT(kvm_check_requests,
>  
>   TP_fast_assign(
>   __entry->cpu_nr = vcpu->vcpu_id;
> - __entry->requests   = vcpu->requests;
> + __entry->requests   = (__u32)kvm_vcpu_requests(vcpu)[0];
>   ),

This doesn't make sense, to expose only subset of the requests.  BTW I
don't see this event in Linus tree, nor in linux-next, so I'm not quite
sure why it's formed this way; I guess the interesting part is the
request number and the return value (i.e. whether it's set), not the
whole bitmap.

> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5957,7 +5957,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu 
> *vcpu)
>   if (intr_window_requested && vmx_interrupt_allowed(vcpu))
>   return handle_interrupt_window(>vcpu);
>  
> - if (test_bit(KVM_REQ_EVENT, >requests))
> + if (test_bit(KVM_REQ_EVENT, kvm_vcpu_requests(vcpu)))

Here you'd rather want that function to test for the request without
clearing it.

> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2117,7 +2117,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
> msr_data *msr_info)
>  
>   if (ka->boot_vcpu_runs_old_kvmclock != tmp)
>   set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
> - >requests);
> + kvm_vcpu_requests(vcpu));

This should have been kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);

> @@ -8048,7 +8048,7 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu 
> *vcpu)
>   if (atomic_read(>arch.nmi_queued))
>   return true;
>  
> - if (test_bit(KVM_REQ_SMI, >requests))
> + if (test_bit(KVM_REQ_SMI, kvm_vcpu_requests(vcpu)))

Again the test-only accessor is due here.

> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -146,6 +146,8 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_HV_EXIT   30
>  #define KVM_REQ_HV_STIMER 31
>  
> +#define KVM_REQ_MAX   64
> +
>  #define KVM_USERSPACE_IRQ_SOURCE_ID  0
>  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
>  
> @@ -233,7 +235,7 @@ struct kvm_vcpu {
>   int vcpu_id;
>   int srcu_idx;
>   int mode;
> - unsigned long requests;
> + DECLARE_BITMAP(requests, KVM_REQ_MAX);
>   unsigned long guest_debug;
>  
>   int pre_pcpu;
> @@ -286,6 +288,16 @@ struct kvm_vcpu {
>   struct kvm_vcpu_arch arch;
>  };
>  
> +static inline bool kvm_vcpu_has_requests(struct kvm_vcpu *vcpu)
> +{
> + return !bitmap_empty(vcpu->requests, KVM_REQ_MAX);
> +}
> +
> +static inline ulong *kvm_vcpu_requests(struct kvm_vcpu *vcpu)
> +{
> + return (ulong *)vcpu->requests;
> +}

As I wrote above, I believe this function doesn't belong in the API.

> +
>  static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
>  {
>   return cmpxchg(>mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
> @@ -1002,7 +1014,7 @@ static inline bool kvm_is_error_gpa(struct kvm *kvm, 
> gpa_t gpa)
>  
>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
>  {
> - set_bit(KVM_REQ_MIGRATE_TIMER, >requests);
> + set_bit(KVM_REQ_MIGRATE_TIMER, kvm_vcpu_requests(vcpu));

kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);

> @@ -1118,19 +1130,24 @@ static inline bool kvm_vcpu_compatible(struct 
> kvm_vcpu *vcpu) { return true; }
>  
>  static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
>  {
> - set_bit(req, >requests);
> + set_bit(req, kvm_vcpu_requests(vcpu));
>  }
>  
>  static inline 

Re: [PATCH v1] kvm: Make vcpu->requests as 64 bit bitmap

2015-12-24 Thread Roman Kagan
On Thu, Dec 24, 2015 at 12:30:26PM +0300, Andrey Smetanin wrote:
> Currently on x86 arch we has already 32 requests defined
> so the newer request bits can't be placed inside
> vcpu->requests(unsigned long) inside x86 32 bit system.
> But we are going to add a new request in x86 arch
> for Hyper-V tsc page support.
> 
> To solve the problem the patch replaces vcpu->requests by
> bitmap with 64 bit length and uses bitmap API.
> 
> The patch consists of:
> * announce kvm_vcpu_has_requests() to check whether vcpu has
> requests
> * announce kvm_vcpu_requests() to get vcpu requests pointer

I think that if abstracting out the implementation of the request
container is what you're after, you'd better not define this function;
accesses to the request map should be through your accessor functions.

> * announce kvm_clear_request() to clear particular vcpu request
> * replace if (vcpu->requests) by if (kvm_vcpu_has_requests(vcpu))
> * replace clear_bit(req, vcpu->requests) by
>  kvm_clear_request(req, vcpu)

Apparently one accessor is missing: test the presence of a request
without clearing it from the bitmap (because kvm_check_request() clears
it).

> diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
> index 2e0e67e..4015b88 100644
> --- a/arch/powerpc/kvm/trace.h
> +++ b/arch/powerpc/kvm/trace.h
> @@ -109,7 +109,7 @@ TRACE_EVENT(kvm_check_requests,
>  
>   TP_fast_assign(
>   __entry->cpu_nr = vcpu->vcpu_id;
> - __entry->requests   = vcpu->requests;
> + __entry->requests   = (__u32)kvm_vcpu_requests(vcpu)[0];
>   ),

This doesn't make sense, to expose only subset of the requests.  BTW I
don't see this event in Linus tree, nor in linux-next, so I'm not quite
sure why it's formed this way; I guess the interesting part is the
request number and the return value (i.e. whether it's set), not the
whole bitmap.

> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -5957,7 +5957,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu 
> *vcpu)
>   if (intr_window_requested && vmx_interrupt_allowed(vcpu))
>   return handle_interrupt_window(>vcpu);
>  
> - if (test_bit(KVM_REQ_EVENT, >requests))
> + if (test_bit(KVM_REQ_EVENT, kvm_vcpu_requests(vcpu)))

Here you'd rather want that function to test for the request without
clearing it.

> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2117,7 +2117,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
> msr_data *msr_info)
>  
>   if (ka->boot_vcpu_runs_old_kvmclock != tmp)
>   set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
> - >requests);
> + kvm_vcpu_requests(vcpu));

This should have been kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);

> @@ -8048,7 +8048,7 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu 
> *vcpu)
>   if (atomic_read(>arch.nmi_queued))
>   return true;
>  
> - if (test_bit(KVM_REQ_SMI, >requests))
> + if (test_bit(KVM_REQ_SMI, kvm_vcpu_requests(vcpu)))

Again the test-only accessor is due here.

> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -146,6 +146,8 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_HV_EXIT   30
>  #define KVM_REQ_HV_STIMER 31
>  
> +#define KVM_REQ_MAX   64
> +
>  #define KVM_USERSPACE_IRQ_SOURCE_ID  0
>  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
>  
> @@ -233,7 +235,7 @@ struct kvm_vcpu {
>   int vcpu_id;
>   int srcu_idx;
>   int mode;
> - unsigned long requests;
> + DECLARE_BITMAP(requests, KVM_REQ_MAX);
>   unsigned long guest_debug;
>  
>   int pre_pcpu;
> @@ -286,6 +288,16 @@ struct kvm_vcpu {
>   struct kvm_vcpu_arch arch;
>  };
>  
> +static inline bool kvm_vcpu_has_requests(struct kvm_vcpu *vcpu)
> +{
> + return !bitmap_empty(vcpu->requests, KVM_REQ_MAX);
> +}
> +
> +static inline ulong *kvm_vcpu_requests(struct kvm_vcpu *vcpu)
> +{
> + return (ulong *)vcpu->requests;
> +}

As I wrote above, I believe this function doesn't belong in the API.

> +
>  static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
>  {
>   return cmpxchg(>mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
> @@ -1002,7 +1014,7 @@ static inline bool kvm_is_error_gpa(struct kvm *kvm, 
> gpa_t gpa)
>  
>  static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
>  {
> - set_bit(KVM_REQ_MIGRATE_TIMER, >requests);
> + set_bit(KVM_REQ_MIGRATE_TIMER, kvm_vcpu_requests(vcpu));

kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);

> @@ -1118,19 +1130,24 @@ static inline bool kvm_vcpu_compatible(struct 
> kvm_vcpu *vcpu) { return true; }
>  
>  static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
>  {
> - set_bit(req, >requests);
> + set_bit(req, kvm_vcpu_requests(vcpu));
>  }
>  
>  static inline 

Re: [PATCH v1 1/2] kvm/x86: Hyper-V SynIC tracepoints

2015-12-24 Thread Roman Kagan
On Wed, Dec 23, 2015 at 04:53:59PM +0300, Andrey Smetanin wrote:
> Trace the following Hyper SynIC events:
> * set msr
> * set sint irq
> * ack sint
> * sint irq eoi
> 
> Signed-off-by: Andrey Smetanin <asmeta...@virtuozzo.com>
> CC: Gleb Natapov <g...@kernel.org>
> CC: Paolo Bonzini <pbonz...@redhat.com>
> CC: Roman Kagan <rka...@virtuozzo.com>
> CC: Denis V. Lunev <d...@openvz.org>
> CC: qemu-de...@nongnu.org
> ---
>  arch/x86/kvm/hyperv.c | 10 +++---
>  arch/x86/kvm/trace.h  | 93 
> +++
>  2 files changed, 98 insertions(+), 5 deletions(-)

Reviewed-by: Roman Kagan <rka...@virtuozzo.com>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] kvm: Make vcpu->requests as 64 bit bitmap

2015-12-24 Thread Roman Kagan
On Thu, Dec 24, 2015 at 04:29:21PM +0300, Andrey Smetanin wrote:
> Currently on x86 arch we has already 32 requests defined
> so the newer request bits can't be placed inside
> vcpu->requests(unsigned long) inside x86 32 bit system.
> But we are going to add a new request in x86 arch
> for Hyper-V tsc page support.
> 
> To solve the problem the patch replaces vcpu->requests by
> bitmap with 64 bit length and uses bitmap API.
> 
> The patch consists of:
> * announce kvm_has_requests() to check whether vcpu has
> requests
> * announce kvm_clear_request() to clear particular vcpu request
> * announce kvm_test_request() to test particular vcpu request
> * replace if (vcpu->requests) by if (kvm_has_requests(vcpu))
> * replace clear_bit(req, vcpu->requests) by
>  kvm_clear_request(req, vcpu)
> 
> Changes v2:
> * hide internals of vcpu requests bitmap
> by interface usage in all places
> * replace test_bit(req, vcpu->requests) by
>  kvm_test_request()
> * POWERPC: trace vcpu requests bitmap by
> __bitmask, __assign_bitmap, __get_bitmask
> 
> Signed-off-by: Andrey Smetanin <asmeta...@virtuozzo.com>
> Acked-by: James Hogan <james.ho...@imgtec.com>
> CC: Paolo Bonzini <pbonz...@redhat.com>
> CC: Gleb Natapov <g...@kernel.org>
> CC: James Hogan <james.ho...@imgtec.com>
> CC: Paul Burton <paul.bur...@imgtec.com>
> CC: Ralf Baechle <r...@linux-mips.org>
> CC: Alexander Graf <ag...@suse.com>
> CC: Christian Borntraeger <borntrae...@de.ibm.com>
> CC: Cornelia Huck <cornelia.h...@de.ibm.com>
> CC: linux-m...@linux-mips.org
> CC: kvm-ppc@vger.kernel.org
> CC: linux-s...@vger.kernel.org
> CC: Roman Kagan <rka...@virtuozzo.com>
> CC: Denis V. Lunev <d...@openvz.org>
> CC: qemu-de...@nongnu.org
> ---
>  arch/mips/kvm/emulate.c   |  4 +---
>  arch/powerpc/kvm/book3s_pr.c  |  2 +-
>  arch/powerpc/kvm/book3s_pr_papr.c |  2 +-
>  arch/powerpc/kvm/booke.c  |  6 +++---
>  arch/powerpc/kvm/powerpc.c|  6 +++---
>  arch/powerpc/kvm/trace.h  |  9 +
>  arch/s390/kvm/kvm-s390.c  |  4 ++--
>  arch/x86/kvm/vmx.c|  2 +-
>  arch/x86/kvm/x86.c| 16 
>  include/linux/kvm_host.h  | 38 +-
>  10 files changed, 50 insertions(+), 39 deletions(-)

Reviewed-by: Roman Kagan <rka...@virtuozzo.com>
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] kvm: Make vcpu->requests as 64 bit bitmap

2015-12-24 Thread Roman Kagan
On Thu, Dec 24, 2015 at 04:29:21PM +0300, Andrey Smetanin wrote:
> Currently on x86 arch we has already 32 requests defined
> so the newer request bits can't be placed inside
> vcpu->requests(unsigned long) inside x86 32 bit system.
> But we are going to add a new request in x86 arch
> for Hyper-V tsc page support.
> 
> To solve the problem the patch replaces vcpu->requests by
> bitmap with 64 bit length and uses bitmap API.
> 
> The patch consists of:
> * announce kvm_has_requests() to check whether vcpu has
> requests
> * announce kvm_clear_request() to clear particular vcpu request
> * announce kvm_test_request() to test particular vcpu request
> * replace if (vcpu->requests) by if (kvm_has_requests(vcpu))
> * replace clear_bit(req, vcpu->requests) by
>  kvm_clear_request(req, vcpu)
> 
> Changes v2:
> * hide internals of vcpu requests bitmap
> by interface usage in all places
> * replace test_bit(req, vcpu->requests) by
>  kvm_test_request()
> * POWERPC: trace vcpu requests bitmap by
> __bitmask, __assign_bitmap, __get_bitmask
> 
> Signed-off-by: Andrey Smetanin <asmeta...@virtuozzo.com>
> Acked-by: James Hogan <james.ho...@imgtec.com>
> CC: Paolo Bonzini <pbonz...@redhat.com>
> CC: Gleb Natapov <g...@kernel.org>
> CC: James Hogan <james.ho...@imgtec.com>
> CC: Paul Burton <paul.bur...@imgtec.com>
> CC: Ralf Baechle <r...@linux-mips.org>
> CC: Alexander Graf <ag...@suse.com>
> CC: Christian Borntraeger <borntrae...@de.ibm.com>
> CC: Cornelia Huck <cornelia.h...@de.ibm.com>
> CC: linux-m...@linux-mips.org
> CC: kvm-...@vger.kernel.org
> CC: linux-s...@vger.kernel.org
> CC: Roman Kagan <rka...@virtuozzo.com>
> CC: Denis V. Lunev <d...@openvz.org>
> CC: qemu-de...@nongnu.org
> ---
>  arch/mips/kvm/emulate.c   |  4 +---
>  arch/powerpc/kvm/book3s_pr.c  |  2 +-
>  arch/powerpc/kvm/book3s_pr_papr.c |  2 +-
>  arch/powerpc/kvm/booke.c  |  6 +++---
>  arch/powerpc/kvm/powerpc.c|  6 +++---
>  arch/powerpc/kvm/trace.h  |  9 +
>  arch/s390/kvm/kvm-s390.c  |  4 ++--
>  arch/x86/kvm/vmx.c|  2 +-
>  arch/x86/kvm/x86.c| 16 
>  include/linux/kvm_host.h  | 38 +-
>  10 files changed, 50 insertions(+), 39 deletions(-)

Reviewed-by: Roman Kagan <rka...@virtuozzo.com>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v1 2/2] kvm/x86: Hyper-V SynIC timers tracepoints

2015-12-24 Thread Roman Kagan
On Wed, Dec 23, 2015 at 04:54:00PM +0300, Andrey Smetanin wrote:
> Trace the following Hyper SynIC timers events:
> * periodic timer start
> * one-shot timer start
> * timer callback
> * timer expiration and message delivery result
> * timer config setup
> * timer count setup
> * timer cleanup
> 
> Signed-off-by: Andrey Smetanin <asmeta...@virtuozzo.com>
> CC: Gleb Natapov <g...@kernel.org>
> CC: Paolo Bonzini <pbonz...@redhat.com>
> CC: Roman Kagan <rka...@virtuozzo.com>
> CC: Denis V. Lunev <d...@openvz.org>
> CC: qemu-de...@nongnu.org
> ---
>  arch/x86/kvm/hyperv.c |  27 +++-
>  arch/x86/kvm/trace.h  | 170 
> ++++++
>  2 files changed, 196 insertions(+), 1 deletion(-)

Reviewed-by: Roman Kagan <rka...@virtuozzo.com>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 2/5] KVM: add KVM_EXIT_MSR exit reason and capability.

2015-12-22 Thread 'Roman Kagan'
On Tue, Dec 22, 2015 at 10:24:13AM +0300, Pavel Fedin wrote:
> > +On the return path into kvm, user space should set handled to
> > +KVM_EXIT_MSR_HANDLED if it successfully handled the MSR access. Otherwise,
> > +handled should be set to KVM_EXIT_MSR_UNHANDLED, which will cause a general
> > +protection fault to be injected into the vcpu. If an error occurs during 
> > the
> > +return into kvm, the vcpu will not be run and another exit will be 
> > generated
> > +with type set to KVM_EXIT_MSR_COMPLETION_FAILED.
> > +
> > +If exit_reason is KVM_EXIT_MSR_AFTER_WRITE, then the vcpu has executed a 
> > wrmsr
> > +instruction which is handled by kvm but which user space may need to be
> > +notified about. index and data are set as described above; the value of 
> > type
> > +depends on the MSR that was written. handled is ignored on reentry into 
> > kvm.
> 
> 1. Is there any real need to distinguish between KVM_EXIT_MSR_WRITE and 
> KVM_EXIT_MSR_AFTER_WRITE ? IMHO from userland's point of view these are the 
> same.

Indeed.  Perhaps the kernel can set .handled to true to let userspace
know it already took care of it, instead of introducing yet another
exit_reason.  The field would need to be marked in/out, then.

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 2/5] KVM: add KVM_EXIT_MSR exit reason and capability.

2015-12-22 Thread 'Roman Kagan'
On Tue, Dec 22, 2015 at 03:51:52PM +0300, Pavel Fedin wrote:
>  Hello!
> 
> > > 1. Is there any real need to distinguish between KVM_EXIT_MSR_WRITE and
> > KVM_EXIT_MSR_AFTER_WRITE ? IMHO from userland's point of view these are the 
> > same.
> > 
> > Indeed.  Perhaps the kernel can set .handled to true to let userspace
> > know it already took care of it, instead of introducing yet another
> > exit_reason.  The field would need to be marked in/out, then.
> 
>  I'm not sure that you need even this. Anyway, particular MSRs are 
> function-specific, and if you're emulating an MSR in userspace,
> then, i believe, you know the function behind it. And it's IMHO safe to just 
> know that SynIC MSRs have some extra handling in
> kernel. And i believe this has no direct impact on userland's behavior.

It has: unlike the scenario that was the original motivation for Peter's
patches, where the the userspace wanted to handle register accesses
which the kernel *didn't*, in case of SynIC the userspace wants do
something about MSR accesses *only* if the kernel *also* handles them.

I guess that was the reason why Paolo suggested an extra exit_reason,
and I think .handled field can be used to pass that information instead.

You're probably right that, at least in SynIC case, it should be safe to
assume that, if all the SynIC setup succeeded, the corresponding MSR
accesses would only trigger exits when the kernel processed them
appropriately.

But the proposed use of .handled costs basically nothing, and it may
prove useful in general (as a conisistency proof, if anything).

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 5/5] kvm/x86: Hyper-V kvm exit

2015-12-18 Thread 'Roman Kagan'
On Fri, Dec 18, 2015 at 06:19:25PM +0300, Pavel Fedin wrote:
>  I dislike implementing architecture-dependent exit code where we could 
> implement an architecture-independent one.
> 
>  As far as i understand this code, KVM_EXIT_HYPERV is called when one of 
> three MSRs are accessed. But, shouldn't we have implemented
> instead something more generic, like KVM_EXIT_REG_IO, which would work 
> similar to KVM_EXIT_PIO or KVM_EXIT_MMIO, but carry register
> code and value?
> 
>  This would allow us to solve the same task which we have done here, but this 
> solution would be reusable for other devices and other
> archirectures. What if in future we have more system registers to emulate in 
> userspace?
> 
>  I write this because at one point i suggested similar thing for ARM64 (but i 
> never actually wrote it), to emulate physical CP15
> timer. And it would require exactly the same capability - process some 
> trapped system register accesses in userspace.

Note that, as Paolo pointed out, in case of HyperV the value of the
register is of interest not only to the userspace but to the KVM, too.
Otherwise I don't see off hand why a generic infrastructure wouldn't fit
in our usecase.

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 5/5] kvm/x86: Hyper-V kvm exit

2015-12-18 Thread 'Roman Kagan'
On Fri, Dec 18, 2015 at 05:01:59PM +0100, Paolo Bonzini wrote:
> On 18/12/2015 16:19, Pavel Fedin wrote:
> > As far as i understand this code, KVM_EXIT_HYPERV is called when one
> > of three MSRs are accessed. But, shouldn't we have implemented 
> > instead something more generic, like KVM_EXIT_REG_IO, which would
> > work similar to KVM_EXIT_PIO or KVM_EXIT_MMIO, but carry register 
> > code and value?
> 
> Yes, we considered that.  There were actually patches for this as well.
>  However, in this case the register is still emulated in the kernel, and
> userspace just gets informed of the new value.
> 
> > This would allow us to solve the same task which we have done here,
> > but this solution would be reusable for other devices and other 
> > archirectures. What if in future we have more system registers to
> > emulate in userspace?
> 
> If we do get that, we will just rename KVM_EXIT_HYPERV to
> KVM_EXIT_MSR_ACCESS, and KVM_EXIT_HYPERV_SYNIC to
> KVM_EXIT_MSR_HYPERV_SYNIC, and struct kvm_hyperv_exit to kvm_msr_exit.

A generic implemenation will probably just convey (msr#, value) pair,
and KVM_EXIT_MSR_HYPERV_SYNIC wouldn't be needed at all.

I don't immediately see why it wouldn't work for us; we'd have reused
the infrastructure if it existed when we started our work.  I didn't see
Peter's patches yet; maybe we can come up with an interim solution to
fit in the merge window but expose a sufficiently generic API.

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 5/5] kvm/x86: Hyper-V kvm exit

2015-12-18 Thread Roman Kagan
On Fri, Dec 18, 2015 at 10:10:11AM -0800, Peter Hornyack wrote:
> On Fri, Dec 18, 2015 at 8:01 AM, Paolo Bonzini  wrote:
> > On 18/12/2015 16:19, Pavel Fedin wrote:
> >> As far as i understand this code, KVM_EXIT_HYPERV is called when one
> >> of three MSRs are accessed. But, shouldn't we have implemented
> >> instead something more generic, like KVM_EXIT_REG_IO, which would
> >> work similar to KVM_EXIT_PIO or KVM_EXIT_MMIO, but carry register
> >> code and value?
> >
> > Yes, we considered that.  There were actually patches for this as well.
> >  However, in this case the register is still emulated in the kernel, and
> > userspace just gets informed of the new value.
> 
> On brief inspection of Andrey's patch (I have not been following
> closely) it looks like the kvm_hyperv_exit struct that's returned to
> userspace contains more data (control, evt_page, and msg_page fields)
> than simply the value of the MSR, so would the desired SynIC exit fit
> into a general-purpose exit for MSR emulation?

Frankly I'm struggling trying to recall why we implemented it this way.
Actually all three fields are the values of respective MSRs and I don't
see any necessity to pass all three at the same time when any of them
gets updated.  The patch for QEMU adds an exit handler which processes
the fields individually, so I have a strong suspicion that union was
meant here rather than struct.

I hope Andrey will help to shed some light on that when he's back in the
office on Monday; meanwhile I think this peculiarity can be ignored.

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v1 7/7] kvm/x86: Hyper-V SynIC timers

2015-11-30 Thread Roman Kagan
On Fri, Nov 27, 2015 at 11:49:40AM +0100, Paolo Bonzini wrote:

[ sorry missed your message on Friday, replying now ]

> On 27/11/2015 09:12, Roman Kagan wrote:
> >> > +n = div64_u64(time_now - stimer->exp_time, stimer->count) + 1;
> >> > +stimer->exp_time += n * stimer->count;
> > This is actually just a reminder calculation so I'd rather do it
> > directly with div64_u64_rem().
> 
> It took me a while to understand why it was a remained. :)

It gets easier if you think of it this way: we've slipped a few whole
periods and the remainder of the slack into the current period, so
the time left till the next tick is ("count" is the timer period here)

  delta = count - slack % count
where
  slack = time_now - exp_time

This gives you immediately your

>   exp_time = time_now + (count - (time_now - exp_time) % count)

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v1 7/7] kvm/x86: Hyper-V SynIC timers

2015-11-27 Thread Roman Kagan
On Wed, Nov 25, 2015 at 06:20:21PM +0300, Andrey Smetanin wrote:
> Per Hyper-V specification (and as required by Hyper-V-aware guests),
> SynIC provides 4 per-vCPU timers.  Each timer is programmed via a pair
> of MSRs, and signals expiration by delivering a special format message
> to the configured SynIC message slot and triggering the corresponding
> synthetic interrupt.
> 
> Note: as implemented by this patch, all periodic timers are "lazy"
> (i.e. if the vCPU wasn't scheduled for more than the timer period the
> timer events are lost), regardless of the corresponding configuration
> MSR.  If deemed necessary, the "catch up" mode (the timer period is
> shortened until the timer catches up) will be implemented later.
> 
> Signed-off-by: Andrey Smetanin <asmeta...@virtuozzo.com>
> CC: Gleb Natapov <g...@kernel.org>
> CC: Paolo Bonzini <pbonz...@redhat.com>
> CC: "K. Y. Srinivasan" <k...@microsoft.com>
> CC: Haiyang Zhang <haiya...@microsoft.com>
> CC: Vitaly Kuznetsov <vkuzn...@redhat.com>
> CC: Roman Kagan <rka...@virtuozzo.com>
> CC: Denis V. Lunev <d...@openvz.org>
> CC: qemu-de...@nongnu.org
> ---
>  arch/x86/include/asm/kvm_host.h|  13 ++
>  arch/x86/include/uapi/asm/hyperv.h |   6 +
>  arch/x86/kvm/hyperv.c  | 325 
> -
>  arch/x86/kvm/hyperv.h  |  24 +++
>  arch/x86/kvm/x86.c |   9 +
>  include/linux/kvm_host.h   |   1 +
>  6 files changed, 375 insertions(+), 3 deletions(-)

A couple of nitpicks:

> +static void stimer_restart(struct kvm_vcpu_hv_stimer *stimer)
> +{
> + u64 time_now;
> + ktime_t ktime_now;
> + u64 n;
> +
> + time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
> + ktime_now = ktime_get();
> +
> + /*
> +  * Calculate positive integer n for which condtion -
> +  * (stimer->exp_time + n * stimer->count) > time_now
> +  * is true. We will use (stimer->exp_time + n * stimer->count)
> +  * as new stimer->exp_time.
> +  */
> +
> + n = div64_u64(time_now - stimer->exp_time, stimer->count) + 1;
> + stimer->exp_time += n * stimer->count;

This is actually just a reminder calculation so I'd rather do it
directly with div64_u64_rem().

> +void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
> + struct kvm_vcpu_hv_stimer *stimer;
> + u64 time_now;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
> + if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
> + stimer = _vcpu->stimer[i];
> +     stimer_stop(stimer);

I think there's no need in this explicit stop: I see no way to arrive
here with a running timer, and even if there were, it would be safe as
your timer callback only manipulates the bitmaps atomically.

Neither comment is critical so

Reviewed-by: Roman Kagan <rka...@virtuozzo.com>

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v1 6/7] kvm/x86: Hyper-V SynIC message slot pending clearing at SINT ack

2015-11-27 Thread Roman Kagan
On Wed, Nov 25, 2015 at 06:20:20PM +0300, Andrey Smetanin wrote:
> The SynIC message protocol mandates that the message slot is claimed
> by atomically setting message type to something other than HVMSG_NONE.
> If another message is to be delivered while the slot is still busy,
> message pending flag is asserted to indicate to the guest that the
> hypervisor wants to be notified when the slot is released.
> 
> To make sure the protocol works regardless of where the message
> sources are (kernel or userspace), clear the pending flag on SINT ACK
> notification, and let the message sources compete for the slot again.
> 
> Signed-off-by: Andrey Smetanin <asmeta...@virtuozzo.com>
> CC: Gleb Natapov <g...@kernel.org>
> CC: Paolo Bonzini <pbonz...@redhat.com>
> CC: "K. Y. Srinivasan" <k...@microsoft.com>
> CC: Haiyang Zhang <haiya...@microsoft.com>
> CC: Vitaly Kuznetsov <vkuzn...@redhat.com>
> CC: Roman Kagan <rka...@virtuozzo.com>
> CC: Denis V. Lunev <d...@openvz.org>
> CC: qemu-de...@nongnu.org

Reviewed-by: Roman Kagan <rka...@virtuozzo.com>

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH v3 9/9] kvm/x86: Hyper-V kvm exit

2015-11-03 Thread Roman Kagan
On Tue, Nov 03, 2015 at 03:51:16PM +0100, Paolo Bonzini wrote:
> 
> 
> On 03/11/2015 15:36, Andrey Smetanin wrote:
> >>
> >>
> >> if I run a patched QEMU but I *do not* enable the synthetic interrupt
> >> controller.  I can fix it by wrapping the calls to synic_exit with "if
> >> (!host)", but I haven't checked yet the source---so that may not be the
> >> proper fix.  Sorry for not having looked more in detail.
> >>
> > Could you please specify test case(kvm unit tests ?) and kernel/qemu(if
> > it's not standard)?
> 
> It happens just by starting QEMU.
> 
> Kernel: kvm/queue
> + kvm/irqchip: kvm_arch_irq_routing_update renaming split
> + kvm/x86: split ioapic-handled and EOI exit bitmaps
> + kvm/x86: Hyper-V synthetic interrupt controller
> + kvm/x86: Hyper-V kvm exit
> 
> QEMU: 3a958f559ecd
> + standard-headers/x86: add Hyper-V SynIC constants
> + target-i386/kvm: Hyper-V SynIC MSR's support
> + linux-headers/kvm: add Hyper-V SynIC irq routing type and struct
> + kvm: Hyper-V SynIC irq routing support
> + linux-headers/kvm: KVM_EXIT_HYPERV type and struct
> + target-i386/hyperv: Hyper-V SynIC SINT routing and vCPU exit
> + hw/misc: Hyper-V test device 'hyperv-testdev'
> 
> Can be reproduced just with
> "../qemu/+build/x86_64-softmmu/qemu-system-x86_64 --enable-kvm -cpu
> kvm64 -display none".

Thanks!  We've figured it out:

qemu initializes the MSRs if has_msr_hv_synic is set, which depends only
on whether the kernel supports the MSRs and ignores the cpu property.

OTOH setting those MSRs (on the host side) triggers a vcpu exit which
checks the cpu property and aborts if it's unset.  Voila.

This way we also discovered that no error was triggered when the cpu
property was set but the kernel didn't support it (and this problem was
also present in other hyperv-related features).

The solution appears to be to bail out when a hyperv property is
requested but the host doesn't support it, and then check for the
property only when deciding if the relevant actions need to be taken.

Protecting vcpu exits with !host in the kernel seems to make sense, too.

We're in progress of preparing the updated patches.

Thanks,
Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvm-unit-tests PATCH] x86: hyperv_synic: Hyper-V SynIC test

2015-11-02 Thread Roman Kagan
On Mon, Nov 02, 2015 at 01:16:02PM +0100, Paolo Bonzini wrote:
> On 26/10/2015 10:56, Andrey Smetanin wrote:
> > Hyper-V SynIC is a Hyper-V synthetic interrupt controller.
> > 
> > The test runs on every vCPU and performs the following steps:
> > * read from all Hyper-V SynIC MSR's
> > * setup Hyper-V SynIC evt/msg pages
> > * setup SINT's routing
> > * inject SINT's into destination vCPU by 'hyperv-synic-test-device'
> > * wait for SINT's isr's completion
> > * clear Hyper-V SynIC evt/msg pages and destroy SINT's routing
> > 
> > Signed-off-by: Andrey Smetanin <asmeta...@virtuozzo.com>
> > Reviewed-by: Roman Kagan <rka...@virtuozzo.com>
> > Signed-off-by: Denis V. Lunev <d...@openvz.org>
> > CC: Vitaly Kuznetsov <vkuzn...@redhat.com>
> > CC: "K. Y. Srinivasan" <k...@microsoft.com>
> > CC: Gleb Natapov <g...@kernel.org>
> > CC: Paolo Bonzini <pbonz...@redhat.com>
> > CC: Roman Kagan <rka...@virtuozzo.com>
> > CC: Denis V. Lunev <d...@openvz.org>
> > CC: qemu-de...@nongnu.org
> > CC: virtualizat...@lists.linux-foundation.org
> 
> Bad news.
> 
> The test breaks with APICv, because of the following sequence of events:

Thanks for testing and analyzing this!

(... running around looking for an APICv-capable machine to be able to
catch this ourselves before we resubmit ...)

> The question then is... does Hyper-V actually use auto-EOI interrupts?
> If it doesn't, we might as well not implement them... :/

As Den wrote, we've yet to see a hyperv device which doesn't :(

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 8/9] kvm/x86: Hyper-V synthetic interrupt controller

2015-10-29 Thread Roman Kagan
On Wed, Oct 28, 2015 at 06:41:45PM +0100, Paolo Bonzini wrote:
> Hi Andrey,
> 
> just one question.  Is kvm_arch_set_irq actually needed?  I think
> everything should work fine without it.  Can you check?  If so, I can
> remove it myself and revert the patch that introduced the hook.

While Andrey is testing it, I'd like to ask similar question re. MSI:
why is there a "shortcut" for KVM_IRQ_ROUTING_MSI case (which we
basically modelled after) when it would probably get handled through
->set handler in irqfd_inject() too?

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 9/9] kvm/x86: Hyper-V kvm exit

2015-10-16 Thread Roman Kagan
On Fri, Oct 16, 2015 at 09:51:58AM +0200, Paolo Bonzini wrote:
> The documentation should include the definition of the struct and the
> definition of the subtypes (currently KVM_EXIT_HYPERV_SYNIC only).
> 
> Documentation for KVM_CAP_HYPERV_SINIC and KVM_IRQ_ROUTING_HV_SINT is
> missing, too.
> 
> Finally, it would be better to have unit tests in kvm-unit-tests.
> Either this or QEMU support is a requirement for merging, and the unit
> tests are probably easier.

OK we'll try to get this done early next week.

Thanks,
Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvm:queue 27/38] arch/x86/kvm/hyperv.c:186:41: sparse: incorrect type in argument 2 (different modifiers)

2015-09-18 Thread Roman Kagan
On Fri, Sep 18, 2015 at 03:55:00PM +0200, Paolo Bonzini wrote:
> 
> 
> On 18/09/2015 15:51, Denis V. Lunev wrote:
> >> 185   
> >>   > 186task_cputime_adjusted(current, , );
> >> 187return div_u64(cputime_to_nsecs(utime + stime), 100);
> >> 188}
> >> 189   
> >> 190static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr,
> >> u64 data, bool host)
> >> 191{
> >> 192struct kvm_vcpu_hv *hv = >arch.hyperv;
> >> 193   
> >> 194switch (msr) {
> >>
> >> ---
> >> 0-DAY kernel test infrastructureOpen Source Technology
> >> Center
> >> https://lists.01.org/pipermail/kbuild-all   Intel
> >> Corporation
> > can not get an idea what is this warning about...
> > For me it looks pretty lame.
> 
> I think it wants you to do
> 
> - return div_u64(cputime_to_nsecs(utime + stime), 100);
> + return div_u64(cputime_to_nsecs(utime) +
> +cputime_to_nsecs(stime), 100);

The warning is pretty specific about the point where it triggered.

I have reduced it to the following standalone reproducer:

%%% cat x.c
#ifdef __CHECKER__
# define __nocast   __attribute__((nocast))
#else
# define __nocast
#endif

typedef unsigned long __nocast cputime_t;

extern void task_cputime_adjusted(cputime_t *);
extern void current_task_runtime_100ns(void);

void current_task_runtime_100ns(void)
{
cputime_t utime;

task_cputime_adjusted();
}
%%% gcc -c x.c -Wall -Werror -O2; echo $?
0
%%% sparse x.c
x.c:16:32: warning: incorrect type in argument 1 (different modifiers)
x.c:16:32:expected unsigned long [nocast] [usertype] *
x.c:16:32:got unsigned long *
x.c:16:32: warning: implicit cast to nocast type

Looks like a sparse bug to me.

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html