Re: XP smp using a lot of CPU

2009-05-12 Thread Avi Kivity

Ross Boylan wrote:

I just installed XP into a new VM, specifying -smp 2 for the machine.
According to top, it's using nearly 200% of a cpu even when I'm not
doing anything.

Is this real CPU useage, or just a reporting problem (just as my disk
image is big according to ls, but isn't really)?

If it's real, is there anything I can do about it?

kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64.  Xeon chips; 32
bit version of XP pro installed, now fully patched (including the
Windows Genuine Advantage stuff, though I cancelled it when it wanted to
run).  


Task manager in XP shows virtually no CPU useage.

Please cc me on responses.

  


I'm guessing Windows uses a pio port to sleep, which kvm doesn't 
support.  Can you provide kvm_stat output?


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Yang, Sheng
On Wednesday 13 May 2009 12:41:29 Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit.
> This can mean only a few minutes of usable run time.  Instead, track
> used GSIs in a bitmap.
>
> Signed-off-by: Alex Williamson 
> ---

Acked.

-- 
regards
Yang, Sheng

>
>  v2: Added mutex to protect gsi bitmap
>  v3: Updated for comments from Michael Tsirkin
>  No longer depends on "[PATCH] kvm: device-assignment: Catch GSI
> overflow" v4: Fix gsi_bytes calculation noted by Sheng Yang
>
>  hw/device-assignment.c  |4 ++
>  kvm/libkvm/kvm-common.h |4 ++
>  kvm/libkvm/libkvm.c |   81
> +-- kvm/libkvm/libkvm.h |  
> 10 ++
>  4 files changed, 86 insertions(+), 13 deletions(-)
>
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index a7365c8..a6cc9b9 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
>  {
>  int i;
>
> -for (i = 0; i < dev->irq_entries_nr; i++)
> +for (i = 0; i < dev->irq_entries_nr; i++) {
>  kvm_del_routing_entry(kvm_context, &dev->entry[i]);
> +kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
> +}
>  free(dev->entry);
>  dev->entry = NULL;
>  dev->irq_entries_nr = 0;
> diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
> index 591fb53..4b3cb51 100644
> --- a/kvm/libkvm/kvm-common.h
> +++ b/kvm/libkvm/kvm-common.h
> @@ -66,8 +66,10 @@ struct kvm_context {
>  #ifdef KVM_CAP_IRQ_ROUTING
>   struct kvm_irq_routing *irq_routes;
>   int nr_allocated_irq_routes;
> + void *used_gsi_bitmap;
> + int max_gsi;
> + pthread_mutex_t gsi_mutex;
>  #endif
> - int max_used_gsi;
>  };
>
>  int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
> diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
> index ba0a5d1..3eaa120 100644
> --- a/kvm/libkvm/libkvm.c
> +++ b/kvm/libkvm/libkvm.c
> @@ -35,6 +35,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "libkvm.h"
>
>  #if defined(__x86_64__) || defined(__i386__)
> @@ -65,6 +66,8 @@
>  int kvm_abi = EXPECTED_KVM_API_VERSION;
>  int kvm_page_size;
>
> +static inline void set_bit(uint32_t *buf, unsigned int bit);
> +
>  struct slot_info {
>   unsigned long phys_addr;
>   unsigned long len;
> @@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
>   int fd;
>   kvm_context_t kvm;
>   int r;
> +#ifdef KVM_CAP_IRQ_ROUTING
> + int gsi_count, gsi_bytes, i;
> +#endif
>
>   fd = open("/dev/kvm", O_RDWR);
>   if (fd == -1) {
> @@ -322,6 +328,25 @@ kvm_context_t kvm_init(struct kvm_callbacks
> *callbacks, kvm->dirty_pages_log_all = 0;
>   kvm->no_irqchip_creation = 0;
>   kvm->no_pit_creation = 0;
> +#ifdef KVM_CAP_IRQ_ROUTING
> + pthread_mutex_init(&kvm->gsi_mutex, NULL);
> +
> + gsi_count = kvm_get_gsi_count(kvm);
> + /* Round up so we can search ints using ffs */
> + gsi_bytes = ((gsi_count + 31) / 32) * 4;
> + kvm->used_gsi_bitmap = malloc(gsi_bytes);
> + if (!kvm->used_gsi_bitmap)
> + goto out_close;
> + memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> + kvm->max_gsi = gsi_bytes * 8;
> +
> + /* Mark all the IOAPIC pin GSIs and any over-allocated
> +  * GSIs as already in use. */
> + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
> + set_bit(kvm->used_gsi_bitmap, i);
> + for (i = gsi_count; i < kvm->max_gsi; i++)
> + set_bit(kvm->used_gsi_bitmap, i);
> +#endif
>
>   return kvm;
>   out_close:
> @@ -1298,8 +1323,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
>   new->flags = entry->flags;
>   new->u = entry->u;
>
> - if (entry->gsi > kvm->max_used_gsi)
> - kvm->max_used_gsi = entry->gsi;
>   return 0;
>  #else
>   return -ENOSYS;
> @@ -1404,18 +1427,54 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
>  #endif
>  }
>
> +#ifdef KVM_CAP_IRQ_ROUTING
> +static inline void set_bit(uint32_t *buf, unsigned int bit)
> +{
> + buf[bit / 32] |= 1U << (bit % 32);
> +}
> +
> +static inline void clear_bit(uint32_t *buf, unsigned int bit)
> +{
> + buf[bit / 32] &= ~(1U << (bit % 32));
> +}
> +
> +static int kvm_find_free_gsi(kvm_context_t kvm)
> +{
> + int i, bit, gsi;
> + uint32_t *buf = kvm->used_gsi_bitmap;
> +
> + for (i = 0; i < kvm->max_gsi / 32; i++) {
> + bit = ffs(~buf[i]);
> + if (!bit)
> + continue;
> +
> + gsi = bit - 1 + i * 32;
> + set_bit(buf, gsi);
> + return gsi;
> + }
> +
> + return -ENOSPC;
> +}
> +#endif
> +
>  int kvm_get_irq_route_gsi(kvm_context_t kvm)
>  {
> + int gsi = -ENOSYS;
> +
>  #ifdef KVM_CAP_IRQ_ROUTING
> - if 

Re: [PATCH v2] kvm: device-assignment: Fix kvm_get_irq_route_gsi() return check

2009-05-12 Thread Yang, Sheng
On Wednesday 13 May 2009 12:45:11 Alex Williamson wrote:
> Use 'r' for the return value since gsi is unsigned.
>
> Signed-off-by: Alex Williamson 
> ---

Acked.

-- 
regards
Yang, Sheng

>
>  v2: Use 'r' instead of a cast, per Sheng Yang
>
>  hw/device-assignment.c |5 +++--
>  1 files changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index a6cc9b9..4107915 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -797,11 +797,12 @@ static void assigned_dev_update_msi(PCIDevice
> *pci_dev, unsigned int ctrl_pos) assigned_dev->entry->u.msi.data =
> *(uint16_t *)(pci_dev->config + pci_dev->cap.start + PCI_MSI_DATA_32);
>  assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
> -assigned_dev->entry->gsi = kvm_get_irq_route_gsi(kvm_context);
> -if (assigned_dev->entry->gsi < 0) {
> +r = kvm_get_irq_route_gsi(kvm_context);
> +if (r < 0) {
>  perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
>  return;
>  }
> +assigned_dev->entry->gsi = r;
>
>  kvm_add_routing_entry(kvm_context, assigned_dev->entry);
>  if (kvm_commit_irq_routes(kvm_context) < 0) {


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2791009 ] -vga vmware + X-windows: display errors, jumpy mouse, hangs

2009-05-12 Thread SourceForge.net
Bugs item #2791009, was opened at 2009-05-12 20:58
Message generated for change (Comment added) made by jiggly
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2791009&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: intel
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Balzac von Jigglypuff (jiggly)
Assigned to: Nobody/Anonymous (nobody)
Summary: -vga vmware + X-windows: display errors, jumpy mouse, hangs

Initial Comment:
With -vga vmware, Xorg (in the guest) doesn't display properly. The display is 
vaguely legible, but it's scrambled. With -vga vmware -smp 2, I get the same 
display problem, plus the mouse jumps erratically and kvm hangs.

CPU: Core 2 Duo P8600
KVM: kvm-85 (Gentoo ebuild kvm-85-r1, ncurses enabled, sdl enabled, all other 
USE flags disabled)
Host kernel: 2.6.29 (linux-2.6.29-gentoo-r3)
Host kernel arch: x86_64
Host distro: Gentoo AMD64
Guest kernel: 2.6.29 (linux-2.6.29-gentoo-r3), no virtio, no KVM_GUEST
Guest kernel arch: x86_64
Guest distro: Gentoo AMD64
Command line: kvm -kernel vmlinuz -append root=/dev/hda -hda 
/dev/mapper/vg0-linroot -vga vmware
Command line (alternate): kvm -smp 2 -kernel vmlinuz -append root=/dev/hda -hda 
/dev/mapper/vg0-linroot -vga vmware

Here's a more detailed description of the problem:

I'm trying to run Xorg on a Gentoo guest. With kvm -vga std and the vesa X 
driver, everything works fine, even with -smp 2. With kvm -vga vmware and the 
vmware X driver, though, the X display is scrambled. I can barely see some 
garbled twm xterm windows (see attached screenshot), and I can type in those 
xterms. I tried different resolutions and HorizSync/VertRefresh's, but it's 
always scrambled.

If I use kvm -vga vmware -smp 2, the display is similarly scrambled, and 
additionally the mouse is unusable. If I move the mouse, the guest pointer 
starts jumping all over erratically. Sometimes the pointer settles down and 
stops (until I touch the mouse again), sometimes it keeps jumping around. Xorg 
usually outputs a bunch of "[mi] EQ overflowing. The server is probably stuck 
in an infinite loop." errors while this is happening.

If I keep moving the mouse, kvm eventually hangs: I can't ungrab the pointer 
from the kvm window, so I have to switch to VT1; top shows that kvm is using 
max CPU; kvm doesn't respond to SIGINT; and I have to kill -9 kvm. The amount 
of mouse waving it takes to hang kvm varies (~5s-120s), and it's only 
reproducible ~20% of the time. If the guest kernel has any paravirtualization, 
though, the hang happens much faster and is 100% reproducible.

--

>Comment By: Balzac von Jigglypuff (jiggly)
Date: 2009-05-12 21:47

Message:
-no-kvm-pit doesn't change anything.

With -no-kvm or -no-kvm-irqchip, the display is still garbled, but I don't
get mouse jitters and hangs with -smp 2.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2791009&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] kvm: device-assignment: Fix kvm_get_irq_route_gsi() return check

2009-05-12 Thread Alex Williamson
Use 'r' for the return value since gsi is unsigned.

Signed-off-by: Alex Williamson 
---

 v2: Use 'r' instead of a cast, per Sheng Yang

 hw/device-assignment.c |5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index a6cc9b9..4107915 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -797,11 +797,12 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, 
unsigned int ctrl_pos)
 assigned_dev->entry->u.msi.data = *(uint16_t *)(pci_dev->config +
 pci_dev->cap.start + PCI_MSI_DATA_32);
 assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
-assigned_dev->entry->gsi = kvm_get_irq_route_gsi(kvm_context);
-if (assigned_dev->entry->gsi < 0) {
+r = kvm_get_irq_route_gsi(kvm_context);
+if (r < 0) {
 perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
 return;
 }
+assigned_dev->entry->gsi = r;
 
 kvm_add_routing_entry(kvm_context, assigned_dev->entry);
 if (kvm_commit_irq_routes(kvm_context) < 0) {

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Alex Williamson
We're currently using a counter to track the most recent GSI we've
handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
assignment with a driver that regularly toggles the MSI enable bit.
This can mean only a few minutes of usable run time.  Instead, track
used GSIs in a bitmap.

Signed-off-by: Alex Williamson 
---

 v2: Added mutex to protect gsi bitmap
 v3: Updated for comments from Michael Tsirkin
 No longer depends on "[PATCH] kvm: device-assignment: Catch GSI overflow"
 v4: Fix gsi_bytes calculation noted by Sheng Yang

 hw/device-assignment.c  |4 ++
 kvm/libkvm/kvm-common.h |4 ++
 kvm/libkvm/libkvm.c |   81 +--
 kvm/libkvm/libkvm.h |   10 ++
 4 files changed, 86 insertions(+), 13 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index a7365c8..a6cc9b9 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
 {
 int i;
 
-for (i = 0; i < dev->irq_entries_nr; i++)
+for (i = 0; i < dev->irq_entries_nr; i++) {
 kvm_del_routing_entry(kvm_context, &dev->entry[i]);
+kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
+}
 free(dev->entry);
 dev->entry = NULL;
 dev->irq_entries_nr = 0;
diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
index 591fb53..4b3cb51 100644
--- a/kvm/libkvm/kvm-common.h
+++ b/kvm/libkvm/kvm-common.h
@@ -66,8 +66,10 @@ struct kvm_context {
 #ifdef KVM_CAP_IRQ_ROUTING
struct kvm_irq_routing *irq_routes;
int nr_allocated_irq_routes;
+   void *used_gsi_bitmap;
+   int max_gsi;
+   pthread_mutex_t gsi_mutex;
 #endif
-   int max_used_gsi;
 };
 
 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index ba0a5d1..3eaa120 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "libkvm.h"
 
 #if defined(__x86_64__) || defined(__i386__)
@@ -65,6 +66,8 @@
 int kvm_abi = EXPECTED_KVM_API_VERSION;
 int kvm_page_size;
 
+static inline void set_bit(uint32_t *buf, unsigned int bit);
+
 struct slot_info {
unsigned long phys_addr;
unsigned long len;
@@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
int fd;
kvm_context_t kvm;
int r;
+#ifdef KVM_CAP_IRQ_ROUTING
+   int gsi_count, gsi_bytes, i;
+#endif
 
fd = open("/dev/kvm", O_RDWR);
if (fd == -1) {
@@ -322,6 +328,25 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
kvm->dirty_pages_log_all = 0;
kvm->no_irqchip_creation = 0;
kvm->no_pit_creation = 0;
+#ifdef KVM_CAP_IRQ_ROUTING
+   pthread_mutex_init(&kvm->gsi_mutex, NULL);
+
+   gsi_count = kvm_get_gsi_count(kvm);
+   /* Round up so we can search ints using ffs */
+   gsi_bytes = ((gsi_count + 31) / 32) * 4;
+   kvm->used_gsi_bitmap = malloc(gsi_bytes);
+   if (!kvm->used_gsi_bitmap)
+   goto out_close;
+   memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
+   kvm->max_gsi = gsi_bytes * 8;
+
+   /* Mark all the IOAPIC pin GSIs and any over-allocated
+* GSIs as already in use. */
+   for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
+   set_bit(kvm->used_gsi_bitmap, i);
+   for (i = gsi_count; i < kvm->max_gsi; i++)
+   set_bit(kvm->used_gsi_bitmap, i);
+#endif
 
return kvm;
  out_close:
@@ -1298,8 +1323,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
new->flags = entry->flags;
new->u = entry->u;
 
-   if (entry->gsi > kvm->max_used_gsi)
-   kvm->max_used_gsi = entry->gsi;
return 0;
 #else
return -ENOSYS;
@@ -1404,18 +1427,54 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
 #endif
 }
 
+#ifdef KVM_CAP_IRQ_ROUTING
+static inline void set_bit(uint32_t *buf, unsigned int bit)
+{
+   buf[bit / 32] |= 1U << (bit % 32);
+}
+
+static inline void clear_bit(uint32_t *buf, unsigned int bit)
+{
+   buf[bit / 32] &= ~(1U << (bit % 32));
+}
+
+static int kvm_find_free_gsi(kvm_context_t kvm)
+{
+   int i, bit, gsi;
+   uint32_t *buf = kvm->used_gsi_bitmap;
+
+   for (i = 0; i < kvm->max_gsi / 32; i++) {
+   bit = ffs(~buf[i]);
+   if (!bit)
+   continue;
+
+   gsi = bit - 1 + i * 32;
+   set_bit(buf, gsi);
+   return gsi;
+   }
+
+   return -ENOSPC;
+}
+#endif
+
 int kvm_get_irq_route_gsi(kvm_context_t kvm)
 {
+   int gsi = -ENOSYS;
+
 #ifdef KVM_CAP_IRQ_ROUTING
-   if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
-   if (kvm->max_used_gsi <= kvm_get_gsi_count(kvm))
-return kvm->max_used_gsi + 1;
-else
-return -ENOSPC;
-} else
-return KVM_IOAPIC_NUM_PINS;
-#else
-   

Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Yang, Sheng
On Wednesday 13 May 2009 12:10:34 Alex Williamson wrote:
> On Tue, 2009-05-12 at 21:42 -0600, Alex Williamson wrote:
> > On Wed, 2009-05-13 at 11:30 +0800, Yang, Sheng wrote:
> > > > +   kvm->used_gsi_bitmap = malloc(gsi_bytes);
> > > > +   if (!kvm->used_gsi_bitmap) {
> > > > +   pthread_mutex_unlock(&kvm->gsi_mutex);
> > > > +   goto out_close;
> > > > +   }
> > > > +   memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> > > > +   kvm->max_gsi = gsi_bytes * 8;
> > >
> > > So max_gsi = gsi_count / 4?
>
> kvm->max_gsi actually becomes the number of GSIs available in the
> bitmap, which may be more than gsi_count if we rounded up.  We
> preallocate GSIs between gsi_count and max_gsi to avoid using them.
> This just lets us not need to special case testing whether a bit in the
> last index is < gsi_count.  Am I overlooking anything here?  Thanks,
>
Oh, I understand that, and I just follow the logic of last comment here(which 
"gsi_bytes = (gsi_count + 31) / 32" )... Sorry for confusion...

-- 
regards
Yang, Sheng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Alex Williamson
On Tue, 2009-05-12 at 21:42 -0600, Alex Williamson wrote:
> On Wed, 2009-05-13 at 11:30 +0800, Yang, Sheng wrote:
> > > + kvm->used_gsi_bitmap = malloc(gsi_bytes);
> > > + if (!kvm->used_gsi_bitmap) {
> > > + pthread_mutex_unlock(&kvm->gsi_mutex);
> > > + goto out_close;
> > > + }
> > > + memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> > > + kvm->max_gsi = gsi_bytes * 8;
> > 
> > So max_gsi = gsi_count / 4?

kvm->max_gsi actually becomes the number of GSIs available in the
bitmap, which may be more than gsi_count if we rounded up.  We
preallocate GSIs between gsi_count and max_gsi to avoid using them.
This just lets us not need to special case testing whether a bit in the
last index is < gsi_count.  Am I overlooking anything here?  Thanks,

Alex


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: device-assignment: Fix kvm_get_irq_route_gsi() return check

2009-05-12 Thread Yang, Sheng
On Wednesday 13 May 2009 11:55:50 Alex Williamson wrote:
> On Wed, 2009-05-13 at 11:36 +0800, Yang, Sheng wrote:
> > On Wednesday 13 May 2009 06:14:01 Alex Williamson wrote:
> > > --- a/hw/device-assignment.c
> > > +++ b/hw/device-assignment.c
> > > @@ -798,7 +798,7 @@ static void assigned_dev_update_msi(PCIDevice
> > > *pci_dev, unsigned int ctrl_pos) pci_dev->cap.start + PCI_MSI_DATA_32);
> > >  assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
> > >  assigned_dev->entry->gsi = kvm_get_irq_route_gsi(kvm_context);
> > > -if (assigned_dev->entry->gsi < 0) {
> > > +if ((int)(assigned_dev->entry->gsi) < 0) {
> > >  perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
> > >  return;
> > >  }
> >
> > Use a return value(r) seems better.
>
> Hi Sheng,
>
> Do you mean:
>
> r = kvm_get_irq_route_gsi(kvm_context);
> if (r < 0) {
> ...
> }
> assigned_dev->entry->gsi = r;

Yes.

> > And I realized there is memory leak here. Entry seems haven't been freed
> > for error... So does MSI-X...
>
> I hadn't noticed that one, but now that you mention it, yep.  Thanks,

Thanks. :)

-- 
regards
Yang, Sheng

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2791009 ] -vga vmware + X-windows: display errors, jumpy mouse, hangs

2009-05-12 Thread SourceForge.net
Bugs item #2791009, was opened at 2009-05-12 20:58
Message generated for change (Tracker Item Submitted) made by jiggly
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2791009&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: intel
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Balzac von Jigglypuff (jiggly)
Assigned to: Nobody/Anonymous (nobody)
Summary: -vga vmware + X-windows: display errors, jumpy mouse, hangs

Initial Comment:
With -vga vmware, Xorg (in the guest) doesn't display properly. The display is 
vaguely legible, but it's scrambled. With -vga vmware -smp 2, I get the same 
display problem, plus the mouse jumps erratically and kvm hangs.

CPU: Core 2 Duo P8600
KVM: kvm-85 (Gentoo ebuild kvm-85-r1, ncurses enabled, sdl enabled, all other 
USE flags disabled)
Host kernel: 2.6.29 (linux-2.6.29-gentoo-r3)
Host kernel arch: x86_64
Host distro: Gentoo AMD64
Guest kernel: 2.6.29 (linux-2.6.29-gentoo-r3), no virtio, no KVM_GUEST
Guest kernel arch: x86_64
Guest distro: Gentoo AMD64
Command line: kvm -kernel vmlinuz -append root=/dev/hda -hda 
/dev/mapper/vg0-linroot -vga vmware
Command line (alternate): kvm -smp 2 -kernel vmlinuz -append root=/dev/hda -hda 
/dev/mapper/vg0-linroot -vga vmware

Here's a more detailed description of the problem:

I'm trying to run Xorg on a Gentoo guest. With kvm -vga std and the vesa X 
driver, everything works fine, even with -smp 2. With kvm -vga vmware and the 
vmware X driver, though, the X display is scrambled. I can barely see some 
garbled twm xterm windows (see attached screenshot), and I can type in those 
xterms. I tried different resolutions and HorizSync/VertRefresh's, but it's 
always scrambled.

If I use kvm -vga vmware -smp 2, the display is similarly scrambled, and 
additionally the mouse is unusable. If I move the mouse, the guest pointer 
starts jumping all over erratically. Sometimes the pointer settles down and 
stops (until I touch the mouse again), sometimes it keeps jumping around. Xorg 
usually outputs a bunch of "[mi] EQ overflowing. The server is probably stuck 
in an infinite loop." errors while this is happening.

If I keep moving the mouse, kvm eventually hangs: I can't ungrab the pointer 
from the kvm window, so I have to switch to VT1; top shows that kvm is using 
max CPU; kvm doesn't respond to SIGINT; and I have to kill -9 kvm. The amount 
of mouse waving it takes to hang kvm varies (~5s-120s), and it's only 
reproducible ~20% of the time. If the guest kernel has any paravirtualization, 
though, the hang happens much faster and is 100% reproducible.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2791009&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: device-assignment: Fix kvm_get_irq_route_gsi() return check

2009-05-12 Thread Alex Williamson
On Wed, 2009-05-13 at 11:36 +0800, Yang, Sheng wrote:
> On Wednesday 13 May 2009 06:14:01 Alex Williamson wrote:
> > --- a/hw/device-assignment.c
> > +++ b/hw/device-assignment.c
> > @@ -798,7 +798,7 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev,
> > unsigned int ctrl_pos) pci_dev->cap.start + PCI_MSI_DATA_32);
> >  assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
> >  assigned_dev->entry->gsi = kvm_get_irq_route_gsi(kvm_context);
> > -if (assigned_dev->entry->gsi < 0) {
> > +if ((int)(assigned_dev->entry->gsi) < 0) {
> >  perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
> >  return;
> >  }
> 
> Use a return value(r) seems better.

Hi Sheng,

Do you mean:

r = kvm_get_irq_route_gsi(kvm_context);
if (r < 0) {
...
}
assigned_dev->entry->gsi = r;

> And I realized there is memory leak here. Entry seems haven't been freed for 
> error... So does MSI-X...

I hadn't noticed that one, but now that you mention it, yep.  Thanks,

Alex


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Alex Williamson
On Wed, 2009-05-13 at 11:30 +0800, Yang, Sheng wrote:
> On Wednesday 13 May 2009 06:07:15 Alex Williamson wrote:
> > We're currently using a counter to track the most recent GSI we've
> > handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> > assignment with a driver that regularly toggles the MSI enable bit.
> > This can mean only a few minutes of usable run time.  Instead, track
> > used GSIs in a bitmap.
> >
> > Signed-off-by: Alex Williamson 
> > ---
> >
> >  v2: Added mutex to protect gsi bitmap
> >  v3: Updated for comments from Michael Tsirkin
> >  No longer depends on "[PATCH] kvm: device-assignment: Catch GSI
> > overflow"
> >
> >  hw/device-assignment.c  |4 ++
> >  kvm/libkvm/kvm-common.h |4 ++
> >  kvm/libkvm/libkvm.c |   83
> > +-- kvm/libkvm/libkvm.h |  
> > 10 ++
> >  4 files changed, 88 insertions(+), 13 deletions(-)
> >
> > diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> > index a7365c8..a6cc9b9 100644
> > --- a/hw/device-assignment.c
> > +++ b/hw/device-assignment.c
> > @@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
> >  {
> >  int i;
> >
> > -for (i = 0; i < dev->irq_entries_nr; i++)
> > +for (i = 0; i < dev->irq_entries_nr; i++) {
> >  kvm_del_routing_entry(kvm_context, &dev->entry[i]);
> > +kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
> > +}
> >  free(dev->entry);
> >  dev->entry = NULL;
> >  dev->irq_entries_nr = 0;
> > diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
> > index 591fb53..4b3cb51 100644
> > --- a/kvm/libkvm/kvm-common.h
> > +++ b/kvm/libkvm/kvm-common.h
> > @@ -66,8 +66,10 @@ struct kvm_context {
> >  #ifdef KVM_CAP_IRQ_ROUTING
> > struct kvm_irq_routing *irq_routes;
> > int nr_allocated_irq_routes;
> > +   void *used_gsi_bitmap;
> > +   int max_gsi;
> > +   pthread_mutex_t gsi_mutex;
> >  #endif
> > -   int max_used_gsi;
> >  };
> >
> >  int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
> > diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
> > index ba0a5d1..3d7ab75 100644
> > --- a/kvm/libkvm/libkvm.c
> > +++ b/kvm/libkvm/libkvm.c
> > @@ -35,6 +35,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include "libkvm.h"
> >
> >  #if defined(__x86_64__) || defined(__i386__)
> > @@ -65,6 +66,8 @@
> >  int kvm_abi = EXPECTED_KVM_API_VERSION;
> >  int kvm_page_size;
> >
> > +static inline void set_bit(uint32_t *buf, unsigned int bit);
> > +
> >  struct slot_info {
> > unsigned long phys_addr;
> > unsigned long len;
> > @@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
> > int fd;
> > kvm_context_t kvm;
> > int r;
> > +#ifdef KVM_CAP_IRQ_ROUTING
> > +   int gsi_count, gsi_bytes, i;
> > +#endif
> >
> > fd = open("/dev/kvm", O_RDWR);
> > if (fd == -1) {
> > @@ -322,6 +328,27 @@ kvm_context_t kvm_init(struct kvm_callbacks
> > *callbacks, kvm->dirty_pages_log_all = 0;
> > kvm->no_irqchip_creation = 0;
> > kvm->no_pit_creation = 0;
> > +#ifdef KVM_CAP_IRQ_ROUTING
> > +   pthread_mutex_init(&kvm->gsi_mutex, NULL);
> > +
> > +   gsi_count = kvm_get_gsi_count(kvm);
> > +   /* Round up so we can search ints using ffs */
> > +   gsi_bytes = (gsi_count + 31) / 32;
> 
> CMIW, should it be gsi_bytes = (gsi_count + 7) / 8? This looks like bits-to-
> int. 

Oops, I missed a multiplier in there.  What you have would be correct
for rounding up to a byte, but we really want to round up to an int, so
I need to factor in a bytes/int, which gives me this:

gsi_bytes = ((gsi_count + 31) / 32) * 4;

Then the rest works out correctly.  Thanks,

Alex

> > +   kvm->used_gsi_bitmap = malloc(gsi_bytes);
> > +   if (!kvm->used_gsi_bitmap) {
> > +   pthread_mutex_unlock(&kvm->gsi_mutex);
> > +   goto out_close;
> > +   }
> > +   memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> > +   kvm->max_gsi = gsi_bytes * 8;
> 
> So max_gsi = gsi_count / 4?


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: device-assignment: Fix kvm_get_irq_route_gsi() return check

2009-05-12 Thread Yang, Sheng
On Wednesday 13 May 2009 06:14:01 Alex Williamson wrote:
> Cast to a signed int to test for < 0.
>
> Signed-off-by: Alex Williamson 
> ---
>
>  This supersedes "[PATCH] kvm: device-assignment: Catch GSI overflow"
>
>  hw/device-assignment.c |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index a6cc9b9..5bdae24 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -798,7 +798,7 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev,
> unsigned int ctrl_pos) pci_dev->cap.start + PCI_MSI_DATA_32);
>  assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
>  assigned_dev->entry->gsi = kvm_get_irq_route_gsi(kvm_context);
> -if (assigned_dev->entry->gsi < 0) {
> +if ((int)(assigned_dev->entry->gsi) < 0) {
>  perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
>  return;
>  }

Use a return value(r) seems better.

And I realized there is memory leak here. Entry seems haven't been freed for 
error... So does MSI-X...

-- 
regards
Yang, Sheng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Yang, Sheng
On Wednesday 13 May 2009 06:07:15 Alex Williamson wrote:
> We're currently using a counter to track the most recent GSI we've
> handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
> assignment with a driver that regularly toggles the MSI enable bit.
> This can mean only a few minutes of usable run time.  Instead, track
> used GSIs in a bitmap.
>
> Signed-off-by: Alex Williamson 
> ---
>
>  v2: Added mutex to protect gsi bitmap
>  v3: Updated for comments from Michael Tsirkin
>  No longer depends on "[PATCH] kvm: device-assignment: Catch GSI
> overflow"
>
>  hw/device-assignment.c  |4 ++
>  kvm/libkvm/kvm-common.h |4 ++
>  kvm/libkvm/libkvm.c |   83
> +-- kvm/libkvm/libkvm.h |  
> 10 ++
>  4 files changed, 88 insertions(+), 13 deletions(-)
>
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index a7365c8..a6cc9b9 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
>  {
>  int i;
>
> -for (i = 0; i < dev->irq_entries_nr; i++)
> +for (i = 0; i < dev->irq_entries_nr; i++) {
>  kvm_del_routing_entry(kvm_context, &dev->entry[i]);
> +kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
> +}
>  free(dev->entry);
>  dev->entry = NULL;
>  dev->irq_entries_nr = 0;
> diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
> index 591fb53..4b3cb51 100644
> --- a/kvm/libkvm/kvm-common.h
> +++ b/kvm/libkvm/kvm-common.h
> @@ -66,8 +66,10 @@ struct kvm_context {
>  #ifdef KVM_CAP_IRQ_ROUTING
>   struct kvm_irq_routing *irq_routes;
>   int nr_allocated_irq_routes;
> + void *used_gsi_bitmap;
> + int max_gsi;
> + pthread_mutex_t gsi_mutex;
>  #endif
> - int max_used_gsi;
>  };
>
>  int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
> diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
> index ba0a5d1..3d7ab75 100644
> --- a/kvm/libkvm/libkvm.c
> +++ b/kvm/libkvm/libkvm.c
> @@ -35,6 +35,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include "libkvm.h"
>
>  #if defined(__x86_64__) || defined(__i386__)
> @@ -65,6 +66,8 @@
>  int kvm_abi = EXPECTED_KVM_API_VERSION;
>  int kvm_page_size;
>
> +static inline void set_bit(uint32_t *buf, unsigned int bit);
> +
>  struct slot_info {
>   unsigned long phys_addr;
>   unsigned long len;
> @@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
>   int fd;
>   kvm_context_t kvm;
>   int r;
> +#ifdef KVM_CAP_IRQ_ROUTING
> + int gsi_count, gsi_bytes, i;
> +#endif
>
>   fd = open("/dev/kvm", O_RDWR);
>   if (fd == -1) {
> @@ -322,6 +328,27 @@ kvm_context_t kvm_init(struct kvm_callbacks
> *callbacks, kvm->dirty_pages_log_all = 0;
>   kvm->no_irqchip_creation = 0;
>   kvm->no_pit_creation = 0;
> +#ifdef KVM_CAP_IRQ_ROUTING
> + pthread_mutex_init(&kvm->gsi_mutex, NULL);
> +
> + gsi_count = kvm_get_gsi_count(kvm);
> + /* Round up so we can search ints using ffs */
> + gsi_bytes = (gsi_count + 31) / 32;

CMIW, should it be gsi_bytes = (gsi_count + 7) / 8? This looks like bits-to-
int. 

> + kvm->used_gsi_bitmap = malloc(gsi_bytes);
> + if (!kvm->used_gsi_bitmap) {
> + pthread_mutex_unlock(&kvm->gsi_mutex);
> + goto out_close;
> + }
> + memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
> + kvm->max_gsi = gsi_bytes * 8;

So max_gsi = gsi_count / 4?

-- 
regards
Yang, Sheng

> +
> + /* Mark all the IOAPIC pin GSIs and any over-allocated
> +  * GSIs as already in use. */
> + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
> + set_bit(kvm->used_gsi_bitmap, i);
> + for (i = gsi_count; i < kvm->max_gsi; i++)
> + set_bit(kvm->used_gsi_bitmap, i);
> +#endif
>
>   return kvm;
>   out_close:
> @@ -1298,8 +1325,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
>   new->flags = entry->flags;
>   new->u = entry->u;
>
> - if (entry->gsi > kvm->max_used_gsi)
> - kvm->max_used_gsi = entry->gsi;
>   return 0;
>  #else
>   return -ENOSYS;
> @@ -1404,18 +1429,54 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
>  #endif
>  }
>
> +#ifdef KVM_CAP_IRQ_ROUTING
> +static inline void set_bit(uint32_t *buf, unsigned int bit)
> +{
> + buf[bit / 32] |= 1U << (bit % 32);
> +}
> +
> +static inline void clear_bit(uint32_t *buf, unsigned int bit)
> +{
> + buf[bit / 32] &= ~(1U << (bit % 32));
> +}
> +
> +static int kvm_find_free_gsi(kvm_context_t kvm)
> +{
> + int i, bit, gsi;
> + uint32_t *buf = kvm->used_gsi_bitmap;
> +
> + for (i = 0; i < kvm->max_gsi / 32; i++) {
> + bit = ffs(~buf[i]);
> + if (!bit)
> + continue;
> +
> + gsi = bit - 1 + i * 32;
> + set_bit(buf, gsi);
> + return gsi;
> + }
> +
> + return -ENOSPC;
> +}
> +#endif
> +
>  int k

Re: [KVM PATCH v7.2] kvm: add iofd support

2009-05-12 Thread Gregory Haskins
Gregory Haskins wrote:
> [ updated with figures, graphs, performance-test-harness info ]
>
> iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd
> signal when written to by a guest.  Userspace can register any arbitrary
> address with a corresponding eventfd and then pass the eventfd to a specific
> end-point of interest for handling.
>
> Normal IO requires a blocking round-trip since the operation may cause
> side-effects in the emulated model or may return data to the caller.
> Therefore, an IO in KVM traps from the guest to the host, causes a VMX/SVM
> "heavy-weight" exit back to userspace, and is ultimately serviced by qemu's
> device model synchronously before returning control back to the vcpu.
>
> However, there is a subclass of IO which acts purely as a trigger for
> other IO (such as to kick off an out-of-band DMA request, etc).  For these
> patterns, the synchronous call is particularly expensive since we really
> only want to simply get our notification transmitted asychronously and
> return as quickly as possible.  All the sychronous infrastructure to ensure
> proper data-dependencies are met in the normal IO case are just unecessary
> overhead for signalling.  This adds additional computational load on the
> system, as well as latency to the signalling path.
>
> Therefore, we provide a mechanism for registration of an in-kernel trigger
> point that allows the VCPU to only require a very brief, lightweight
> exit just long enough to signal an eventfd.  This also means that any
> clients compatible with the eventfd interface (which includes userspace
> and kernelspace equally well) can now register to be notified. The end
> result should be a more flexible and higher performance notification API
> for the backend KVM hypervisor and perhipheral components.
>
> To test this theory, we built a test-harness called "doorbell".  This
> module has a function called "doorbell_ring()" which simply increments a
> counter for each time the doorbell is signaled.  It supports signalling
> from either an eventfd, or an ioctl().
>
> We then wired up two paths to the doorbell: One via QEMU via a registered
> io region and through the doorbell ioctl().  The other is direct via iofd.
>
> You can download this test harness here:
>
> ftp://ftp.novell.com/dev/ghaskins/doorbell.tar.bz2
>
> The measured results are as follows:
>
> qemu-mmio: 11 iops, 9.09us rtt
> iofd-mmio: 200100 iops, 5.00us rtt
> iofd-pio:  367300 iops, 2.72us rtt
>
> I didn't measure qemu-pio, because I have to figure out how to register a
> PIO region, and I got lazy.  However, for now we can extrapolate based on
> the data from the NULLIO runs of +2.56us for MMIO, and -350ns for HC, we
> get:
>
> qemu-pio: 153139 iops, 6.53us rtt
> iofd-hc: 412585 iops, 2.37us rtt
>
> these are just for fun, for now, until I can gather more data.
>
> Here is a graph for your convenience:
>
> http://developer.novell.com/wiki/images/7/76/Iofd-chart.png
>
> The conclusion to draw is that we save about 4us by skipping the userspace
> hop.
>
> 
>
> Signed-off-by: Gregory Haskins 
> ---
>
>  include/linux/kvm.h  |   12 +
>  include/linux/kvm_host.h |2 +
>  virt/kvm/eventfd.c   |  107 
> ++
>  virt/kvm/kvm_main.c  |   13 ++
>  4 files changed, 134 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index dfc4bcc..99b6e45 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -292,6 +292,17 @@ struct kvm_guest_debug {
>   struct kvm_guest_debug_arch arch;
>  };
>  
> +#define KVM_IOFD_FLAG_DEASSIGN  (1 << 0)
> +#define KVM_IOFD_FLAG_PIO   (1 << 1)
> +
> +struct kvm_iofd {
> + __u64 addr;
> + __u32 len;
> + __u32 fd;
> + __u32 flags;
> + __u8  pad[12];
> +};
> +
>  #define KVM_TRC_SHIFT   16
>  /*
>   * kvm trace categories
> @@ -508,6 +519,7 @@ struct kvm_irqfd {
>  #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
>  #define KVM_ASSIGN_IRQFD   _IOW(KVMIO, 0x76, struct kvm_irqfd)
>  #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32)
> +#define KVM_IOFD   _IOW(KVMIO, 0x78, struct kvm_iofd)
>  
>  /*
>   * ioctls for vcpu fds
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 1acc528..d53cb70 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -529,5 +529,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) 
> {}
>  int kvm_assign_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
>  int kvm_deassign_irqfd(struct kvm *kvm, int fd);
>  void kvm_irqfd_release(struct kvm *kvm);
> +int kvm_iofd(struct kvm *kvm, unsigned long addr, size_t len,
> +  int fd, int flags);
>  
>  #endif
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 71afd62..8b23317 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -21,12 +21,16 @@
>   */
>  
> 

Re: [PATCH 1/1] KVM: Fix potentially recursively get kvm lock

2009-05-12 Thread Yang, Sheng
On Wednesday 13 May 2009 06:09:08 Marcelo Tosatti wrote:
> On Tue, May 12, 2009 at 03:36:27PM -0600, Alex Williamson wrote:
> > On Tue, 2009-05-12 at 16:44 -0300, Marcelo Tosatti wrote:
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index 4d00942..ba067db 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -250,7 +250,15 @@ static void deassign_host_irq(struct kvm *kvm,
> > >   disable_irq_nosync(assigned_dev->
> > >  host_msix_entries[i].vector);
> > >
> > > + /*
> > > +  * FIXME: kvm_assigned_dev_interrupt_work_handler can deadlock
> > > +  * with cancel_work_sync, since it requires kvm->lock for irq
> > > +  * injection. This is a hack, the irq code must use
> > > +  * a separate lock.
> > > +  */
> > > + mutex_unlock(&kvm->lock);
> > >   cancel_work_sync(&assigned_dev->interrupt_work);
> > > + mutex_lock(&kvm->lock);
> >
> > Seems to work, I assume you've got a similar unlock/lock for the
> > MSI/INTx block.  Thanks,
>
> KVM: workaround workqueue / deassign_host_irq deadlock
>
> I think I'm running into the following deadlock in the kvm kernel module
> when trying to use device assignment:
>
> CPU A   CPU B
> kvm_vm_ioctl_deassign_dev_irq()
>   mutex_lock(&kvm->lock);   worker_thread()
>   -> kvm_deassign_irq()   ->
> kvm_assigned_dev_interrupt_work_handler()
> -> deassign_host_irq()  mutex_lock(&kvm->lock);
>   -> cancel_work_sync() [blocked]
>
> Workaround the issue by dropping kvm->lock for cancel_work_sync().
>
> Reported-by: Alex Williamson 
> From: Sheng Yang 
> Signed-off-by: Marcelo Tosatti 

Another calling path(kvm_free_all_assigned_devices()) don't hold kvm->lock... 
Seems it need the lock for travel assigned dev list?

-- 
regards
Yang, Sheng

> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 4d00942..d4af719 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -250,7 +250,15 @@ static void deassign_host_irq(struct kvm *kvm,
>   disable_irq_nosync(assigned_dev->
>  host_msix_entries[i].vector);
>
> + /*
> +  * FIXME: kvm_assigned_dev_interrupt_work_handler can deadlock
> +  * with cancel_work_sync, since it requires kvm->lock for irq
> +  * injection. This is a hack, the irq code must use
> +  * a separate lock. Same below for MSI.
> +  */
> + mutex_unlock(&kvm->lock);
>   cancel_work_sync(&assigned_dev->interrupt_work);
> + mutex_lock(&kvm->lock);
>
>   for (i = 0; i < assigned_dev->entries_nr; i++)
>   free_irq(assigned_dev->host_msix_entries[i].vector,
> @@ -263,7 +271,9 @@ static void deassign_host_irq(struct kvm *kvm,
>   } else {
>   /* Deal with MSI and INTx */
>   disable_irq_nosync(assigned_dev->host_irq);
> + mutex_unlock(&kvm->lock);
>   cancel_work_sync(&assigned_dev->interrupt_work);
> + mutex_lock(&kvm->lock);
>
>   free_irq(assigned_dev->host_irq, (void *)assigned_dev);


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH, RFC] virtio_blk: add cache flush command

2009-05-12 Thread Rusty Russell
On Tue, 12 May 2009 11:48:36 pm Christian Borntraeger wrote:
> Am Tuesday 12 May 2009 15:54:14 schrieb Rusty Russell:
> > On Mon, 11 May 2009 06:09:08 pm Christoph Hellwig wrote:
> > > Do we need a new feature flag for this command or can we expect that
> > > all previous barrier support was buggy enough anyway?
> >
> > You mean reuse the VIRTIO_BLK_F_BARRIER for this as well?  Seems fine.
>
> It is also used by kuli
> (http://www.ibm.com/developerworks/linux/linux390/kuli.html) and kuli used
> fdatasync.

OK, that's sufficient for me.  It's not like block is going to catch up with 
net 
for feature flags any time soon.

Christoph, please make a new feature flag.

Oh and FYI Christian, you might not have seen this patch:

lguest: barrier me harder

Impact: barrier correctness in example launcher

I doubt either lguest user will complain about performance.

Reported-by: Christoph Hellwig 
Cc: Jens Axboe 
Signed-off-by: Rusty Russell 
---
 Documentation/lguest/lguest.c |7 +++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1630,6 +1630,13 @@ static bool service_io(struct device *de
}
}
 
+   /* OK, so we noted that it was pretty poor to use an fdatasync as a
+* barrier.  But Christoph Hellwig points out that we need a sync
+* *afterwards* as well: "Barriers specify no reordering to the front
+* or the back."  And Jens Axboe confirmed it, so here we are: */
+   if (out->type & VIRTIO_BLK_T_BARRIER)
+   fdatasync(vblk->fd);
+
/* We can't trigger an IRQ, because we're not the Launcher.  It does
 * that when we tell it we're done. */
add_used(dev->vq, head, wlen);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] virtio: find_vqs/del_vqs virtio operations

2009-05-12 Thread Rusty Russell
On Wed, 13 May 2009 01:03:30 am Michael S. Tsirkin wrote:
> On Wed, May 13, 2009 at 12:00:02AM +0930, Rusty Russell wrote
> > and perhaps consider
> > varargs for the callbacks (or would that be too horrible at the
> > implementation end?)
> >
> > Thanks,
> > Rusty.
>
> Ugh ... I think it will be. And AFAIK gcc generates a lot of code
> for varargs - not something we want to do in each interrupt handler.

Err, no I mean for find_vqs:  eg.
(block device)
err = vdev->config->find_vqs(vdev, 1, &vblk->vq, blk_done);

(net device)
err = vdev->config->find_vqs(vdev, 3, vqs, skb_recv_done, 
skb_xmit_done, NULL);

A bit neater for for the single-queue case.

Cheers,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: XP smp using a lot of CPU

2009-05-12 Thread Elias Probst
Hi,

I'm facing the same problem here.

A Windows XP (SP3 + all current Updates) which uses constantly ~95% CPU on 
each core.

I've already tried:
- Using only 1 core
- Replacing the HAL in Windows
- Using another KVM version
- Using different timer options for KVM

I'm currently on KVM-85 where the problem still exists.

Regards, Elias P.


On Wednesday 13 May 2009 02:41:38 Ross Boylan wrote:
> I just installed XP into a new VM, specifying -smp 2 for the machine.
> According to top, it's using nearly 200% of a cpu even when I'm not
> doing anything.
>
> Is this real CPU useage, or just a reporting problem (just as my disk
> image is big according to ls, but isn't really)?
>
> If it's real, is there anything I can do about it?
>
> kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64.  Xeon chips; 32
> bit version of XP pro installed, now fully patched (including the
> Windows Genuine Advantage stuff, though I cancelled it when it wanted to
> run).
>
> Task manager in XP shows virtually no CPU useage.
>
> Please cc me on responses.
>
> Thanks for any assistance.



signature.asc
Description: This is a digitally signed message part.


Best choice for copy/clone/snapshot

2009-05-12 Thread Ross Boylan
First, I have a feeling this might be a question I could ask on a qemu
list.  Is there a way for me to tell which questions should go where?
Is it OK to ask here?

As I install software onto a system I want to preserve its state--just
the disk state---at various points so I can go back.  What is the best
way to do this?

First, I think I could just make a copy of the virtual disk, although I
haven't seen this suggested anywhere.  I assume this will work if the VM
is off; are there other circumstances in which it is safe?  Since my
original virtual disk file isn't really occupying its nominal space, I
assume this will be true of the copy too.

Second, kvm-img could create a copy on write image.  There are several
things I don't understand about this.  Suppose I go
kvm-img -b A.img  B.img

If I then go on and use A.img as I did before, changing what is on disk,
have I screwed up B.img?

Do A.img or B.img have to be qcow2 format?  I created a raw image for
portability.

Suppose I work for awhile installing new stuff on B.img, and then want
to preserve the state.  Is
kvm-img -b B.img C.img
sensible, or is this kind of recursive operation (B.img is already the
copy on write version of A.img) not OK?

Does ‘commit [-f fmt] filename’, documented as
Commit the changes recorded in filename in its base image.
mean commit the recorded changes TO its base image?

Here are some other things I think I don't want to do.  Please let me
know if I'm mistaken.

-snapshot on the kvm command line: nothing persistent comes of this
(maybe if you commit you update the original image, but you don't get
2).

snapshot in the monitor: this snapshots the non-disk state of the VM;
further, that state is not guaranteed to work if you later change what
is on the disk.  I think kvm-img snapshot also accesses these
facilities.

Yours in confusion :)
Ross

P.S. Please cc me.
-- 
Ross Boylan  wk:  (415) 514-8146
185 Berry St #5700   r...@biostat.ucsf.edu
Dept of Epidemiology and Biostatistics   fax: (415) 514-8150
University of California, San Francisco
San Francisco, CA 94107-1739 hm:  (415) 550-1062

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


XP smp using a lot of CPU

2009-05-12 Thread Ross Boylan
I just installed XP into a new VM, specifying -smp 2 for the machine.
According to top, it's using nearly 200% of a cpu even when I'm not
doing anything.

Is this real CPU useage, or just a reporting problem (just as my disk
image is big according to ls, but isn't really)?

If it's real, is there anything I can do about it?

kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64.  Xeon chips; 32
bit version of XP pro installed, now fully patched (including the
Windows Genuine Advantage stuff, though I cancelled it when it wanted to
run).  

Task manager in XP shows virtually no CPU useage.

Please cc me on responses.

Thanks for any assistance.
-- 
Ross Boylan  wk:  (415) 514-8146
185 Berry St #5700   r...@biostat.ucsf.edu
Dept of Epidemiology and Biostatistics   fax: (415) 514-8150
University of California, San Francisco
San Francisco, CA 94107-1739 hm:  (415) 550-1062

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH] bios: Use the correct mask to size the PCI option ROM BAR

2009-05-12 Thread Alex Williamson
On Tue, 2009-05-12 at 23:41 +0100, Paul Brook wrote:
> On Tuesday 12 May 2009, Alex Williamson wrote:
> > Bit 0 is the enable bit, which we not only don't want to set, but
> > it will stick and make us think it's an I/O port resource.
> 
> Why is the ROM slot special? Doesn't the same apply to all BARs?

The PCI option (or expansion) ROM is assumed to be in MMIO space, so bit
0 becomes the enable bit rather than the memory space flag.  The option
ROM is also only a 4 byte register.  The regular 6 base address
registers can support MMIO or I/O port addresses and for PCI 2.0 (iirc),
2 regular base address registers can be combined to describe an 8 byte
address.  You can look at drivers/pci/probe.c:__pci_read_base() in the
Linux source code and note that it makes the same special case for
sizing the ROM BAR (~PCI_ROM_ADDRESS_ENABLE vs ~0).

Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-12 Thread malc
On Wed, 13 May 2009, Sebastian Herbszt wrote:

> Jan Kiszka wrote:
> > Sebastian Herbszt wrote:
> > > With the modified vgabios (see below) it does beep on bochs on vista,
> > > but not on qemu.
> > 
> > Works fine for me!
> > 
> > Did you check twice that your modified vgabios is actually loaded by
> > qemu (e.g. via strace)?
> 
> Yes, it uses the right rom.
> 
> > Moreover, does sound work at all with your qemu?
> > The image I tried [1] issues two beeps after loading (obviously via
> > direct hw access) - a good way to check general support. Note that one
> > reason for broken host sound with qemu can be OSS. For that reason I
> > always configure my qemu with --audio-drv-list=alsa.
> 
> Thats a good hint :)
> Seems i used to compile qemu without "--audio-drv-list". Since "dsound" and
> "fmod" drivers don't compile here (i likely miss some libs in my mingw), i
> used "sdl".

Don't do that. Here's a nice tutorial Kazu made that will probably help 
you: http://www.h7.dion.ne.jp/~qemu-win/Audio-en.html

> Now i can hear those two beeps with the image you suggested. Tho those are
> coming
> thru my sound card and not the hosts pc speaker (even with "-soundhw pcspk",
> but maybe
> that option means something different).

And it will always come through your soundcard. pcspk is not a passthrough
thing.

> With INT 10h AH=0Eh i now can hear a beep too, but it doesn't stop and qemu
> somewhat freezes.

Huh?

> > Jan
> > 
> > PS: Your patch was mangled by your mail client. Fortunately, it was
> > small enough for manual fixing.
> 
> Unfortunatelly Windows Mail tends to do this and i failed to find an option to
> fix it.
> 
> - Sebastian
> 
> 
> 

-- 
mailto:av1...@comtv.ru
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Enable dirty logging for all regions during migration

2009-05-12 Thread Glauber Costa
From: Glauber de Oliveira Costa 

In current calculations, we are not activating dirty logging
for all regions, leading migration to fail. This problem was
already raised by Yaniv Kamay a while ago. The proposed
solution at the time (not merged), was a calculation to convert
from target_phys_addr_t to ram_addr_t, which the dirty logging code
expects.

Avi noticed that enabling dirty logging for the region 0 -> -1ULL
would do the trick. As I hit the problem, I can confirm it does.

This patch, therefore, goes with this simpler approach. Before
this patch, migration fails. With this patch, simple migration
tests succeds.

Signed-off-by: Glauber de Oliveira Costa 
---
 qemu-kvm.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index f55cee8..b5d4313 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1244,7 +1244,7 @@ int kvm_update_dirty_pages_log(void)
 int r = 0;
 
 
-r = kvm_get_dirty_pages_range(kvm_context, 0, phys_ram_size,
+r = kvm_get_dirty_pages_range(kvm_context, 0, -1ULL,
   kvm_dirty_bitmap, NULL,
   kvm_get_dirty_bitmap_cb);
 return r;
-- 
1.6.2.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH] bios: Use the correct mask to size the PCI option ROM BAR

2009-05-12 Thread Paul Brook
On Tuesday 12 May 2009, Alex Williamson wrote:
> Bit 0 is the enable bit, which we not only don't want to set, but
> it will stick and make us think it's an I/O port resource.

Why is the ROM slot special? Doesn't the same apply to all BARs?

Paul
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] bios: Use the correct mask to size the PCI option ROM BAR

2009-05-12 Thread Alex Williamson
Bit 0 is the enable bit, which we not only don't want to set, but
it will stick and make us think it's an I/O port resource.

Signed-off-by: Alex Williamson 
---

diff --git a/bios/rombios32.c b/bios/rombios32.c
index d7e18e9..f861f81 100644
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -985,11 +985,13 @@ static void pci_bios_init_device(PCIDevice *d)
 int ofs;
 uint32_t val, size ;
 
-if (i == PCI_ROM_SLOT)
+if (i == PCI_ROM_SLOT) {
 ofs = 0x30;
-else
+pci_config_writel(d, ofs, 0xfffe);
+} else {
 ofs = 0x10 + i * 4;
-pci_config_writel(d, ofs, 0x);
+pci_config_writel(d, ofs, 0x);
+}
 val = pci_config_readl(d, ofs);
 if (val != 0) {
 size = (~(val & ~0xf)) + 1;


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-12 Thread Samuel Thibault
Sebastian Herbszt, le Wed 13 May 2009 00:21:25 +0200, a écrit :
> Tho those are coming thru my sound card and not the hosts pc speaker
> (even with "-soundhw pcspk", but maybe that option means something
> different).

-soundhw pcspk means to emulate the PC speaker for the guest, it doesn't
have to do with how it is rendered by the host.

> >PS: Your patch was mangled by your mail client. Fortunately, it was
> >small enough for manual fixing.
> 
> Unfortunatelly Windows Mail tends to do this and i failed to find an option 
> to fix it.

Then change your mail client :)

Samuel
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-12 Thread Sebastian Herbszt

Jan Kiszka wrote:

Sebastian Herbszt wrote:

With the modified vgabios (see below) it does beep on bochs on vista,
but not on qemu.


Works fine for me!

Did you check twice that your modified vgabios is actually loaded by
qemu (e.g. via strace)?


Yes, it uses the right rom.


Moreover, does sound work at all with your qemu?
The image I tried [1] issues two beeps after loading (obviously via
direct hw access) - a good way to check general support. Note that one
reason for broken host sound with qemu can be OSS. For that reason I
always configure my qemu with --audio-drv-list=alsa.


Thats a good hint :)
Seems i used to compile qemu without "--audio-drv-list". Since "dsound" and
"fmod" drivers don't compile here (i likely miss some libs in my mingw), i used 
"sdl".
Now i can hear those two beeps with the image you suggested. Tho those are 
coming
thru my sound card and not the hosts pc speaker (even with "-soundhw pcspk", 
but maybe
that option means something different).
With INT 10h AH=0Eh i now can hear a beep too, but it doesn't stop and qemu 
somewhat freezes.


Jan

PS: Your patch was mangled by your mail client. Fortunately, it was
small enough for manual fixing.


Unfortunatelly Windows Mail tends to do this and i failed to find an option to 
fix it.

- Sebastian

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: just a dump

2009-05-12 Thread Hans de Bruin

Hans de Bruin wrote:

Staring to vms simultaneously end in crash

linux 30-rc5
kvm-qemu kvm-85-378-g143eb2b
proc AMD dualcore

vm's like:

#!/bin/sh
n=10
cdrom=/iso/server2008x64.iso
drive=file=/kvm/disks/vm$n
mem=1024
cpu=qemu64
vga=std
mac=52:54:00:12:34:$n
bridge=br1

qemu-system-x86_64 -cdrom $cdrom -drive $drive -m $mem -cpu $cpu -vga 
$vga -net nic,macaddr=$mac -net tap,script=/etc/qemu/$bridge




another dmesg:

device tap0 entered promiscuous mode
br1: topology change detected, propagating
br1: port 1(tap0) entering forwarding state
device tap1 entered promiscuous mode
br1: topology change detected, propagating
br1: port 2(tap1) entering forwarding state
tap0: no IPv6 routers present
tap1: no IPv6 routers present
kvm: 2915: cpu0 unimplemented perfctr wrmsr: 0xc001 data 0x0
kvm: 2915: cpu0 unimplemented perfctr wrmsr: 0xc0010001 data 0x0
kvm: 2915: cpu0 unimplemented perfctr wrmsr: 0xc0010002 data 0x0
kvm: 2915: cpu0 unimplemented perfctr wrmsr: 0xc0010003 data 0x0
kvm: 2914: cpu0 unimplemented perfctr wrmsr: 0xc001 data 0x0
kvm: 2914: cpu0 unimplemented perfctr wrmsr: 0xc0010001 data 0x0
kvm: 2914: cpu0 unimplemented perfctr wrmsr: 0xc0010002 data 0x0
kvm: 2914: cpu0 unimplemented perfctr wrmsr: 0xc0010003 data 0x0
rmap_remove: 880100de5500 8 0->BUG
[ cut here ]
kernel BUG at arch/x86/kvm/mmu.c:576!
invalid opcode:  [#1] SMP
last sysfs file: /sys/devices/pci:00/:00:10.0/:01:09.0/resource
CPU 1
Modules linked in:
Pid: 2925, comm: qemu-system-x86 Not tainted 2.6.30-rc5 #3 System 
Product Name

RIP: 0010:[]  [] rmap_remove+0x151/0x200
RSP: 0018:8801a0d379f8  EFLAGS: 00010292
RAX: 002a RBX: 0008 RCX: 809a3b40
RDX: 88002804d000 RSI: 0046 RDI: 809a3a34
RBP: 8801a0d37a28 R08: 8777 R09: 
R10:  R11:  R12: 
R13: 880100de5500 R14: 880101e23580 R15: 8801a0e1c000
FS:  4270d950(0063) GS:88002804d000() knlGS:07faa000
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 014a8c18 CR3: 0001a0c62000 CR4: 06e0
DR0:  DR1:  DR2: 
DR3:  DR6: 0ff0 DR7: 0400
Process qemu-system-x86 (pid: 2925, threadinfo 8801a0d36000, task 
8801af3605a0)

Stack:
 8801a0d37a28   
 0500 880101e23580 8801a0d37ac8 8021ad8d
  8801 0003020d 0016e772
Call Trace:
 [] paging64_sync_page+0x9d/0x1a0
 [] ? rmap_write_protect+0xd5/0x150
 [] kvm_sync_page+0x6b/0x90
 [] mmu_sync_children+0xcd/0x120
 [] ? x86_emulate_insn+0x292/0x4d30
 [] ? x86_decode_insn+0x412/0xf10
 [] mmu_sync_roots+0xc2/0xd0
 [] kvm_mmu_load+0x138/0x200
 [] ? handle_exit+0x14a/0x2c0
 [] kvm_arch_vcpu_ioctl_run+0x863/0xaa0
 [] ? kvm_vm_ioctl+0x165/0x910
 [] ? do_futex+0x679/0x9a0
 [] kvm_vcpu_ioctl+0x5d3/0x790
 [] ? common_interrupt+0xe/0x13
 [] ? __dequeue_entity+0x2b/0x50
 [] vfs_ioctl+0x31/0x90
 [] do_vfs_ioctl+0x2f1/0x4e0
 [] sys_ioctl+0x82/0xa0
 [] system_call_fastpath+0x16/0x1b
Code: 04 75 e7 48 8b 47 20 49 89 fb 48 85 c0 0f 84 b7 00 00 00 48 89 c7 
eb d0 49 8b 55 00 4c 89 ee 48 c7 c7 b8 2e 7f 80 e8 1f 29

04 00 <0f> 0b eb fe 48 8b 4f 18 48 85 c9 0f 94 c2 83 fe 02 0f 9e c0 84
RIP  [] rmap_remove+0x151/0x200
 RSP 
---[ end trace c11385df745a1fea ]---
BUG: unable to handle kernel NULL pointer dereference at 0058
IP: [] mmu_page_remove_parent_pte+0xc/0x100
PGD 1a0ca8067 PUD 1a0ca9067 PMD 0
Oops:  [#2] SMP
last sysfs file: /sys/devices/pci:00/:00:10.0/:01:09.0/resource
CPU 0
Modules linked in:
Pid: 2926, comm: qemu-system-x86 Tainted: G  D2.6.30-rc5 #3 
System Product Name
RIP: 0010:[]  [] 
mmu_page_remove_parent_pte+0xc/0x100

RSP: 0018:8801a0da57a8  EFLAGS: 00010292
RAX:  RBX:  RCX: 002b
RDX: e200 RSI: 8800ccac0220 RDI: 
RBP: 8801a0da57b8 R08: 006a R09: 8800ccd85e70
R10:  R11:  R12: 8800ccac0220
R13: 8800ccd85dc0 R14: 0044 R15: 8801a0db
FS:  40fbc950(0063) GS:880028034000() knlGS:07fd5000
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 0058 CR3: 0001a0c63000 CR4: 06e0
DR0:  DR1:  DR2: 
DR3:  DR6: 0ff0 DR7: 0400
Process qemu-system-x86 (pid: 2926, threadinfo 8801a0da4000, task 
8801ae971c20)

Stack:
 8800ccd85590 007a 8801a0da5948 80217323
 8801a0da5808 0056 8800ccd85dc0 e200
 8801030f8160 0003 880103f87000 01b8
Call Trace:
 [] kvm_mmu_zap_page+0x153/0x3a0
 [] ? mark_page_dirty+0x

Re: [PATCH 1/1] KVM: Fix potentially recursively get kvm lock

2009-05-12 Thread Alex Williamson
On Tue, 2009-05-12 at 19:09 -0300, Marcelo Tosatti wrote:
> KVM: workaround workqueue / deassign_host_irq deadlock
> 
> I think I'm running into the following deadlock in the kvm kernel module
> when trying to use device assignment:
> 
> CPU A   CPU B
> kvm_vm_ioctl_deassign_dev_irq()
>   mutex_lock(&kvm->lock);   worker_thread()
>   -> kvm_deassign_irq()   ->
> kvm_assigned_dev_interrupt_work_handler()
> -> deassign_host_irq()  mutex_lock(&kvm->lock);
>   -> cancel_work_sync() [blocked]
> 
> Workaround the issue by dropping kvm->lock for cancel_work_sync().
> 
> Reported-by: Alex Williamson 
> From: Sheng Yang 
> Signed-off-by: Marcelo Tosatti 

Perfect, thanks.

Acked-by: Alex Williamson 

> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 4d00942..d4af719 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -250,7 +250,15 @@ static void deassign_host_irq(struct kvm *kvm,
>   disable_irq_nosync(assigned_dev->
>  host_msix_entries[i].vector);
>  
> + /*
> +  * FIXME: kvm_assigned_dev_interrupt_work_handler can deadlock
> +  * with cancel_work_sync, since it requires kvm->lock for irq
> +  * injection. This is a hack, the irq code must use
> +  * a separate lock. Same below for MSI.
> +  */
> + mutex_unlock(&kvm->lock);
>   cancel_work_sync(&assigned_dev->interrupt_work);
> + mutex_lock(&kvm->lock);
>  
>   for (i = 0; i < assigned_dev->entries_nr; i++)
>   free_irq(assigned_dev->host_msix_entries[i].vector,
> @@ -263,7 +271,9 @@ static void deassign_host_irq(struct kvm *kvm,
>   } else {
>   /* Deal with MSI and INTx */
>   disable_irq_nosync(assigned_dev->host_irq);
> + mutex_unlock(&kvm->lock);
>   cancel_work_sync(&assigned_dev->interrupt_work);
> + mutex_lock(&kvm->lock);
>  
>   free_irq(assigned_dev->host_irq, (void *)assigned_dev);
>  


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v7.2] kvm: add iofd support

2009-05-12 Thread Gregory Haskins
[ updated with figures, graphs, performance-test-harness info ]

iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd
signal when written to by a guest.  Userspace can register any arbitrary
address with a corresponding eventfd and then pass the eventfd to a specific
end-point of interest for handling.

Normal IO requires a blocking round-trip since the operation may cause
side-effects in the emulated model or may return data to the caller.
Therefore, an IO in KVM traps from the guest to the host, causes a VMX/SVM
"heavy-weight" exit back to userspace, and is ultimately serviced by qemu's
device model synchronously before returning control back to the vcpu.

However, there is a subclass of IO which acts purely as a trigger for
other IO (such as to kick off an out-of-band DMA request, etc).  For these
patterns, the synchronous call is particularly expensive since we really
only want to simply get our notification transmitted asychronously and
return as quickly as possible.  All the sychronous infrastructure to ensure
proper data-dependencies are met in the normal IO case are just unecessary
overhead for signalling.  This adds additional computational load on the
system, as well as latency to the signalling path.

Therefore, we provide a mechanism for registration of an in-kernel trigger
point that allows the VCPU to only require a very brief, lightweight
exit just long enough to signal an eventfd.  This also means that any
clients compatible with the eventfd interface (which includes userspace
and kernelspace equally well) can now register to be notified. The end
result should be a more flexible and higher performance notification API
for the backend KVM hypervisor and perhipheral components.

To test this theory, we built a test-harness called "doorbell".  This
module has a function called "doorbell_ring()" which simply increments a
counter for each time the doorbell is signaled.  It supports signalling
from either an eventfd, or an ioctl().

We then wired up two paths to the doorbell: One via QEMU via a registered
io region and through the doorbell ioctl().  The other is direct via iofd.

You can download this test harness here:

ftp://ftp.novell.com/dev/ghaskins/doorbell.tar.bz2

The measured results are as follows:

qemu-mmio: 11 iops, 9.09us rtt
iofd-mmio: 200100 iops, 5.00us rtt
iofd-pio:  367300 iops, 2.72us rtt

I didn't measure qemu-pio, because I have to figure out how to register a
PIO region, and I got lazy.  However, for now we can extrapolate based on
the data from the NULLIO runs of +2.56us for MMIO, and -350ns for HC, we
get:

qemu-pio: 153139 iops, 6.53us rtt
iofd-hc: 412585 iops, 2.37us rtt

these are just for fun, for now, until I can gather more data.

Here is a graph for your convenience:

http://developer.novell.com/wiki/images/7/76/Iofd-chart.png

The conclusion to draw is that we save about 4us by skipping the userspace
hop.



Signed-off-by: Gregory Haskins 
---

 include/linux/kvm.h  |   12 +
 include/linux/kvm_host.h |2 +
 virt/kvm/eventfd.c   |  107 ++
 virt/kvm/kvm_main.c  |   13 ++
 4 files changed, 134 insertions(+), 0 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index dfc4bcc..99b6e45 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -292,6 +292,17 @@ struct kvm_guest_debug {
struct kvm_guest_debug_arch arch;
 };
 
+#define KVM_IOFD_FLAG_DEASSIGN  (1 << 0)
+#define KVM_IOFD_FLAG_PIO   (1 << 1)
+
+struct kvm_iofd {
+   __u64 addr;
+   __u32 len;
+   __u32 fd;
+   __u32 flags;
+   __u8  pad[12];
+};
+
 #define KVM_TRC_SHIFT   16
 /*
  * kvm trace categories
@@ -508,6 +519,7 @@ struct kvm_irqfd {
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
 #define KVM_ASSIGN_IRQFD   _IOW(KVMIO, 0x76, struct kvm_irqfd)
 #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32)
+#define KVM_IOFD   _IOW(KVMIO, 0x78, struct kvm_iofd)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1acc528..d53cb70 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -529,5 +529,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 int kvm_assign_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 int kvm_deassign_irqfd(struct kvm *kvm, int fd);
 void kvm_irqfd_release(struct kvm *kvm);
+int kvm_iofd(struct kvm *kvm, unsigned long addr, size_t len,
+int fd, int flags);
 
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 71afd62..8b23317 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -21,12 +21,16 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
+
+#include "iodev.h"
 
 /*
  * 
@@ -185,3 +189,106 @@ kvm_irqfd_release(struct 

[PATCH] kvm: device-assignment: Fix kvm_get_irq_route_gsi() return check

2009-05-12 Thread Alex Williamson
Cast to a signed int to test for < 0.

Signed-off-by: Alex Williamson 
---

 This supersedes "[PATCH] kvm: device-assignment: Catch GSI overflow"

 hw/device-assignment.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index a6cc9b9..5bdae24 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -798,7 +798,7 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, 
unsigned int ctrl_pos)
 pci_dev->cap.start + PCI_MSI_DATA_32);
 assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
 assigned_dev->entry->gsi = kvm_get_irq_route_gsi(kvm_context);
-if (assigned_dev->entry->gsi < 0) {
+if ((int)(assigned_dev->entry->gsi) < 0) {
 perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
 return;
 }

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] KVM: Fix potentially recursively get kvm lock

2009-05-12 Thread Marcelo Tosatti
On Tue, May 12, 2009 at 03:36:27PM -0600, Alex Williamson wrote:
> On Tue, 2009-05-12 at 16:44 -0300, Marcelo Tosatti wrote:
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 4d00942..ba067db 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -250,7 +250,15 @@ static void deassign_host_irq(struct kvm *kvm,
> > disable_irq_nosync(assigned_dev->
> >host_msix_entries[i].vector);
> >  
> > +   /*
> > +* FIXME: kvm_assigned_dev_interrupt_work_handler can deadlock
> > +* with cancel_work_sync, since it requires kvm->lock for irq
> > +* injection. This is a hack, the irq code must use
> > +* a separate lock.
> > +*/
> > +   mutex_unlock(&kvm->lock);
> > cancel_work_sync(&assigned_dev->interrupt_work);
> > +   mutex_lock(&kvm->lock);
> 
> Seems to work, I assume you've got a similar unlock/lock for the
> MSI/INTx block.  Thanks,

KVM: workaround workqueue / deassign_host_irq deadlock

I think I'm running into the following deadlock in the kvm kernel module
when trying to use device assignment:

CPU A   CPU B
kvm_vm_ioctl_deassign_dev_irq()
  mutex_lock(&kvm->lock);   worker_thread()
  -> kvm_deassign_irq()   ->
kvm_assigned_dev_interrupt_work_handler()
-> deassign_host_irq()  mutex_lock(&kvm->lock);
  -> cancel_work_sync() [blocked]

Workaround the issue by dropping kvm->lock for cancel_work_sync().

Reported-by: Alex Williamson 
From: Sheng Yang 
Signed-off-by: Marcelo Tosatti 


diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4d00942..d4af719 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -250,7 +250,15 @@ static void deassign_host_irq(struct kvm *kvm,
disable_irq_nosync(assigned_dev->
   host_msix_entries[i].vector);
 
+   /*
+* FIXME: kvm_assigned_dev_interrupt_work_handler can deadlock
+* with cancel_work_sync, since it requires kvm->lock for irq
+* injection. This is a hack, the irq code must use
+* a separate lock. Same below for MSI.
+*/
+   mutex_unlock(&kvm->lock);
cancel_work_sync(&assigned_dev->interrupt_work);
+   mutex_lock(&kvm->lock);
 
for (i = 0; i < assigned_dev->entries_nr; i++)
free_irq(assigned_dev->host_msix_entries[i].vector,
@@ -263,7 +271,9 @@ static void deassign_host_irq(struct kvm *kvm,
} else {
/* Deal with MSI and INTx */
disable_irq_nosync(assigned_dev->host_irq);
+   mutex_unlock(&kvm->lock);
cancel_work_sync(&assigned_dev->interrupt_work);
+   mutex_lock(&kvm->lock);
 
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4d00942..d4af719 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -250,7 +250,15 @@ static void deassign_host_irq(struct kvm *kvm,
disable_irq_nosync(assigned_dev->
   host_msix_entries[i].vector);
 
+   /*
+* FIXME: kvm_assigned_dev_interrupt_work_handler can deadlock
+* with cancel_work_sync, since it requires kvm->lock for irq
+* injection. This is a hack, the irq code must use
+* a separate lock. Same below for MSI.
+*/
+   mutex_unlock(&kvm->lock);
cancel_work_sync(&assigned_dev->interrupt_work);
+   mutex_lock(&kvm->lock);
 
for (i = 0; i < assigned_dev->entries_nr; i++)
free_irq(assigned_dev->host_msix_entries[i].vector,
@@ -263,7 +271,9 @@ static void deassign_host_irq(struct kvm *kvm,
} else {
/* Deal with MSI and INTx */
disable_irq_nosync(assigned_dev->host_irq);
+   mutex_unlock(&kvm->lock);
cancel_work_sync(&assigned_dev->interrupt_work);
+   mutex_lock(&kvm->lock);
 
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 


[patch 0/3] locking fixes / cr3 validation v3

2009-05-12 Thread mtosatti
Addressing comments.



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 3/3] KVM: x86: check for cr3 validity in mmu_alloc_roots

2009-05-12 Thread mtosatti
Verify the cr3 address stored in vcpu->arch.cr3 points to an existant
memslot. If not, inject a triple fault.

Signed-off-by: Marcelo Tosatti 

Index: kvm/arch/x86/kvm/mmu.c
===
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -1912,7 +1912,19 @@ static void mmu_free_roots(struct kvm_vc
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 }
 
-static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
+static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
+{
+   int ret = 0;
+
+   if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
+   set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+   ret = 1;
+   }
+
+   return ret;
+}
+
+static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 {
int i;
gfn_t root_gfn;
@@ -1927,13 +1939,15 @@ static void mmu_alloc_roots(struct kvm_v
ASSERT(!VALID_PAGE(root));
if (tdp_enabled)
direct = 1;
+   if (mmu_check_root(vcpu, root_gfn))
+   return 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
  PT64_ROOT_LEVEL, direct,
  ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->arch.mmu.root_hpa = root;
-   return;
+   return 0;
}
direct = !is_paging(vcpu);
if (tdp_enabled)
@@ -1950,6 +1964,8 @@ static void mmu_alloc_roots(struct kvm_v
root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
} else if (vcpu->arch.mmu.root_level == 0)
root_gfn = 0;
+   if (mmu_check_root(vcpu, root_gfn))
+   return 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
  PT32_ROOT_LEVEL, direct,
  ACC_ALL, NULL);
@@ -1958,6 +1974,7 @@ static void mmu_alloc_roots(struct kvm_v
vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
}
vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+   return 0;
 }
 
 static void mmu_sync_roots(struct kvm_vcpu *vcpu)
@@ -1976,7 +1993,7 @@ static void mmu_sync_roots(struct kvm_vc
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
 
-   if (root) {
+   if (root && VALID_PAGE(root)) {
root &= PT64_BASE_ADDR_MASK;
sp = page_header(root);
mmu_sync_children(vcpu, sp);
@@ -2311,9 +2328,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
goto out;
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
-   mmu_alloc_roots(vcpu);
+   r = mmu_alloc_roots(vcpu);
mmu_sync_roots(vcpu);
spin_unlock(&vcpu->kvm->mmu_lock);
+   if (r)
+   goto out;
kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
kvm_mmu_flush_tlb(vcpu);
 out:
Index: kvm/arch/x86/kvm/x86.c
===
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -4554,6 +4554,7 @@ int kvm_arch_set_memory_region(struct kv
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
kvm_mmu_zap_all(kvm);
+   kvm_reload_remote_mmus(kvm);
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 1/3] KVM: MMU: protect kvm_mmu_change_mmu_pages with mmu_lock

2009-05-12 Thread mtosatti
kvm_handle_hva, called by MMU notifiers, manipulates mmu data only with
the protection of mmu_lock.

Update kvm_mmu_change_mmu_pages callers to take mmu_lock, thus protecting
against kvm_handle_hva.

Signed-off-by: Marcelo Tosatti 

Index: kvm-pending/arch/x86/kvm/mmu.c
===
--- kvm-pending.orig/arch/x86/kvm/mmu.c
+++ kvm-pending/arch/x86/kvm/mmu.c
@@ -2723,7 +2723,6 @@ void kvm_mmu_slot_remove_write_access(st
 {
struct kvm_mmu_page *sp;
 
-   spin_lock(&kvm->mmu_lock);
list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
int i;
u64 *pt;
@@ -2738,7 +2737,6 @@ void kvm_mmu_slot_remove_write_access(st
pt[i] &= ~PT_WRITABLE_MASK;
}
kvm_flush_remote_tlbs(kvm);
-   spin_unlock(&kvm->mmu_lock);
 }
 
 void kvm_mmu_zap_all(struct kvm *kvm)
Index: kvm-pending/arch/x86/kvm/x86.c
===
--- kvm-pending.orig/arch/x86/kvm/x86.c
+++ kvm-pending/arch/x86/kvm/x86.c
@@ -1607,10 +1607,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages
return -EINVAL;
 
down_write(&kvm->slots_lock);
+   spin_lock(&kvm->mmu_lock);
 
kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
+   spin_unlock(&kvm->mmu_lock);
up_write(&kvm->slots_lock);
return 0;
 }
@@ -1786,7 +1788,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
 
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
+   spin_lock(&kvm->mmu_lock);
kvm_mmu_slot_remove_write_access(kvm, log->slot);
+   spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
memslot = &kvm->memslots[log->slot];
n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
@@ -4530,12 +4534,14 @@ int kvm_arch_set_memory_region(struct kv
}
}
 
+   spin_lock(&kvm->mmu_lock);
if (!kvm->arch.n_requested_mmu_pages) {
unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
}
 
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+   spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
 
return 0;


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 2/3] KVM: take mmu_lock when updating a deleted slot

2009-05-12 Thread mtosatti
kvm_handle_hva relies on mmu_lock protection to safely access 
the memslot structures.

Signed-off-by: Marcelo Tosatti 

Index: kvm-pending/virt/kvm/kvm_main.c
===
--- kvm-pending.orig/virt/kvm/kvm_main.c
+++ kvm-pending/virt/kvm/kvm_main.c
@@ -1199,8 +1199,10 @@ int __kvm_set_memory_region(struct kvm *
 
kvm_free_physmem_slot(&old, npages ? &new : NULL);
/* Slot deletion case: we have to update the current slot */
+   spin_lock(&kvm->mmu_lock);
if (!npages)
*memslot = old;
+   spin_unlock(&kvm->mmu_lock);
 #ifdef CONFIG_DMAR
/* map the pages in iommu page table */
r = kvm_iommu_map_pages(kvm, base_gfn, npages);


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Alex Williamson
We're currently using a counter to track the most recent GSI we've
handed out.  This quickly hits KVM_MAX_IRQ_ROUTES when using device
assignment with a driver that regularly toggles the MSI enable bit.
This can mean only a few minutes of usable run time.  Instead, track
used GSIs in a bitmap.

Signed-off-by: Alex Williamson 
---

 v2: Added mutex to protect gsi bitmap
 v3: Updated for comments from Michael Tsirkin
 No longer depends on "[PATCH] kvm: device-assignment: Catch GSI overflow"

 hw/device-assignment.c  |4 ++
 kvm/libkvm/kvm-common.h |4 ++
 kvm/libkvm/libkvm.c |   83 +--
 kvm/libkvm/libkvm.h |   10 ++
 4 files changed, 88 insertions(+), 13 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index a7365c8..a6cc9b9 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -561,8 +561,10 @@ static void free_dev_irq_entries(AssignedDevice *dev)
 {
 int i;
 
-for (i = 0; i < dev->irq_entries_nr; i++)
+for (i = 0; i < dev->irq_entries_nr; i++) {
 kvm_del_routing_entry(kvm_context, &dev->entry[i]);
+kvm_free_irq_route_gsi(kvm_context, dev->entry[i].gsi);
+}
 free(dev->entry);
 dev->entry = NULL;
 dev->irq_entries_nr = 0;
diff --git a/kvm/libkvm/kvm-common.h b/kvm/libkvm/kvm-common.h
index 591fb53..4b3cb51 100644
--- a/kvm/libkvm/kvm-common.h
+++ b/kvm/libkvm/kvm-common.h
@@ -66,8 +66,10 @@ struct kvm_context {
 #ifdef KVM_CAP_IRQ_ROUTING
struct kvm_irq_routing *irq_routes;
int nr_allocated_irq_routes;
+   void *used_gsi_bitmap;
+   int max_gsi;
+   pthread_mutex_t gsi_mutex;
 #endif
-   int max_used_gsi;
 };
 
 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index ba0a5d1..3d7ab75 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "libkvm.h"
 
 #if defined(__x86_64__) || defined(__i386__)
@@ -65,6 +66,8 @@
 int kvm_abi = EXPECTED_KVM_API_VERSION;
 int kvm_page_size;
 
+static inline void set_bit(uint32_t *buf, unsigned int bit);
+
 struct slot_info {
unsigned long phys_addr;
unsigned long len;
@@ -286,6 +289,9 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
int fd;
kvm_context_t kvm;
int r;
+#ifdef KVM_CAP_IRQ_ROUTING
+   int gsi_count, gsi_bytes, i;
+#endif
 
fd = open("/dev/kvm", O_RDWR);
if (fd == -1) {
@@ -322,6 +328,27 @@ kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
kvm->dirty_pages_log_all = 0;
kvm->no_irqchip_creation = 0;
kvm->no_pit_creation = 0;
+#ifdef KVM_CAP_IRQ_ROUTING
+   pthread_mutex_init(&kvm->gsi_mutex, NULL);
+
+   gsi_count = kvm_get_gsi_count(kvm);
+   /* Round up so we can search ints using ffs */
+   gsi_bytes = (gsi_count + 31) / 32;
+   kvm->used_gsi_bitmap = malloc(gsi_bytes);
+   if (!kvm->used_gsi_bitmap) {
+   pthread_mutex_unlock(&kvm->gsi_mutex);
+   goto out_close;
+   }
+   memset(kvm->used_gsi_bitmap, 0, gsi_bytes);
+   kvm->max_gsi = gsi_bytes * 8;
+
+   /* Mark all the IOAPIC pin GSIs and any over-allocated
+* GSIs as already in use. */
+   for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
+   set_bit(kvm->used_gsi_bitmap, i);
+   for (i = gsi_count; i < kvm->max_gsi; i++)
+   set_bit(kvm->used_gsi_bitmap, i);
+#endif
 
return kvm;
  out_close:
@@ -1298,8 +1325,6 @@ int kvm_add_routing_entry(kvm_context_t kvm,
new->flags = entry->flags;
new->u = entry->u;
 
-   if (entry->gsi > kvm->max_used_gsi)
-   kvm->max_used_gsi = entry->gsi;
return 0;
 #else
return -ENOSYS;
@@ -1404,18 +1429,54 @@ int kvm_commit_irq_routes(kvm_context_t kvm)
 #endif
 }
 
+#ifdef KVM_CAP_IRQ_ROUTING
+static inline void set_bit(uint32_t *buf, unsigned int bit)
+{
+   buf[bit / 32] |= 1U << (bit % 32);
+}
+
+static inline void clear_bit(uint32_t *buf, unsigned int bit)
+{
+   buf[bit / 32] &= ~(1U << (bit % 32));
+}
+
+static int kvm_find_free_gsi(kvm_context_t kvm)
+{
+   int i, bit, gsi;
+   uint32_t *buf = kvm->used_gsi_bitmap;
+
+   for (i = 0; i < kvm->max_gsi / 32; i++) {
+   bit = ffs(~buf[i]);
+   if (!bit)
+   continue;
+
+   gsi = bit - 1 + i * 32;
+   set_bit(buf, gsi);
+   return gsi;
+   }
+
+   return -ENOSPC;
+}
+#endif
+
 int kvm_get_irq_route_gsi(kvm_context_t kvm)
 {
+   int gsi = -ENOSYS;
+
 #ifdef KVM_CAP_IRQ_ROUTING
-   if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
-   if (kvm->max_used_gsi <= kvm_get_gsi_count(kvm))
-return kvm->max_used_gsi + 1;
-else
-return -ENOSPC;
-} else
-return KVM_IOAPIC_NUM_PINS;
-

Re: [PATCH -tip v5 0/7] tracing: kprobe-based event tracer and x86 instruction decoder

2009-05-12 Thread Masami Hiramatsu
Ingo Molnar wrote:
> * Masami Hiramatsu  wrote:
> 
>> Hi,
>>
>> Here are the patches of kprobe-based event tracer for x86, version 
>> 5, which allows you to probe various kernel events through ftrace 
>> interface.
>>
>> This version supports only x86(-32/-64) (but porting it on other 
>> arch just needs kprobes/kretprobes and register and stack access 
>> APIs).
>>
>> This patchset also includes x86(-64) instruction decoder which 
>> supports non-SSE/FP opcodes and includes x86 opcode map. I think 
>> it will be possible to share this opcode map with KVM's decoder.
>>
>> This series can be applied on the latest linux-2.6-tip tree.
>>
>> This patchset includes following changes:
>> - Add x86 instruction decoder [1/7]
>> - Check insertion point safety in kprobe [2/7]
>> - Cleanup fix_riprel() with insn decoder [3/7]
>> - Add kprobe-tracer plugin [4/7]
>> - Fix kernel_trap_sp() on x86 according to systemtap runtime. [5/7]
>> - Add arch-dep register and stack fetching functions [6/7]
>> - Support fetching various status (register/stack/memory/etc.) [7/7]
>>
>> Future items:
>> - .init function tracing support.
>> - Support primitive types(long, ulong, int, uint, etc) for args.
> 
> Ok, this looks pretty complete already.
> 
> Two high-level comments:
> 
>  - There's no self-test - would it be possible to add one? See 
>trace_selftest* in kernel/trace/
> 
>  - No generic integration.

Hmm, Ingo, could you tell me what I can do for the integration?
Would you means that I should use filters?

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: mhira...@redhat.com

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] qemu-kvm: Make PC speaker emulation aware of in-kernel PIT

2009-05-12 Thread Jan Kiszka
When using the in-kernel PIT the speaker emulation has to synchronize
the PIT state with KVM. Enhance the existing speaker sound device and
allow it to take over port 0x61 by using KVM_CREATE_PIT2 where
available. This unbreaks -soundhw pcspk in KVM mode.

Changes in v3:
 - re-added incorrectly dropped kvm_enabled checks

Changes in v2:
 - rebased over qemu-kvm and KVM_CREATE_PIT2
 - refactored hooks in pcspk

Signed-off-by: Jan Kiszka 
---

 hw/pcspk.c |   41 
 kvm/kernel/include/linux/kvm.h |   10 ++
 kvm/libkvm/libkvm-x86.c|   26 ++---
 3 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/hw/pcspk.c b/hw/pcspk.c
index ec1d0c6..fea3863 100644
--- a/hw/pcspk.c
+++ b/hw/pcspk.c
@@ -27,6 +27,8 @@
 #include "isa.h"
 #include "audio/audio.h"
 #include "qemu-timer.h"
+#include "i8254.h"
+#include "qemu-kvm.h"
 
 #define PCSPK_BUF_LEN 1792
 #define PCSPK_SAMPLE_RATE 32000
@@ -48,6 +50,37 @@ typedef struct {
 static const char *s_spk = "pcspk";
 static PCSpkState pcspk_state;
 
+#ifdef USE_KVM_PIT
+static void kvm_get_pit_ch2(PITState *pit)
+{
+struct kvm_pit_state pit_state;
+
+if (kvm_enabled() && qemu_kvm_pit_in_kernel()) {
+kvm_get_pit(kvm_context, &pit_state);
+pit->channels[2].mode = pit_state.channels[2].mode;
+pit->channels[2].count = pit_state.channels[2].count;
+pit->channels[2].count_load_time = 
pit_state.channels[2].count_load_time;
+pit->channels[2].gate = pit_state.channels[2].gate;
+}
+}
+
+static void kvm_set_pit_ch2(PITState *pit)
+{
+struct kvm_pit_state pit_state;
+
+if (kvm_enabled() && qemu_kvm_pit_in_kernel()) {
+pit_state.channels[2].mode = pit->channels[2].mode;
+pit_state.channels[2].count = pit->channels[2].count;
+pit_state.channels[2].count_load_time = 
pit->channels[2].count_load_time;
+pit_state.channels[2].gate = pit->channels[2].gate;
+kvm_set_pit(kvm_context, &pit_state);
+}
+}
+#else
+static inline void kvm_get_pit_ch2(PITState *pit) { }
+static inline void kvm_set_pit_ch2(PITState *pit) { }
+#endif
+
 static inline void generate_samples(PCSpkState *s)
 {
 unsigned int i;
@@ -72,6 +105,8 @@ static void pcspk_callback(void *opaque, int free)
 PCSpkState *s = opaque;
 unsigned int n;
 
+kvm_get_pit_ch2(s->pit);
+
 if (pit_get_mode(s->pit, 2) != 3)
 return;
 
@@ -121,6 +156,8 @@ static uint32_t pcspk_ioport_read(void *opaque, uint32_t 
addr)
 PCSpkState *s = opaque;
 int out;
 
+kvm_get_pit_ch2(s->pit);
+
 s->dummy_refresh_clock ^= (1 << 4);
 out = pit_get_out(s->pit, 2, qemu_get_clock(vm_clock)) << 5;
 
@@ -132,6 +169,8 @@ static void pcspk_ioport_write(void *opaque, uint32_t addr, 
uint32_t val)
 PCSpkState *s = opaque;
 const int gate = val & 1;
 
+kvm_get_pit_ch2(s->pit);
+
 s->data_on = (val >> 1) & 1;
 pit_set_gate(s->pit, 2, gate);
 if (s->voice) {
@@ -139,6 +178,8 @@ static void pcspk_ioport_write(void *opaque, uint32_t addr, 
uint32_t val)
 s->play_pos = 0;
 AUD_set_active_out(s->voice, gate & s->data_on);
 }
+
+kvm_set_pit_ch2(s->pit);
 }
 
 void pcspk_init(PITState *pit)
diff --git a/kvm/kernel/include/linux/kvm.h b/kvm/kernel/include/linux/kvm.h
index f5e9d66..9ed4892 100644
--- a/kvm/kernel/include/linux/kvm.h
+++ b/kvm/kernel/include/linux/kvm.h
@@ -110,6 +110,14 @@ struct kvm_irqchip {
} chip;
 };
 
+/* for KVM_CREATE_PIT2 */
+struct kvm_pit_config {
+   __u32 flags;
+   __u32 pad;
+};
+
+#define KVM_PIT_SPEAKER_DUMMY 1
+
 #define KVM_EXIT_UNKNOWN  0
 #define KVM_EXIT_EXCEPTION1
 #define KVM_EXIT_IO   2
@@ -455,6 +463,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#define KVM_CAP_PIT2 31
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -538,6 +547,7 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
+#define KVM_CREATE_PIT2   _IOW(KVMIO, 0x76, struct 
kvm_pit_config)
 
 /*
  * ioctls for vcpu fds
diff --git a/kvm/libkvm/libkvm-x86.c b/kvm/libkvm/libkvm-x86.c
index a2f6320..9c0d967 100644
--- a/kvm/libkvm/libkvm-x86.c
+++ b/kvm/libkvm/libkvm-x86.c
@@ -59,16 +59,26 @@ static int kvm_create_pit(kvm_context_t kvm)
 
kvm->pit_in_kernel = 0;
if (!kvm->no_pit_creation) {
-   r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_PIT);
-   if (r > 0) {
+#ifdef KVM_CAP_PIT2
+   struct kvm_pit_config config = { .flags = 0 };
+
+   r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_PIT2);
+   if (r > 0)
+   r = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &config);
+ 

Re: [PATCH v2] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Alex Williamson
On Tue, 2009-05-12 at 22:39 +0300, Michael S. Tsirkin wrote:
> On Fri, May 08, 2009 at 04:31:21PM -0600, Alex Williamson wrote:
> > +#ifdef KVM_CAP_IRQ_ROUTING
> 
> We don't need these anymore.

Doesn't that depend on the kernel it's built against?  In any case, I
think it would deserve it's own patch to remove those throughout.
Removing them here would make their usage inconsistent.

> > +static inline void set_bit(unsigned int *buf, int bit)
> > +{
> > +   buf[bit >> 5] |= (1U << (bit & 0x1f));
> > +}
> 
> external () not needed here. bit >> 5 might be clearer as bit / 32 IMO.

Fixed

> > +
> > +static inline void clear_bit(unsigned int *buf, int bit)
> > +{
> > +   buf[bit >> 5] &= ~(1U << (bit & 0x1f));
> > +}
> 
> Make bit unsigned. And then bit & 0x1f can be written as bit % 32.
> IMO that's easier to parse.

Fixed

> > +
> > +static int kvm_find_free_gsi(kvm_context_t kvm)
> > +{
> > +   int i, bit, gsi;
> > +   unsigned int *buf = kvm->used_gsi_bitmap;
> > +
> > +   for (i = 0; i < (kvm->max_gsi >> 5); i++) {
> 
> may be clearer as kvm->max_gsi / 32

Fixed

> > +   if (buf[i] != ~0U)
> > +   break;
> > +   }
> 
> {} around single statement isn't needed

This is different in my new version, so the {} makes sense.

> > +
> > +   if (i == kvm->max_gsi >> 5)
> > +   return -ENOSPC;
> 
> May be clearer as kvm->max_gsi / 32
> By the way, this math means we can't use all gsi's
> if the number is not a multiple of 32.
> Round up instead? It's not hard: (kvm->max_gsi + 31) / 32

Fixed

> > +
> > +   bit = ffs(~buf[i]);
> > +   if (!bit)
> > +   return -EAGAIN;
> 
> We know it won't be 0, right?
> Instead of checking twice, move the ffs call within the loop above?
> E.g. like this:
>   for (i = 0; i < kvm->max_gsi / 32; ++i)
>   if ((bit = ffs(~buf[i])) {
>   gsi = bit - 1 + i * 32;
>   set_bit(buf, gsi);
>   return gsi;
>   }
> 
>   return -ENOSPC;

Nice, I updated to something similar.

> > +
> > +   gsi = (bit - 1) | (i << 5);
> 
> clearer as bit - 1 + i * 32

Yup, fixed.

> > +   set_bit(buf, gsi);
> > +   return gsi;
> > +}
> > +#endif
> > +
> >  int kvm_get_irq_route_gsi(kvm_context_t kvm)
> >  {
> >  #ifdef KVM_CAP_IRQ_ROUTING
> > -   if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
> > -   if (kvm->max_used_gsi + 1 < kvm_get_gsi_count(kvm))
> > -return kvm->max_used_gsi + 1;
> > -else
> > -return -ENOSPC;
> > -} else
> > -return KVM_IOAPIC_NUM_PINS;
> > +   int gsi;
> > +
> > +   pthread_mutex_lock(&kvm->gsi_mutex);
> > +
> > +   if (!kvm->max_gsi) {
> 
> Why is this lazy allocation required?
> Let's do the below in some init function,
> and keep code simple?

Moved to kvm_init()

> > +   int i;
> > +
> > +   /* Round the number of GSIs supported to a 4 byte
> > +* value so we can search it using ints and ffs */
> > +   i = kvm_get_gsi_count(kvm) & ~0x1f;
> 
> Should not we round up? Why not?
> Again, we can count = kvm_get_gsi_count(kvm) / 32, and use count * 4 below.

Yep, rounded up, and pre-marked the excess bits as used.

> > +   kvm->used_gsi_bitmap = malloc(i >> 3);
> 
> why not qemu_malloc?

qemu_malloc isn't available in libkvm.c.  malloc is consistent with the
rest of the file.

> > +   if (!kvm->used_gsi_bitmap) {
> > +   pthread_mutex_unlock(&kvm->gsi_mutex);
> > +   return -ENOMEM;
> > +   }
> > +   memset(kvm->used_gsi_bitmap, 0, i >> 3);
> > +   kvm->max_gsi = i;
> > +
> > +   /* Mark all the IOAPIC pin GSIs as already used */
> > +   for (i = 0; i <= KVM_IOAPIC_NUM_PINS; i++)
> 
> Is this really <=? Not http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-12 Thread Jan Kiszka
Sebastian Herbszt wrote:
> Jan Kiszka wrote:
>>> I have implemented the beep for vgabios-6b and it works on bochs but
>>> fails on qemu.
>>> With "-soundhw pcspk" i should hear it thru the hosts pc speaker, right?
>>
>> Yep, that's how it should work. Mind to share your code and tell us
>> which test case you used?
> 
> FreeDOS floppy with "debug" command and assembly code:
> mov ax,0e07
> xor bx,bx
> int 10
> 
> With the modified vgabios (see below) it does beep on bochs on vista,
> but not on qemu.

Works fine for me!

Did you check twice that your modified vgabios is actually loaded by
qemu (e.g. via strace)? Moreover, does sound work at all with your qemu?
The image I tried [1] issues two beeps after loading (obviously via
direct hw access) - a good way to check general support. Note that one
reason for broken host sound with qemu can be OSS. For that reason I
always configure my qemu with --audio-drv-list=alsa.

Jan

PS: Your patch was mangled by your mail client. Fortunately, it was
small enough for manual fixing.

[1]http://www.ibiblio.org/pub/micro/pc-stuff/freedos/files/distributions/unofficial/balder/balder10.img



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 1/1] KVM: Fix potentially recursively get kvm lock

2009-05-12 Thread Alex Williamson
On Tue, 2009-05-12 at 16:44 -0300, Marcelo Tosatti wrote:
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 4d00942..ba067db 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -250,7 +250,15 @@ static void deassign_host_irq(struct kvm *kvm,
>   disable_irq_nosync(assigned_dev->
>  host_msix_entries[i].vector);
>  
> + /*
> +  * FIXME: kvm_assigned_dev_interrupt_work_handler can deadlock
> +  * with cancel_work_sync, since it requires kvm->lock for irq
> +  * injection. This is a hack, the irq code must use
> +  * a separate lock.
> +  */
> + mutex_unlock(&kvm->lock);
>   cancel_work_sync(&assigned_dev->interrupt_work);
> + mutex_lock(&kvm->lock);

Seems to work, I assume you've got a similar unlock/lock for the
MSI/INTx block.  Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] msi-x: let drivers retry when not enough vectors

2009-05-12 Thread Michael S. Tsirkin
On Fri, May 08, 2009 at 09:25:00AM +0930, Rusty Russell wrote:
> On Thu, 7 May 2009 07:49:53 pm Sheng Yang wrote:
> > On Thursday 07 May 2009 17:53:02 Matthew Wilcox wrote:
> > > Here's a good example.  Let's suppose you have a driver which supports
> > > two different models of cards, one has 16 MSI-X interrupts, the other
> > > has 10.  You can call pci_enable_msix() asking for 16 vectors.  If your
> > > card is model A, you get 16 interrupts.  If your card is model B, it says
> > > "you can have 10".
> 
> Sheng is absolutely right, that's a horrid API.
> 
> If it actually enabled that number and returned it, it might make sense (cf. 
> write() returning less bytes than you give it).  But overloading the return 
> value to save an explicit call is just ugly; it's not worth saving a few 
> lines 
> of code at cost of making all the drivers subtle and tricksy.
> 
> Fail with -ENOSPC or something.
> 
> Rusty.

I do agree that returning a positive value from pci_enable_msix
it an ugly API (but note that this is the API that linux currently has).

Here's a wrapper that I ended up with in my driver:

static int enable_msix(struct pci_dev *dev, struct msix_entry *entries,
   int *options, int noptions)
{
int i;
for (i = 0; i < noptions; ++i)
if (!pci_enable_msix(dev, entries, options[i]))
return options[i];
return -EBUSY;
}

This gets an array of options for # of vectors and tries them one after
the other until an option that the system can support is found.
On success, we get the # of vectors actually enabled, and
driver can then use them as it sees fit.

Is there interest in moving something like this to pci.h?

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


just a dump

2009-05-12 Thread Hans de Bruin

Staring to vms simultaneously end in crash

linux 30-rc5
kvm-qemu kvm-85-378-g143eb2b
proc AMD dualcore

vm's like:

#!/bin/sh
n=10
cdrom=/iso/server2008x64.iso
drive=file=/kvm/disks/vm$n
mem=1024
cpu=qemu64
vga=std
mac=52:54:00:12:34:$n
bridge=br1

qemu-system-x86_64 -cdrom $cdrom -drive $drive -m $mem -cpu $cpu -vga 
$vga -net nic,macaddr=$mac -net tap,script=/etc/qemu/$bridge



dmesg:
device tap0 entered promiscuous mode
device tap1 entered promiscuous mode
br1: topology change detected, propagating
br1: port 1(tap0) entering forwarding state
br1: topology change detected, propagating
br1: port 2(tap1) entering forwarding state
tap1: no IPv6 routers present
tap0: no IPv6 routers present
kvm: 2935: cpu0 unimplemented perfctr wrmsr: 0xc001 data 0x0
kvm: 2935: cpu0 unimplemented perfctr wrmsr: 0xc0010001 data 0x0
kvm: 2935: cpu0 unimplemented perfctr wrmsr: 0xc0010002 data 0x0
kvm: 2935: cpu0 unimplemented perfctr wrmsr: 0xc0010003 data 0x0
BUG: unable to handle kernel NULL pointer dereference at (null)
device tap1 entered promiscuous mode
br1: topology change detected, propagating
br1: port 1(tap0) entering forwarding state
br1: topology change detected, propagating
br1: port 2(tap1) entering forwarding state
tap1: no IPv6 routers present
tap0: no IPv6 routers present
kvm: 2935: cpu0 unimplemented perfctr wrmsr: 0xc001 data 0x0
kvm: 2935: cpu0 unimplemented perfctr wrmsr: 0xc0010001 data 0x0
kvm: 2935: cpu0 unimplemented perfctr wrmsr: 0xc0010002 data 0x0
kvm: 2935: cpu0 unimplemented perfctr wrmsr: 0xc0010003 data 0x0
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [] gfn_to_rmap+0x22/0x60
PGD 1a0c4b067 PUD 1a0c4a067 PMD 0
Oops:  [#1] SMP
last sysfs file: /sys/devices/pci:00/:00:10.0/:01:09.0/resource
CPU 0
Modules linked in:
Pid: 2945, comm: qemu-system-x86 Not tainted 2.6.30-rc5 #3 System 
Product Name

RIP: 0010:[]  [] gfn_to_rmap+0x22/0x60
RSP: 0018:8801a0f979d8  EFLAGS: 00010246
RAX:  RBX:  RCX: 
RDX: 000e1000 RSI: af7dc78941006c5f RDI: 8801a0d9
RBP: 8801a0f979e8 R08: 0006 R09: 0006
R10:  R11:  R12: af7dc78941006c5f
R13: 8801908b1180 R14: 88019e778c60 R15: 8801a0d9
FS:  41591950(0063) GS:880028034000() knlGS:f800017ce500
CS:  0010 DS:  ES:  CR0: 80050033
CR2:  CR3: 0001a3d7c000 CR4: 06e0
DR0:  DR1:  DR2: 
DR3:  DR6: 0ff0 DR7: 0400
Process qemu-system-x86 (pid: 2945, threadinfo 8801a0f96000, task 
8801afb12d00)

Stack:
 0010  8801a0f97a28 80216fce
 8801a0f97a28 af7dc78941006c5f  
 0180 88019e778c60 8801a0f97ac8 8021ad8d
Call Trace:
 [] rmap_remove+0xae/0x200
 [] paging64_sync_page+0x9d/0x1a0
 [] ? gfn_to_memslot+0x1c/0x30
 [] ? gfn_to_rmap+0x1b/0x60
 [] ? rmap_write_protect+0xd5/0x150
 [] kvm_sync_page+0x6b/0x90
 [] mmu_sync_children+0xcd/0x120
 [] mmu_sync_roots+0xc2/0xd0
 [] kvm_mmu_load+0x138/0x200
 [] ? handle_exit+0x14a/0x2c0
 [] kvm_arch_vcpu_ioctl_run+0x863/0xaa0
 [] ? kvm_vm_ioctl+0x165/0x910
 [] ? do_futex+0x679/0x9a0
 [] kvm_vcpu_ioctl+0x5d3/0x790
 [] ? common_interrupt+0xe/0x13
 [] ? apic_timer_interrupt+0xe/0x20
 [] ? __dequeue_entity+0x2b/0x50
 [] vfs_ioctl+0x31/0x90
 [] do_vfs_ioctl+0x2f1/0x4e0
 [] ? sys_futex+0x84/0x130
 [] sys_ioctl+0x82/0xa0
 [] ? sys_lseek+0x7b/0x80
 [] system_call_fastpath+0x16/0x1b
Code: 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 ec 10 48 89 1c 24 89 
d3 4c 89 64 24 08 49 89 f4 e8 a5 36 ff ff 85 db 48 89 c1 75 1e <4c> 2b 
20 4a 8d 14 e5 00 00 00 00 48 03 50 18 48 8b 1c 24 4c 8b

RIP  [] gfn_to_rmap+0x22/0x60
 RSP 
CR2: 
---[ end trace 5b490dc3e478a040 ]---
kvm: 2934: cpu0 unimplemented perfctr wrmsr: 0xc001 data 0x0
kvm: 2934: cpu0 unimplemented perfctr wrmsr: 0xc0010001 data 0x0
kvm: 2934: cpu0 unimplemented perfctr wrmsr: 0xc0010002 data 0x0
kvm: 2934: cpu0 unimplemented perfctr wrmsr: 0xc0010003 data 0x0
CE: hpet increasing min_delta_ns to 15000 nsec

one vm was still running.

--
Hans
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-12 Thread Sebastian Herbszt

Jan Kiszka wrote:

I have implemented the beep for vgabios-6b and it works on bochs but
fails on qemu.
With "-soundhw pcspk" i should hear it thru the hosts pc speaker, right?


Yep, that's how it should work. Mind to share your code and tell us
which test case you used?


FreeDOS floppy with "debug" command and assembly code:
mov ax,0e07
xor bx,bx
int 10

With the modified vgabios (see below) it does beep on bochs on vista,
but not on qemu.

- Sebastian

--- vgabios.c.orig 2009-05-12 22:24:22.0 +0200
+++ vgabios.c 2009-05-12 22:24:32.0 +0200
@@ -80,6 +80,9 @@
static void memcpyb();
static void memcpyw();

+static void delay_ticks();
+static void beep();
+
static void biosfn_set_video_mode();
static void biosfn_set_cursor_shape();
static void biosfn_set_cursor_pos();
@@ -1965,7 +1968,7 @@
 switch(car)
  {
   case 7:
-//FIXME should beep
+beep();
break;

   case 8:
@@ -3765,6 +3768,57 @@
ASM_END
}

+  void
+delay_ticks(ticks)
+  Bit16u ticks;
+{
+ASM_START
+  pusha
+  push ds
+  xor dx,dx
+  mov ds,dx
+  mov cx,4[bp]
+  add cx,0x46c
+  adc dx,0x46e
+  sti
+delay_ticks_wait:
+  hlt
+  mov ax,0x46c
+  mov bx,0x46e
+  cmp dx,bx
+  jb delay_ticks_out
+  cmp cx,ax
+  ja delay_ticks_wait
+delay_ticks_out:
+  pop ds
+  popa
+ASM_END
+}
+
+void beep()
+{
+ASM_START
+  pusha
+  mov al,#0xb6
+  out #0x43,al
+  mov al,#0x33
+  out #0x42,al
+  mov al,#0x5
+  out #0x42,al
+  in al,#0x61
+  push ax
+  or al,#0x03
+  out #0x61,al
+  mov cx,#0x4
+  push cx
+  call _delay_ticks
+  pop cx
+  pop ax
+  out #0x61,al
+  popa
+ASM_END
+}
+
#ifdef DEBUG
void unimplemented()
{

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 1/1] kvm: expand on "help" info to specify kvm intel and amd module names

2009-05-12 Thread akpm
From: "Robert P. J. Day" 

Signed-off-by: Robert P. J. Day 
Cc: Avi Kivity 
Signed-off-by: Andrew Morton 
---

 arch/x86/kvm/Kconfig |6 ++
 1 file changed, 6 insertions(+)

diff -puN 
arch/x86/kvm/Kconfig~kvm-expand-on-help-info-to-specify-kvm-intel-and-amd-module-names
 arch/x86/kvm/Kconfig
--- 
a/arch/x86/kvm/Kconfig~kvm-expand-on-help-info-to-specify-kvm-intel-and-amd-module-names
+++ a/arch/x86/kvm/Kconfig
@@ -50,6 +50,9 @@ config KVM_INTEL
  Provides support for KVM on Intel processors equipped with the VT
  extensions.
 
+ To compile this as a module, choose M here: the module
+ will be called kvm-intel.
+
 config KVM_AMD
tristate "KVM for AMD processors support"
depends on KVM
@@ -57,6 +60,9 @@ config KVM_AMD
  Provides support for KVM on AMD processors equipped with the AMD-V
  (SVM) extensions.
 
+ To compile this as a module, choose M here: the module
+ will be called kvm-amd.
+
 config KVM_TRACE
bool "KVM trace support"
depends on KVM && SYSFS
_
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [ANNOUNCE] qemu-kvm-0.10.4

2009-05-12 Thread Farkas Levente
Avi Kivity wrote:
> qemu-kvm-0.10.4 is now available.  This is the first release of the 0.10
> stable branch of qemu-kvm.  The qemu-kvm 0.10.4 includes all of the
> features and fixes of qemu-0.10.4, plus adaptations for improved kvm
> support.
> 
> Note that qemu-kvm releases do not include the kvm external modules
> (kvm*.ko); you can use the modules provided by your distribution,
> modules from the development releases (kvm-xx), or from the kvm-kmod
> stable branch releases once they become available.
>
> As this is the first release of this branch there is no changelog;
> qemu-kvm-0.10.4 is roughly equivalent (but is not identical) to qemu
> from kvm-84.

this's the plan? ie. the stable userspace will be about kvm-84?
what's the plan for kvm-kmod release date and it's also be somewhere ~ 84?

-- 
  Levente   "Si vis pacem para bellum!"
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] Deal with shadow interrupts after emulated instructions

2009-05-12 Thread Glauber Costa
Same story, more avi's comments merged.


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] deal with interrupt shadow state for emulated instruction

2009-05-12 Thread Glauber Costa
we currently unblock shadow interrupt state when we skip an instruction,
but failing to do so when we actually emulate one. This blocks interrupts
in key instruction blocks, in particular sti; hlt; sequences

If the instruction emulated is an sti, we have to block shadow interrupts.
The same goes for mov ss. pop ss also needs it, but we don't currently
emulate it.

Without this patch, I cannot boot gpxe option roms at vmx machines.
This is described at https://bugzilla.redhat.com/show_bug.cgi?id=494469

Signed-off-by: Glauber Costa 
CC: H. Peter Anvin 
CC: Avi Kivity 
CC: Gleb Natapov 
---
 arch/x86/include/asm/kvm_x86_emulate.h |3 +++
 arch/x86/kvm/x86.c |6 +-
 arch/x86/kvm/x86_emulate.c |   20 
 3 files changed, 28 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/kvm_x86_emulate.h 
b/arch/x86/include/asm/kvm_x86_emulate.h
index be40d6e..b7ed2c4 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -155,6 +155,9 @@ struct x86_emulate_ctxt {
int mode;
u32 cs_base;
 
+   /* interruptibility state, as a result of execution of STI or MOV SS */
+   int interruptibility;
+
/* decode cache */
struct decode_cache decode;
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3d8fcc5..b45baff 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2362,7 +2362,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
u16 error_code,
int emulation_type)
 {
-   int r;
+   int r, shadow_mask;
struct decode_cache *c;
 
kvm_clear_exception_queue(vcpu);
@@ -2416,6 +2416,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
}
 
r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
+   shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
+
+   if (r == 0)
+   kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
 
if (vcpu->arch.pio.string)
return EMULATE_DO_MMIO;
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d2664fc..c1b6c23 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1361,6 +1361,20 @@ static inline int writeback(struct x86_emulate_ctxt 
*ctxt,
return 0;
 }
 
+void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
+{
+   u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
+   /*
+* an sti; sti; sequence only disable interrupts for the first
+* instruction. So, if the last instruction, be it emulated or
+* not, left the system with the INT_STI flag enabled, it
+* means that the last instruction is an sti. We should not
+* leave the flag on in this case. The same goes for mov ss
+*/
+   if (!(int_shadow & mask))
+   ctxt->interruptibility = mask;
+}
+
 int
 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
@@ -1372,6 +1386,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
int io_dir_in;
int rc = 0;
 
+   ctxt->interruptibility = 0;
+
/* Shadow copy of register state. Committed on successful emulation.
 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
 * modify them.
@@ -1618,6 +1634,9 @@ special_insn:
int err;
 
sel = c->src.val;
+   if (c->modrm_reg == VCPU_SREG_SS)
+   toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
+
if (c->modrm_reg <= 5) {
type_bits = (c->modrm_reg == 1) ? 9 : 1;
err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
@@ -1847,6 +1866,7 @@ special_insn:
c->dst.type = OP_NONE;  /* Disable writeback. */
break;
case 0xfb: /* sti */
+   toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
ctxt->eflags |= X86_EFLAGS_IF;
c->dst.type = OP_NONE;  /* Disable writeback. */
break;
-- 
1.5.6.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] replace drop_interrupt_shadow by set_interrupt_shadow

2009-05-12 Thread Glauber Costa
This patch replaces drop_interrupt_shadow with the more
general set_interrupt_shadow, that can either drop or raise
it, depending on its parameter.

Signed-off-by: Glauber Costa 
CC: H. Peter Anvin 
CC: Avi Kivity 
CC: Gleb Natapov 
---
 arch/x86/include/asm/kvm_host.h|3 +-
 arch/x86/include/asm/kvm_x86_emulate.h |3 ++
 arch/x86/kvm/svm.c |   32 +++-
 arch/x86/kvm/vmx.c |   49 +--
 arch/x86/kvm/x86.c |2 +-
 5 files changed, 63 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8e680c3..3d933cf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -510,6 +510,8 @@ struct kvm_x86_ops {
void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+   void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
+   u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr);
void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
@@ -521,7 +523,6 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-   void (*drop_interrupt_shadow)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h 
b/arch/x86/include/asm/kvm_x86_emulate.h
index 6a15973..be40d6e 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -143,6 +143,9 @@ struct decode_cache {
struct fetch_cache fetch;
 };
 
+#define X86_SHADOW_INT_MOV_SS  1
+#define X86_SHADOW_INT_STI 2
+
 struct x86_emulate_ctxt {
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ef43a18..f4d1bd9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -202,6 +202,27 @@ static int is_external_interrupt(u32 info)
return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
 }
 
+static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+   u32 ret = 0;
+
+   if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
+   ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
+   return ret & mask;
+}
+
+static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (mask == 0)
+   svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+   else
+   svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
+
+}
+
 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -215,7 +236,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
   __func__, kvm_rip_read(vcpu), svm->next_rip);
 
kvm_rip_write(vcpu, svm->next_rip);
-   svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+   svm_set_interrupt_shadow(vcpu, 0);
 }
 
 static int has_svm(void)
@@ -2229,12 +2250,6 @@ static void pre_svm_run(struct vcpu_svm *svm)
new_asid(svm, svm_data);
 }
 
-static void svm_drop_interrupt_shadow(struct kvm_vcpu *vcpu)
-{
-   struct vcpu_svm *svm = to_svm(vcpu);
-   svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
-}
-
 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -2637,6 +2652,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
+   .set_interrupt_shadow = svm_set_interrupt_shadow,
+   .get_interrupt_shadow = svm_get_interrupt_shadow,
.patch_hypercall = svm_patch_hypercall,
.set_irq = svm_set_irq,
.set_nmi = svm_inject_nmi,
@@ -2646,7 +2663,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
-   .drop_interrupt_shadow = svm_drop_interrupt_shadow,
 
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e8a5649..f3ab27b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -736,23 +736,45 @@ static void vmx_set_rflags(struct kvm_v

Re: [PATCH 1/1] KVM: Fix potentially recursively get kvm lock

2009-05-12 Thread Marcelo Tosatti
On Tue, May 12, 2009 at 11:30:21AM -0300, Marcelo Tosatti wrote:
> On Tue, May 12, 2009 at 10:13:36PM +0800, Yang, Sheng wrote:
> > > > +   mutex_unlock(&kvm->lock);
> > >
> > > assigned_dev list is protected by kvm->lock. So you could have another
> > > ioctl adding to it at the same time you're searching.
> > 
> > Oh, yes... My fault... 
> > 
> > > Could either have a separate kvm->assigned_devs_lock, to protect
> > > kvm->arch.assigned_dev_head (users are ioctls that manipulate it), or
> > > change the IRQ injection to use a separate spinlock, kill the workqueue
> > > and call kvm_set_irq from the assigned device interrupt handler.
> > 
> > Peferred the latter, though needs more work. But the only reason for put a 
> > workqueue here is because kvm->lock is a mutex? I can't believe... If so, I 
> > think we had made a big mistake - we have to fix all kinds of racy problem 
> > caused by this, but finally find it's unnecessary... 
> 
> One issue is that kvm_set_irq can take too long while interrupts are
> blocked, and you'd have to disable interrupts in other contexes that
> inject interrupts (say qemu->ioctl(SET_INTERRUPT)->...->), so all i can
> see is a tradeoff.
> 
> 
> 
> But the interrupt injection path seems to be pretty short and efficient
> to happen in host interrupt context.
> 
> 
> 
> Avi, Gleb?
> 
> > Maybe another reason is kvm_kick_vcpu(), but have already fixed by you.
> 
> Note you tested the spinlock_irq patch with GigE and there was no
> significant performance regression right?
> 
> > 
> > Continue to check the code...

OK, it might take some time for bigger changes to happen. I've changed
your patch to drop the lock only around cancel_work_sync. Can deadlock
if someone else tries to mess with the assigned device at the same time,
but the VM won't go away under it because of the vmfd reference.

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4d00942..ba067db 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -250,7 +250,15 @@ static void deassign_host_irq(struct kvm *kvm,
disable_irq_nosync(assigned_dev->
   host_msix_entries[i].vector);
 
+   /*
+* FIXME: kvm_assigned_dev_interrupt_work_handler can deadlock
+* with cancel_work_sync, since it requires kvm->lock for irq
+* injection. This is a hack, the irq code must use
+* a separate lock.
+*/
+   mutex_unlock(&kvm->lock);
cancel_work_sync(&assigned_dev->interrupt_work);
+   mutex_lock(&kvm->lock);
 
for (i = 0; i < assigned_dev->entries_nr; i++)
free_irq(assigned_dev->host_msix_entries[i].vector,
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Michael S. Tsirkin

On Fri, May 08, 2009 at 04:31:21PM -0600, Alex Williamson wrote:
> +#ifdef KVM_CAP_IRQ_ROUTING

We don't need these anymore.

> +static inline void set_bit(unsigned int *buf, int bit)
> +{
> + buf[bit >> 5] |= (1U << (bit & 0x1f));
> +}

external () not needed here. bit >> 5 might be clearer as bit / 32 IMO.

> +
> +static inline void clear_bit(unsigned int *buf, int bit)
> +{
> + buf[bit >> 5] &= ~(1U << (bit & 0x1f));
> +}

Make bit unsigned. And then bit & 0x1f can be written as bit % 32.
IMO that's easier to parse.

> +
> +static int kvm_find_free_gsi(kvm_context_t kvm)
> +{
> + int i, bit, gsi;
> + unsigned int *buf = kvm->used_gsi_bitmap;
> +
> + for (i = 0; i < (kvm->max_gsi >> 5); i++) {

may be clearer as kvm->max_gsi / 32

> + if (buf[i] != ~0U)
> + break;
> + }

{} around single statement isn't needed

> +
> + if (i == kvm->max_gsi >> 5)
> + return -ENOSPC;

May be clearer as kvm->max_gsi / 32
By the way, this math means we can't use all gsi's
if the number is not a multiple of 32.
Round up instead? It's not hard: (kvm->max_gsi + 31) / 32

> +
> + bit = ffs(~buf[i]);
> + if (!bit)
> + return -EAGAIN;

We know it won't be 0, right?
Instead of checking twice, move the ffs call within the loop above?
E.g. like this:
for (i = 0; i < kvm->max_gsi / 32; ++i)
if ((bit = ffs(~buf[i])) {
gsi = bit - 1 + i * 32;
set_bit(buf, gsi);
return gsi;
}

return -ENOSPC;

> +
> + gsi = (bit - 1) | (i << 5);

clearer as bit - 1 + i * 32

> + set_bit(buf, gsi);
> + return gsi;
> +}
> +#endif
> +
>  int kvm_get_irq_route_gsi(kvm_context_t kvm)
>  {
>  #ifdef KVM_CAP_IRQ_ROUTING
> - if (kvm->max_used_gsi >= KVM_IOAPIC_NUM_PINS)  {
> - if (kvm->max_used_gsi + 1 < kvm_get_gsi_count(kvm))
> -return kvm->max_used_gsi + 1;
> -else
> -return -ENOSPC;
> -} else
> -return KVM_IOAPIC_NUM_PINS;
> + int gsi;
> +
> + pthread_mutex_lock(&kvm->gsi_mutex);
> +
> + if (!kvm->max_gsi) {

Why is this lazy allocation required?
Let's do the below in some init function,
and keep code simple?

> + int i;
> +
> + /* Round the number of GSIs supported to a 4 byte
> +  * value so we can search it using ints and ffs */
> + i = kvm_get_gsi_count(kvm) & ~0x1f;

Should not we round up? Why not?
Again, we can count = kvm_get_gsi_count(kvm) / 32, and use count * 4 below.

> + kvm->used_gsi_bitmap = malloc(i >> 3);

why not qemu_malloc?

> + if (!kvm->used_gsi_bitmap) {
> + pthread_mutex_unlock(&kvm->gsi_mutex);
> + return -ENOMEM;
> + }
> + memset(kvm->used_gsi_bitmap, 0, i >> 3);
> + kvm->max_gsi = i;
> +
> + /* Mark all the IOAPIC pin GSIs as already used */
> + for (i = 0; i <= KVM_IOAPIC_NUM_PINS; i++)

Is this really <=? Not  + set_bit(kvm->used_gsi_bitmap, i);
> + }
> +
> + gsi = kvm_find_free_gsi(kvm);
> + pthread_mutex_unlock(&kvm->gsi_mutex);
> + return gsi;
>  #else
>   return -ENOSYS;
>  #endif
>  }
> + 
> +void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi)
> +{
> +#ifdef KVM_CAP_IRQ_ROUTING
> + pthread_mutex_lock(&kvm->gsi_mutex);
> + clear_bit(kvm->used_gsi_bitmap, gsi);
> + pthread_mutex_unlock(&kvm->gsi_mutex);
> +#endif
> +}
>  
>  #ifdef KVM_CAP_DEVICE_MSIX
>  int kvm_assign_set_msix_nr(kvm_context_t kvm,
> diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
> index c23d37b..4e9344c 100644
> --- a/kvm/libkvm/libkvm.h
> +++ b/kvm/libkvm/libkvm.h
> @@ -856,6 +856,16 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
>   */
>  int kvm_get_irq_route_gsi(kvm_context_t kvm);
>  
> +/*!
> + * \brief Free used GSI number
> + *
> + * Free used GSI number acquired from kvm_get_irq_route_gsi()
> + *
> + * \param kvm Pointer to the current kvm_context
> + * \param gsi GSI number to free
> + */
> +void kvm_free_irq_route_gsi(kvm_context_t kvm, int gsi);
> +
>  #ifdef KVM_CAP_DEVICE_MSIX
>  int kvm_assign_set_msix_nr(kvm_context_t kvm,
>  struct kvm_assigned_msix_nr *msix_nr);
> 

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v7.1] kvm: add iofd support

2009-05-12 Thread Gregory Haskins
[ here is the updated header ]

iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd
signal when written to.  Userspace can register any arbitrary address
with a corresponding eventfd.

Normal IO requires a synchronous/blocking round-trip since the operation
may cause side-effects in the emulated model or may return data to the caller.
However, there is a subclass of IO which acts purely as a trigger for other IO
(such as to kick off an out-of-band DMA request, etc).  For these patterns,
the synchronous call is particularly expensive because they are done
within the same context as the VCPU thread and therefore cause a VMX/SVM
"heavy-weight" exit, a transition back to userspace, and overhead with
the qemu locking and decoding operations.

Therefore providing the registration of an in-kernel trigger point allows the
VCPU to take a very brief lightweight exit only long enough to signal the
eventfd.  This also means that any clients compatible with the eventfd
interface (which includes userspace and kernelspace equally well) can now
register for asynchronous notification when one of these signals are generated
in the guest.  The end result should be a more flexible and higher performance
notification API for the backend KVM hypervisor and perhipheral components.

Signed-off-by: Gregory Haskins 
---

 include/linux/kvm.h  |   12 +
 include/linux/kvm_host.h |2 +
 virt/kvm/eventfd.c   |  107 ++
 virt/kvm/kvm_main.c  |   13 ++
 4 files changed, 134 insertions(+), 0 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index dfc4bcc..99b6e45 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -292,6 +292,17 @@ struct kvm_guest_debug {
struct kvm_guest_debug_arch arch;
 };
 
+#define KVM_IOFD_FLAG_DEASSIGN  (1 << 0)
+#define KVM_IOFD_FLAG_PIO   (1 << 1)
+
+struct kvm_iofd {
+   __u64 addr;
+   __u32 len;
+   __u32 fd;
+   __u32 flags;
+   __u8  pad[12];
+};
+
 #define KVM_TRC_SHIFT   16
 /*
  * kvm trace categories
@@ -508,6 +519,7 @@ struct kvm_irqfd {
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
 #define KVM_ASSIGN_IRQFD   _IOW(KVMIO, 0x76, struct kvm_irqfd)
 #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32)
+#define KVM_IOFD   _IOW(KVMIO, 0x78, struct kvm_iofd)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1acc528..d53cb70 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -529,5 +529,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 int kvm_assign_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 int kvm_deassign_irqfd(struct kvm *kvm, int fd);
 void kvm_irqfd_release(struct kvm *kvm);
+int kvm_iofd(struct kvm *kvm, unsigned long addr, size_t len,
+int fd, int flags);
 
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 71afd62..8b23317 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -21,12 +21,16 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
+
+#include "iodev.h"
 
 /*
  * 
@@ -185,3 +189,106 @@ kvm_irqfd_release(struct kvm *kvm)
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds, list)
irqfd_release(irqfd);
 }
+
+/*
+ * 
+ * iofd: translate a PIO/MMIO memory write to an eventfd signal.
+ *
+ * userspace can register a PIO/MMIO address with an eventfd for recieving
+ * notification when the memory has been touched.
+ * 
+ */
+
+struct _iofd {
+   u64  addr;
+   size_t   length;
+   struct file *file;
+   struct kvm_io_device dev;
+};
+
+static int
+iofd_in_range(struct kvm_io_device *this, gpa_t addr, int len, int is_write)
+{
+   struct _iofd *iofd = (struct _iofd *)this->private;
+
+   return ((addr >= iofd->addr && (addr < iofd->addr + iofd->length)));
+}
+
+/* writes trigger an event */
+static void
+iofd_write(struct kvm_io_device *this, gpa_t addr, int len, const void *val)
+{
+   struct _iofd *iofd = (struct _iofd *)this->private;
+
+   eventfd_signal(iofd->file, 1);
+}
+
+/* reads return all zeros */
+static void
+iofd_read(struct kvm_io_device *this, gpa_t addr, int len, void *val)
+{
+   memset(val, 0, len);
+}
+
+static void
+iofd_destructor(struct kvm_io_device *this)
+{
+   struct _iofd *iofd = (struct _iofd *)this->private;
+
+   fput(iofd->file);
+   kfree(iofd);
+}
+
+static int
+kvm_assign_iofd(struct kvm *kvm, unsigned long addr, size_t len,
+   int fd, int flags)
+{
+   intpio = flags & KVM_IOFD_FLAG_PIO;
+   struct kvm_io_bus *bus = pio ? &k

Re: [kvm] Re: [PATCH] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Alex Williamson
On Tue, 2009-05-12 at 22:10 +0300, Michael S. Tsirkin wrote:
> On Tue, May 12, 2009 at 01:06:54PM -0600, Alex Williamson wrote:
> > On Tue, 2009-05-12 at 12:45 -0600, Alex Williamson wrote:
> > > On Mon, 2009-05-11 at 12:00 +, Yang, Sheng wrote:
> > > > On Friday 08 May 2009 06:22:20 Alex Williamson wrote:
> > > > > + /* Round the number of GSIs supported to a 4 byte
> > > > > +  * value so we can search it using ints and ffs */
> > > > > + i = kvm_get_gsi_count(kvm) & ~0x1f;
> > > > > + kvm->used_gsi_bitmap = malloc(i >> 3);
> > > > 
> > > > 3 or 5?
> > > 
> > > 3, ie. /8 (bits to bytes)
> > > 
> > > > I am a little confused by these magic numbers, including 0x1f...
> > > 
> > > The 5 shift gives us the index into the array of ints, the 0x1f gives us
> > > the bit index into a specific int.  This is very similar to the code in
> > > hw/acpi.c.
> > > 
> > > > I think there are something can indicate the length of unsigned long in 
> > > > QEmu(sorry, can't find it now...), so how about using ffsl() and get 
> > > > other 
> > > > constants based on it?
> > > 
> > > We'd probably want to use ffsll() so we can ignore 32b vs 64b longs.
> > > There's HOST_LONG_BITS, but that doesn't actually help defining a shift
> > > value.  Thanks,
> > 
> > Hmm, neither ffsl() or ffsll() are standard.  I'm inclined to stick with
> > 32bits.  We'll likely only be using the first index on x86_64 anyway
> > (2nd on ia64).  Thanks,
> > 
> > Alex
> 
> With MSI, we start with GSI 24, so we'll be using more than just the
> first index.

Right, that leaves 7 GSIs free in the first 32 bit index for device
assignment, plus 999 GSIs free in the entire table.  If we're worried
about the overhead of using a 4 or 8 byte index into the bitmap, maybe
we should be caching GSIs for specific devices.  This feels like a bit
of an overkill for now though.  Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvm] Re: [PATCH] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Michael S. Tsirkin
On Tue, May 12, 2009 at 01:06:54PM -0600, Alex Williamson wrote:
> On Tue, 2009-05-12 at 12:45 -0600, Alex Williamson wrote:
> > On Mon, 2009-05-11 at 12:00 +, Yang, Sheng wrote:
> > > On Friday 08 May 2009 06:22:20 Alex Williamson wrote:
> > > > +   /* Round the number of GSIs supported to a 4 byte
> > > > +* value so we can search it using ints and ffs */
> > > > +   i = kvm_get_gsi_count(kvm) & ~0x1f;
> > > > +   kvm->used_gsi_bitmap = malloc(i >> 3);
> > > 
> > > 3 or 5?
> > 
> > 3, ie. /8 (bits to bytes)
> > 
> > > I am a little confused by these magic numbers, including 0x1f...
> > 
> > The 5 shift gives us the index into the array of ints, the 0x1f gives us
> > the bit index into a specific int.  This is very similar to the code in
> > hw/acpi.c.
> > 
> > > I think there are something can indicate the length of unsigned long in 
> > > QEmu(sorry, can't find it now...), so how about using ffsl() and get 
> > > other 
> > > constants based on it?
> > 
> > We'd probably want to use ffsll() so we can ignore 32b vs 64b longs.
> > There's HOST_LONG_BITS, but that doesn't actually help defining a shift
> > value.  Thanks,
> 
> Hmm, neither ffsl() or ffsll() are standard.  I'm inclined to stick with
> 32bits.  We'll likely only be using the first index on x86_64 anyway
> (2nd on ia64).  Thanks,
> 
> Alex

With MSI, we start with GSI 24, so we'll be using more than just the
first index.

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v7 1/3] eventfd: export eventfd interfaces for module use

2009-05-12 Thread Davide Libenzi
On Tue, 12 May 2009, Gregory Haskins wrote:

> We want to use eventfd from KVM which can be compiled as a module, so
> export the interfaces.
> 
> Signed-off-by: Gregory Haskins 

Acked-by: Davide Libenzi 



- Davide


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvm] Re: [PATCH] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Alex Williamson
On Tue, 2009-05-12 at 12:45 -0600, Alex Williamson wrote:
> On Mon, 2009-05-11 at 12:00 +, Yang, Sheng wrote:
> > On Friday 08 May 2009 06:22:20 Alex Williamson wrote:
> > > + /* Round the number of GSIs supported to a 4 byte
> > > +  * value so we can search it using ints and ffs */
> > > + i = kvm_get_gsi_count(kvm) & ~0x1f;
> > > + kvm->used_gsi_bitmap = malloc(i >> 3);
> > 
> > 3 or 5?
> 
> 3, ie. /8 (bits to bytes)
> 
> > I am a little confused by these magic numbers, including 0x1f...
> 
> The 5 shift gives us the index into the array of ints, the 0x1f gives us
> the bit index into a specific int.  This is very similar to the code in
> hw/acpi.c.
> 
> > I think there are something can indicate the length of unsigned long in 
> > QEmu(sorry, can't find it now...), so how about using ffsl() and get other 
> > constants based on it?
> 
> We'd probably want to use ffsll() so we can ignore 32b vs 64b longs.
> There's HOST_LONG_BITS, but that doesn't actually help defining a shift
> value.  Thanks,

Hmm, neither ffsl() or ffsll() are standard.  I'm inclined to stick with
32bits.  We'll likely only be using the first index on x86_64 anyway
(2nd on ia64).  Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] KVM: Fix potentially recursively get kvm lock

2009-05-12 Thread Marcelo Tosatti
On Tue, May 12, 2009 at 11:30:21AM -0300, Marcelo Tosatti wrote:
> On Tue, May 12, 2009 at 10:13:36PM +0800, Yang, Sheng wrote:
> > > > +   mutex_unlock(&kvm->lock);
> > >
> > > assigned_dev list is protected by kvm->lock. So you could have another
> > > ioctl adding to it at the same time you're searching.
> > 
> > Oh, yes... My fault... 
> > 
> > > Could either have a separate kvm->assigned_devs_lock, to protect
> > > kvm->arch.assigned_dev_head (users are ioctls that manipulate it), or
> > > change the IRQ injection to use a separate spinlock, kill the workqueue
> > > and call kvm_set_irq from the assigned device interrupt handler.
> > 
> > Peferred the latter, though needs more work. But the only reason for put a 
> > workqueue here is because kvm->lock is a mutex? I can't believe... If so, I 
> > think we had made a big mistake - we have to fix all kinds of racy problem 
> > caused by this, but finally find it's unnecessary... 
> 
> One issue is that kvm_set_irq can take too long while interrupts are
> blocked, and you'd have to disable interrupts in other contexes that
> inject interrupts (say qemu->ioctl(SET_INTERRUPT)->...->), so all i can
> see is a tradeoff.

Or multiple kvm_set_irq calls for MSI.

> 
> 
> 
> But the interrupt injection path seems to be pretty short and efficient
> to happen in host interrupt context.
> 
> 
> 
> Avi, Gleb?
> 
> > Maybe another reason is kvm_kick_vcpu(), but have already fixed by you.
> 
> Note you tested the spinlock_irq patch with GigE and there was no
> significant performance regression right?
> 
> > 
> > Continue to check the code...
> > 
> > -- 
> > regards
> > Yang, Sheng
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v7 3/3] kvm: add iofd support

2009-05-12 Thread Gregory Haskins
Gregory Haskins wrote:
> iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd
> signal when written to.  Userspace can register any arbitrary address
> with a corresponding eventfd.
>   

Ugg..this patch header sucks, especially given all the talk around how
we need to do them better lately :)

I will add this text as well for future versions:

--

Traditional MMIO/PIO exit paths are expensive because they are done
within the same context as the VCPU thread and therefore cause a VMX/SVM
"heavy-weight" exit, a transition back to userspace, and overhead with
the qemu processing of the operation.  An eventfd mechanism, on the
other hand, allows the VCPU to take a very brief lightweight exit only
long enough to trigger the eventfd_signal.  This means that clients of
the eventfd (supporting both userspace or kernel end-points) can
potentially get notified much more efficiently than if we were to
register through the traditional mechanism via qemu MMIO/PIO notification.

---
> Signed-off-by: Gregory Haskins 
> ---
>
>  include/linux/kvm.h  |   12 +
>  include/linux/kvm_host.h |2 +
>  virt/kvm/eventfd.c   |  107 
> ++
>  virt/kvm/kvm_main.c  |   13 ++
>  4 files changed, 134 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index dfc4bcc..99b6e45 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -292,6 +292,17 @@ struct kvm_guest_debug {
>   struct kvm_guest_debug_arch arch;
>  };
>  
> +#define KVM_IOFD_FLAG_DEASSIGN  (1 << 0)
> +#define KVM_IOFD_FLAG_PIO   (1 << 1)
> +
> +struct kvm_iofd {
> + __u64 addr;
> + __u32 len;
> + __u32 fd;
> + __u32 flags;
> + __u8  pad[12];
> +};
> +
>  #define KVM_TRC_SHIFT   16
>  /*
>   * kvm trace categories
> @@ -508,6 +519,7 @@ struct kvm_irqfd {
>  #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
>  #define KVM_ASSIGN_IRQFD   _IOW(KVMIO, 0x76, struct kvm_irqfd)
>  #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32)
> +#define KVM_IOFD   _IOW(KVMIO, 0x78, struct kvm_iofd)
>  
>  /*
>   * ioctls for vcpu fds
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 1acc528..d53cb70 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -529,5 +529,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) 
> {}
>  int kvm_assign_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
>  int kvm_deassign_irqfd(struct kvm *kvm, int fd);
>  void kvm_irqfd_release(struct kvm *kvm);
> +int kvm_iofd(struct kvm *kvm, unsigned long addr, size_t len,
> +  int fd, int flags);
>  
>  #endif
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 71afd62..8b23317 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -21,12 +21,16 @@
>   */
>  
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
>  #include 
>  #include 
>  #include 
> +#include 
> +
> +#include "iodev.h"
>  
>  /*
>   * 
> @@ -185,3 +189,106 @@ kvm_irqfd_release(struct kvm *kvm)
>   list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds, list)
>   irqfd_release(irqfd);
>  }
> +
> +/*
> + * 
> + * iofd: translate a PIO/MMIO memory write to an eventfd signal.
> + *
> + * userspace can register a PIO/MMIO address with an eventfd for recieving
> + * notification when the memory has been touched.
> + * 
> + */
> +
> +struct _iofd {
> + u64  addr;
> + size_t   length;
> + struct file *file;
> + struct kvm_io_device dev;
> +};
> +
> +static int
> +iofd_in_range(struct kvm_io_device *this, gpa_t addr, int len, int is_write)
> +{
> + struct _iofd *iofd = (struct _iofd *)this->private;
> +
> + return ((addr >= iofd->addr && (addr < iofd->addr + iofd->length)));
> +}
> +
> +/* writes trigger an event */
> +static void
> +iofd_write(struct kvm_io_device *this, gpa_t addr, int len, const void *val)
> +{
> + struct _iofd *iofd = (struct _iofd *)this->private;
> +
> + eventfd_signal(iofd->file, 1);
> +}
> +
> +/* reads return all zeros */
> +static void
> +iofd_read(struct kvm_io_device *this, gpa_t addr, int len, void *val)
> +{
> + memset(val, 0, len);
> +}
> +
> +static void
> +iofd_destructor(struct kvm_io_device *this)
> +{
> + struct _iofd *iofd = (struct _iofd *)this->private;
> +
> + fput(iofd->file);
> + kfree(iofd);
> +}
> +
> +static int
> +kvm_assign_iofd(struct kvm *kvm, unsigned long addr, size_t len,
> + int fd, int flags)
> +{
> + intpio = flags & KVM_IOFD_FLAG_PIO;
> + struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
> + struct _iofd  *

Re: [KVM PATCH v6] kvm: add support for irqfd

2009-05-12 Thread Gregory Haskins
Avi Kivity wrote:
> Gregory Haskins wrote:
>> (Applies to kvm.git:833367b57)
>>
>> KVM provides a complete virtual system environment for guests, including
>> support for injecting interrupts modeled after the real
>> exception/interrupt
>> facilities present on the native platform (such as the IDT on x86).
>> Virtual interrupts can come from a variety of sources (emulated devices,
>> pass-through devices, etc) but all must be injected to the guest via
>> the KVM infrastructure.  This patch adds a new mechanism to inject a
>> specific
>> interrupt to a guest using a decoupled eventfd mechnanism:  Any legal
>> signal
>> on the irqfd (using eventfd semantics from either userspace or
>> kernel) will
>> translate into an injected interrupt in the guest at the next available
>> interrupt window.
>>
>> I am reasonably satisfied with this series, so Avi please consider for
>> inclusion.
>>   
>
> Looks good.  Please add a way to disassociate an irqfd (for hotunplug)
> and I'll merge.
>
Done, and v7 sent out.  Since I completed the testing on the
"hypercall-fd" (I call them "iofd" for reasons described in the patch
headers) I included those as well for review.  Since the irqfd stuff is
heavily reviewed and the iofd patches haven't seen the light of day
until today, feel free to merge them independently.  I included them
together because I wanted to avoid forking the CAP bits, etc, for two
closely related subsystems.  But they technically don't need to go in
together if you prefer.

Thanks Avi,
-Greg



signature.asc
Description: OpenPGP digital signature


Re: [PATCH] kvm: Use a bitmap for tracking used GSIs

2009-05-12 Thread Alex Williamson
On Mon, 2009-05-11 at 12:00 +, Yang, Sheng wrote:
> On Friday 08 May 2009 06:22:20 Alex Williamson wrote:
> > +   /* Round the number of GSIs supported to a 4 byte
> > +* value so we can search it using ints and ffs */
> > +   i = kvm_get_gsi_count(kvm) & ~0x1f;
> > +   kvm->used_gsi_bitmap = malloc(i >> 3);
> 
> 3 or 5?

3, ie. /8 (bits to bytes)

> I am a little confused by these magic numbers, including 0x1f...

The 5 shift gives us the index into the array of ints, the 0x1f gives us
the bit index into a specific int.  This is very similar to the code in
hw/acpi.c.

> I think there are something can indicate the length of unsigned long in 
> QEmu(sorry, can't find it now...), so how about using ffsl() and get other 
> constants based on it?

We'd probably want to use ffsll() so we can ignore 32b vs 64b longs.
There's HOST_LONG_BITS, but that doesn't actually help defining a shift
value.  Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 2/2] qemu-kvm: add iofd support

2009-05-12 Thread Gregory Haskins
An iofd allows an eventfd to attach to a specific PIO/MMIO region in the
guest.  Any guest-writes to that region will trigger an eventfd signal.

Signed-off-by: Gregory Haskins 
---

 kvm/libkvm/libkvm.c |   52 +++
 kvm/libkvm/libkvm.h |   31 ++
 2 files changed, 83 insertions(+), 0 deletions(-)

diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index 74a21a2..0139abd 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -1497,6 +1497,45 @@ int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int 
flags)
return r;
 }
 
+int kvm_assign_iofd(kvm_context_t kvm, unsigned long addr, size_t len,
+   int fd, int type, int flags)
+{
+   int r;
+   struct kvm_iofd data = {
+   .addr  = addr,
+   .len   = len,
+   .fd= fd,
+   .flags = type ? KVM_IOFD_FLAG_PIO : 0,
+   };
+
+   if (!kvm_check_extension(kvm, KVM_CAP_EVENTFD))
+   return -ENOENT;
+
+   r = ioctl(kvm->vm_fd, KVM_IOFD, &data);
+   if (r == -1)
+   r = -errno;
+   return r;
+}
+
+int kvm_deassign_iofd(kvm_context_t kvm, unsigned long addr, size_t len,
+ int type, int flags)
+{
+   int r;
+   struct kvm_iofd data = {
+   .addr  = addr,
+   .len   = len,
+   .flags = KVM_IOFD_FLAG_DEASSIGN | (type ? KVM_IOFD_FLAG_PIO : 
0),
+   };
+
+   if (!kvm_check_extension(kvm, KVM_CAP_EVENTFD))
+   return -ENOENT;
+
+   r = ioctl(kvm->vm_fd, KVM_IOFD, &data);
+   if (r == -1)
+   r = -errno;
+   return r;
+}
+
 #else /* KVM_CAP_EVENTFD */
 
 int kvm_create_irqfd(kvm_context_t kvm, int gsi, int flags)
@@ -1509,4 +1548,17 @@ int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int 
flags)
return -ENOENT;
 }
 
+int kvm_assign_iofd(kvm_context_t kvm, unsigned long addr, size_t len,
+   int fd, int type, int flags)
+{
+   return -ENOENT;
+}
+
+int kvm_deassign_iofd(kvm_context_t kvm, unsigned long addr, size_t len,
+ int type, int flags)
+{
+   return -ENOENT;
+}
+
 #endif /* KVM_CAP_EVENTFD */
+
diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
index 322b4cd..89c558a 100644
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -881,6 +881,37 @@ int kvm_create_irqfd(kvm_context_t kvm, int gsi, int 
flags);
  */
 int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int flags);
 
+/*!
+ * \brief Assign an eventfd to an IO port (PIO or MMIO)
+ *
+ * Assigns an eventfd based file-descriptor to a specific PIO or MMIO
+ * address range.  Any guest writes to the specified range will generate
+ * an eventfd signal.
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param addr The IO address
+ * \param len The length of the IO region at the address
+ * \param fd The eventfd file-descriptor
+ * \param type MMIO=0, PIO=1
+ * \param flags reserved, must be zero
+ */
+int kvm_assign_iofd(kvm_context_t kvm, unsigned long addr, size_t len,
+   int fd, int type, int flags);
+
+/*!
+ * \brief Deassign an iofd from a previously registered IO port
+ *
+ * Deassigns an iofd previously registered with kvm_assign_iofd()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param addr The IO address
+ * \param len The length of the IO region at the address
+ * \param type MMIO=0, PIO=1
+ * \param flags reserved, must be zero
+ */
+int kvm_deassign_iofd(kvm_context_t kvm, unsigned long addr, size_t len,
+ int type, int flags);
+
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
   struct kvm_assigned_msix_nr *msix_nr);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 1/2] qemu-kvm: add irqfd support

2009-05-12 Thread Gregory Haskins
irqfd lets you create an eventfd based file-desriptor to inject interrupts
to a kvm guest.  We associate one gsi per fd for fine-grained routing.

Signed-off-by: Gregory Haskins 
---

 kvm/libkvm/libkvm.c |   66 +++
 kvm/libkvm/libkvm.h |   25 +++
 2 files changed, 91 insertions(+), 0 deletions(-)

diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index ba0a5d1..74a21a2 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "libkvm.h"
 
@@ -1444,3 +1445,68 @@ int kvm_assign_set_msix_entry(kvm_context_t kvm,
 return ret;
 }
 #endif
+
+#ifdef KVM_CAP_EVENTFD
+static int _assign_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
+{
+   int r;
+   struct kvm_irqfd data = {
+   .fd= fd,
+   .gsi   = gsi,
+   .flags = flags,
+   };
+
+   r = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQFD, &data);
+   if (r == -1)
+   r = -errno;
+   return r;
+}
+
+int kvm_create_irqfd(kvm_context_t kvm, int gsi, int flags)
+{
+   int r;
+   int fd;
+
+   if (!kvm_check_extension(kvm, KVM_CAP_EVENTFD))
+   return -ENOENT;
+
+   fd = eventfd(0, 0);
+   if (fd < 0)
+   return -errno;
+
+   r = _assign_irqfd(kvm, fd, gsi, flags);
+   if (r < 0) {
+   close(fd);
+   return -errno;
+   }
+
+   return fd;
+}
+
+int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int flags)
+{
+   int r;
+   __u32 data = fd;
+
+   r = ioctl(kvm->vm_fd, KVM_DEASSIGN_IRQFD, &data);
+   if (r == -1)
+   r = -errno;
+
+   close(fd);
+
+   return r;
+}
+
+#else /* KVM_CAP_EVENTFD */
+
+int kvm_create_irqfd(kvm_context_t kvm, int gsi, int flags)
+{
+   return -ENOENT;
+}
+
+int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int flags)
+{
+   return -ENOENT;
+}
+
+#endif /* KVM_CAP_EVENTFD */
diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
index 4821a1e..322b4cd 100644
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -856,6 +856,31 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
  */
 int kvm_get_irq_route_gsi(kvm_context_t kvm);
 
+/*!
+ * \brief Create a file descriptor for injecting interrupts
+ *
+ * Creates an eventfd based file-descriptor that maps to a specific GSI
+ * in the guest.  eventfd compliant signaling (write() from userspace, or
+ * eventfd_signal() from kernelspace) will cause the GSI to inject
+ * itself into the guest at the next available window.
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param gsi GSI to assign to this fd
+ * \param flags reserved, must be zero
+ */
+int kvm_create_irqfd(kvm_context_t kvm, int gsi, int flags);
+
+/*!
+ * \brief Destroy an irqfd file descriptor
+ *
+ * Destroys a file descriptor previously opened with kvm_create_irqfd()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param fd fd to close
+ * \param flags reserved, must be zero
+ */
+int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int flags);
+
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
   struct kvm_assigned_msix_nr *msix_nr);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v7 0/2] eventfd support for KVM userspace

2009-05-12 Thread Gregory Haskins
This is the userspace support for the irqfd/iofd interfaces published here:

http://lkml.org/lkml/2009/5/12/372

---

Gregory Haskins (2):
  qemu-kvm: add iofd support
  qemu-kvm: add irqfd support


 kvm/libkvm/libkvm.c |  118 +++
 kvm/libkvm/libkvm.h |   56 
 2 files changed, 174 insertions(+), 0 deletions(-)

-- 
Signature
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v7 3/3] kvm: add iofd support

2009-05-12 Thread Gregory Haskins
iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd
signal when written to.  Userspace can register any arbitrary address
with a corresponding eventfd.

Signed-off-by: Gregory Haskins 
---

 include/linux/kvm.h  |   12 +
 include/linux/kvm_host.h |2 +
 virt/kvm/eventfd.c   |  107 ++
 virt/kvm/kvm_main.c  |   13 ++
 4 files changed, 134 insertions(+), 0 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index dfc4bcc..99b6e45 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -292,6 +292,17 @@ struct kvm_guest_debug {
struct kvm_guest_debug_arch arch;
 };
 
+#define KVM_IOFD_FLAG_DEASSIGN  (1 << 0)
+#define KVM_IOFD_FLAG_PIO   (1 << 1)
+
+struct kvm_iofd {
+   __u64 addr;
+   __u32 len;
+   __u32 fd;
+   __u32 flags;
+   __u8  pad[12];
+};
+
 #define KVM_TRC_SHIFT   16
 /*
  * kvm trace categories
@@ -508,6 +519,7 @@ struct kvm_irqfd {
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
 #define KVM_ASSIGN_IRQFD   _IOW(KVMIO, 0x76, struct kvm_irqfd)
 #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32)
+#define KVM_IOFD   _IOW(KVMIO, 0x78, struct kvm_iofd)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1acc528..d53cb70 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -529,5 +529,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 int kvm_assign_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 int kvm_deassign_irqfd(struct kvm *kvm, int fd);
 void kvm_irqfd_release(struct kvm *kvm);
+int kvm_iofd(struct kvm *kvm, unsigned long addr, size_t len,
+int fd, int flags);
 
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 71afd62..8b23317 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -21,12 +21,16 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
+
+#include "iodev.h"
 
 /*
  * 
@@ -185,3 +189,106 @@ kvm_irqfd_release(struct kvm *kvm)
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds, list)
irqfd_release(irqfd);
 }
+
+/*
+ * 
+ * iofd: translate a PIO/MMIO memory write to an eventfd signal.
+ *
+ * userspace can register a PIO/MMIO address with an eventfd for recieving
+ * notification when the memory has been touched.
+ * 
+ */
+
+struct _iofd {
+   u64  addr;
+   size_t   length;
+   struct file *file;
+   struct kvm_io_device dev;
+};
+
+static int
+iofd_in_range(struct kvm_io_device *this, gpa_t addr, int len, int is_write)
+{
+   struct _iofd *iofd = (struct _iofd *)this->private;
+
+   return ((addr >= iofd->addr && (addr < iofd->addr + iofd->length)));
+}
+
+/* writes trigger an event */
+static void
+iofd_write(struct kvm_io_device *this, gpa_t addr, int len, const void *val)
+{
+   struct _iofd *iofd = (struct _iofd *)this->private;
+
+   eventfd_signal(iofd->file, 1);
+}
+
+/* reads return all zeros */
+static void
+iofd_read(struct kvm_io_device *this, gpa_t addr, int len, void *val)
+{
+   memset(val, 0, len);
+}
+
+static void
+iofd_destructor(struct kvm_io_device *this)
+{
+   struct _iofd *iofd = (struct _iofd *)this->private;
+
+   fput(iofd->file);
+   kfree(iofd);
+}
+
+static int
+kvm_assign_iofd(struct kvm *kvm, unsigned long addr, size_t len,
+   int fd, int flags)
+{
+   intpio = flags & KVM_IOFD_FLAG_PIO;
+   struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+   struct _iofd  *iofd;
+   struct file   *file;
+
+   file = eventfd_fget(fd);
+   if (IS_ERR(file))
+   return PTR_ERR(file);
+
+   iofd = kzalloc(sizeof(*iofd), GFP_KERNEL);
+   if (!iofd) {
+   fput(file);
+   return -ENOMEM;
+   }
+
+   iofd->dev.read   = iofd_read;
+   iofd->dev.write  = iofd_write;
+   iofd->dev.in_range   = iofd_in_range;
+   iofd->dev.destructor = iofd_destructor;
+   iofd->dev.private= iofd;
+
+   iofd->addr   = addr;
+   iofd->length = len;
+   iofd->file   = file;
+
+   kvm_io_bus_register_dev(bus, &iofd->dev);
+
+   printk(KERN_DEBUG "registering %s iofd at %lx of size %d\n",
+  pio  ? "PIO" : "MMIO", addr, (int)len);
+
+   return 0;
+}
+
+static int
+kvm_deassign_iofd(struct kvm *kvm, unsigned long addr, size_t len,
+ int fd, int flags)
+{
+   /* FIXME: We need an io_bus_unregister() function */
+   return -EINVAL;
+}
+
+int
+kvm_iofd(struct kvm *kvm, unsigned long

[KVM PATCH v7 1/3] eventfd: export eventfd interfaces for module use

2009-05-12 Thread Gregory Haskins
We want to use eventfd from KVM which can be compiled as a module, so
export the interfaces.

Signed-off-by: Gregory Haskins 
---

 fs/eventfd.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 2a701d5..3f0e197 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct eventfd_ctx {
wait_queue_head_t wqh;
@@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n)
 
return n;
 }
+EXPORT_SYMBOL_GPL(eventfd_signal);
 
 static int eventfd_release(struct inode *inode, struct file *file)
 {
@@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd)
 
return file;
 }
+EXPORT_SYMBOL_GPL(eventfd_fget);
 
 SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 {

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface

2009-05-12 Thread Gregory Haskins
KVM provides a complete virtual system environment for guests, including
support for injecting interrupts modeled after the real exception/interrupt
facilities present on the native platform (such as the IDT on x86).
Virtual interrupts can come from a variety of sources (emulated devices,
pass-through devices, etc) but all must be injected to the guest via
the KVM infrastructure.  This patch adds a new mechanism to inject a specific
interrupt to a guest using a decoupled eventfd mechnanism:  Any legal signal
on the irqfd (using eventfd semantics from either userspace or kernel) will
translate into an injected interrupt in the guest at the next available
interrupt window.

Signed-off-by: Gregory Haskins 
---

 arch/x86/kvm/Makefile|2 
 arch/x86/kvm/x86.c   |1 
 include/linux/kvm.h  |   10 ++
 include/linux/kvm_host.h |5 +
 virt/kvm/eventfd.c   |  187 ++
 virt/kvm/kvm_main.c  |   20 +
 6 files changed, 224 insertions(+), 1 deletions(-)
 create mode 100644 virt/kvm/eventfd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b43c4ef..4d50904 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,7 +3,7 @@
 #
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-coalesced_mmio.o irq_comm.o)
+coalesced_mmio.o irq_comm.o eventfd.o)
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fd0a571..ba541f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1026,6 +1026,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_ASSIGN_DEV_IRQ:
+   case KVM_CAP_EVENTFD:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 3db5d8d..dfc4bcc 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -415,6 +415,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#define KVM_CAP_EVENTFD 31
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -454,6 +455,13 @@ struct kvm_irq_routing {
 
 #endif
 
+struct kvm_irqfd {
+   __u32 fd;
+   __u32 gsi;
+   __u32 flags;
+   __u8  pad[20];
+};
+
 /*
  * ioctls for VM fds
  */
@@ -498,6 +506,8 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
+#define KVM_ASSIGN_IRQFD   _IOW(KVMIO, 0x76, struct kvm_irqfd)
+#define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2b8df0c..1acc528 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,7 @@ struct kvm {
struct list_head vm_list;
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
+   struct list_head irqfds;
struct kvm_vm_stat stat;
struct kvm_arch arch;
atomic_t users_count;
@@ -525,4 +526,8 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
 #endif
 
+int kvm_assign_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
+int kvm_deassign_irqfd(struct kvm *kvm, int fd);
+void kvm_irqfd_release(struct kvm *kvm);
+
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
new file mode 100644
index 000..71afd62
--- /dev/null
+++ b/virt/kvm/eventfd.c
@@ -0,0 +1,187 @@
+/*
+ * kvm eventfd support - use eventfd objects to signal various KVM events
+ *
+ * Copyright 2009 Novell.  All Rights Reserved.
+ *
+ * Author:
+ * Gregory Haskins 
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * 
+ * irqfd: Allows an fd to be used to inject an interrupt to the guest
+ *
+ * Credit goes to Avi Kivity for the original idea.
+ * 
+ */
+struct _irqfd {
+   struct kvm   

[KVM PATCH v7 0/3] kvm: eventfd interfaces (formerly irqfd)

2009-05-12 Thread Gregory Haskins
(Applies to kvm.git:b5e725fa)

This is v7 of the series.  We have generalized the name of the series (as well
as some of the hunks in the series) to reflect the fact that we have multiple
eventfd based components.

This series has been tested and appears to be working as intended.  You can
download the unit-test used to verify this here:

ftp://ftp.novell.com/dev/ghaskins/kvm-eventfd.tar.bz

This will also serve as an example on how to use the new interfaces.

[ Changelog:

   v7:
*) Added "iofd" to allow PIO/MMIO writes to generate an eventfd
   signal.  This was previously discussed as "hypercallfd", but
   since explicit hypercalls are not looking to be very popular,
   and based on the fact that they were not going to carry payload
   anyway, I named them "iofd".
*) Generalized some of the code so that irqfd and iofd could be
   logically grouped together.  For instance
   s/KVM_CAP_IRQFD/KVM_CAP_EVENTFD and
   virt/kvm/irqfd.c becomes virt/kvm/eventfd.c
*) Added support for "deassign" operations to ensure we can properly
   support hot-unplug.
*) Reinstated the eventfd EXPORT_SYMBOL patch since we need it again
   for supporting iofd.
*) Rebased to kvm.git:b5e725fa 

   v6:
*) Moved eventfd creation back to userspace, per Avi's request
*) Dropped no longer necessary supporting patches from series
*) Rebased to kvm.git:833367b57

   v5:
*) Added padding to the ioctl structure
*) Added proper ref-count increment to the file before returning
   success. (Needs review by Al Viro, Davide Libenzi)
*) Cleaned up error-handling path to make sure we remove ourself
   from the waitq if necessary.
*) Make sure we only add ourselves to kvm->irqfds if successful
   creating the irqfd in the first place.
*) Rebased to kvm.git:66b0aed4 

   v4:
*) Changed allocation model to create the new fd last, after
   we get past the last potential error point by using Davide's
   new eventfd_file_create interface (Al Viro, Davide Libenzi)
*) We no longer export sys_eventfd2() since it is replaced
   functionally with eventfd_file_create();
*) Rebased to kvm.git:7da2e3ba

   v3:
*) The kernel now allocates the eventfd (need to export sys_eventfd2)
*) Added a flags field for future expansion to kvm_irqfd()
*) We properly toggle the irq level 1+0.
*) We re-use the USERSPACE_SRC_ID instead of creating our own
*) Properly check for failures establishing a poll-table with eventfd
*) Fixed fd/file leaks on failure
*) Rebased to lateste kvm.git::41b76d8d04

   v2:
*) Dropped notifier_chain based callbacks in favor of
   wait_queue_t::func and file::poll based callbacks (Thanks to
   Davide for the suggestion)

   v1:
*) Initial release

]

[ Todo:
*) Implement the bus_io_unregister() function so the iofd hot-unplug
   path may be completed
*) Test the hot-unplug path
]


---

Gregory Haskins (3):
  kvm: add iofd support
  kvm: add support for irqfd via eventfd-notification interface
  eventfd: export eventfd interfaces for module use


 arch/x86/kvm/Makefile|2 
 arch/x86/kvm/x86.c   |1 
 fs/eventfd.c |3 
 include/linux/kvm.h  |   22 +++
 include/linux/kvm_host.h |7 +
 virt/kvm/eventfd.c   |  294 ++
 virt/kvm/kvm_main.c  |   33 +
 7 files changed, 361 insertions(+), 1 deletions(-)
 create mode 100644 virt/kvm/eventfd.c

-- 
Signature
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: Problem doing pci passthrough of the network card without VT-d

2009-05-12 Thread Passera, Pablo R
One update on this. I disabled VT-d from the BIOS and now I am not getting the 
DMAR error messages in dmesg, but the board still does not work on the guest. 
Any help is welcomed.

e1000e :00:19.0: PCI INT A disabled
pci-stub :00:19.0: PCI INT A -> GSI 20 (level, low) -> IRQ 20
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X

Regards,
Pablo

>-Original Message-
>From: Passera, Pablo R
>Sent: Tuesday, May 12, 2009 12:14 PM
>To: kvm@vger.kernel.org
>Subject: Problem doing pci passthrough of the network card without VT-d
>
>Hi List,
>   I am having problems to do pci passthrough to a network card
>without using VT-d. The card is present in the guest but with a
>different model (Intel Corporation 82801I Gigabit Ethernet Controller
>(rev 2)) and it does not work. The qemu line that I used is:
>
>./devel/bin/qemu-system-x86_64 -hda ./dm.img -m 256 -pcidevice
>host=00:19.0,dma=none -net none
>
>Before running qemu I did
>
>echo "8086 294c" > /sys/bus/pci/drivers/pci-stub/new_id
>echo :00:19.0 > /sys/bus/pci/drivers/e1000e/unbind
>echo :00:19.0 > /sys/bus/pci/drivers/pci-stub/bind
>
>This is the lspci -tv output
>
>-[:00]-+-00.0  Intel Corporation 82X38/X48 Express DRAM Controller
>   +-01.0-[:01]00.0  nVidia Corporation G80 [GeForce
>8800 GTX]
>   +-19.0  Intel Corporation 82566DC-2 Gigabit Network
>Connection
>   +-1a.0  Intel Corporation 82801I (ICH9 Family) USB UHCI
>Controller #4
>   +-1a.1  Intel Corporation 82801I (ICH9 Family) USB UHCI
>Controller #5
>   +-1a.2  Intel Corporation 82801I (ICH9 Family) USB UHCI
>Controller #6
>   +-1a.7  Intel Corporation 82801I (ICH9 Family) USB2 EHCI
>Controller #2
>   +-1b.0  Intel Corporation 82801I (ICH9 Family) HD Audio
>Controller
>   +-1c.0-[:02]--
>   +-1c.4-[:03]00.0  Marvell Technology Group Ltd.
>88SE6121 SATA II Controller
>   +-1d.0  Intel Corporation 82801I (ICH9 Family) USB UHCI
>Controller #1
>   +-1d.1  Intel Corporation 82801I (ICH9 Family) USB UHCI
>Controller #2
>   +-1d.2  Intel Corporation 82801I (ICH9 Family) USB UHCI
>Controller #3
>   +-1d.7  Intel Corporation 82801I (ICH9 Family) USB2 EHCI
>Controller #1
>   +-1e.0-[:04]03.0  Texas Instruments TSB43AB22/A IEEE-
>1394a-2000 Controller (PHY/Link)
>   +-1f.0  Intel Corporation 82801IR (ICH9R) LPC Interface
>Controller
>   +-1f.2  Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 4 port
>SATA IDE Controller
>   +-1f.3  Intel Corporation 82801I (ICH9 Family) SMBus
>Controller
>   \-1f.5  Intel Corporation 82801I (ICH9 Family) 2 port SATA
>IDE Controller
>
>
>I am getting the following error in host dmesg
>
>e1000e :00:19.0: PCI INT A disabled
>pci-stub :00:19.0: PCI INT A -> GSI 20 (level, low) -> IRQ 20
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>DMAR:[DMA Read] Request device [00:19.0] fault addr baee000
>DMAR:[fault reason 02] Present bit in context entry is clear
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>DMAR:[DMA Read] Request device [00:19.0] fault addr b90c000
>DMAR:[fault reason 02] Present bit in context entry is clear
>
>And in a second run I got
>
>e1000e :00:19.0: PCI INT A disabled
>pci-stub :00:19.0: PCI INT A -> GSI 20 (level, low) -> IRQ 20
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>DMAR:[DMA Read] Request device [00:19.0] fault addr ba8
>DMAR:[fault reason 06] PTE Read access is not set
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>pci-stub :00:19.0: irq 29 for MSI/MSI-X
>DMAR:[DMA Read] Request device [00:19.0] fault addr baa4000
>DMAR:[fault reason 06] PTE Read access is not set
>
>It does work if I enable VT-d, even though the network card is
>recognized with a different model. However, I need this to work without
>VT-d. Which could be the problem here? Is this supported?
>
>Thanks a lot,
>Pablo
>
>
>More in

Re: [PATCH 2/2] deal with interrupt shadow state for emulated instruction

2009-05-12 Thread Avi Kivity

Glauber Costa wrote:

we currently unblock shadow interrupt state when we skip an instruction,
but failing to do so when we actually emulate one. This blocks interrupts
in key instruction blocks, in particular sti; hlt; sequences

If the instruction emulated is an sti, we have to block shadow interrupts.
The same goes for mov ss. pop ss also needs it, but we don't currently
emulate it.

Without this patch, I cannot boot gpxe option roms at vmx machines.
This is described at https://bugzilla.redhat.com/show_bug.cgi?id=494469

@@ -1618,6 +1620,15 @@ special_insn:
int err;
 
 		sel = c->src.val;

+   if (c->modrm_reg == VCPU_SREG_SS) {
+   u32 int_shadow =
+   kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu,
+ 
X86_SHADOW_INT_MOV_SS);
+   /* See sti emulation for an explanation of this */
+   if (!(int_shadow & X86_SHADOW_INT_MOV_SS))
+   ctxt->interruptibility = X86_SHADOW_INT_MOV_SS;
+   }
  


The indentation of the first statement here is annoying.  Suggest a 
function toggle_interruptibility(ctxt, mask).  Would eliminate the need 
for the comment forward reference as well.



--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] replace drop_interrupt_shadow by set_interrupt_shadow

2009-05-12 Thread Avi Kivity

Glauber Costa wrote:

This patch replaces drop_interrupt_shadow with the more
general set_interrupt_shadow, that can either drop or raise
it, depending on its parameter.

 }
 
+static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)

+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+   u32 ret = 0;
+
+   if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
+   ret |= X86_SHADOW_INT_STI && X86_SHADOW_INT_MOV_SS;
+   return ret & mask;
+}
  


&& -> |.


+
+static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+   u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+   u32 interruptibility = interruptibility_old;
+
+   interruptibility &= ~((GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
  


May drop one layer of parentheses.



--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v6] kvm: add support for irqfd

2009-05-12 Thread Avi Kivity

Gregory Haskins wrote:

(Applies to kvm.git:833367b57)

KVM provides a complete virtual system environment for guests, including
support for injecting interrupts modeled after the real exception/interrupt
facilities present on the native platform (such as the IDT on x86).
Virtual interrupts can come from a variety of sources (emulated devices,
pass-through devices, etc) but all must be injected to the guest via
the KVM infrastructure.  This patch adds a new mechanism to inject a specific
interrupt to a guest using a decoupled eventfd mechnanism:  Any legal signal
on the irqfd (using eventfd semantics from either userspace or kernel) will
translate into an injected interrupt in the guest at the next available
interrupt window.

I am reasonably satisfied with this series, so Avi please consider for
inclusion.
  


Looks good.  Please add a way to disassociate an irqfd (for hotunplug) 
and I'll merge.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ANNOUNCE] qemu-kvm-0.10.4

2009-05-12 Thread Avi Kivity
qemu-kvm-0.10.4 is now available.  This is the first release of the 0.10 
stable branch of qemu-kvm.  The qemu-kvm 0.10.4 includes all of the 
features and fixes of qemu-0.10.4, plus adaptations for improved kvm 
support.


Note that qemu-kvm releases do not include the kvm external modules 
(kvm*.ko); you can use the modules provided by your distribution, 
modules from the development releases (kvm-xx), or from the kvm-kmod 
stable branch releases once they become available.


As this is the first release of this branch there is no changelog; 
qemu-kvm-0.10.4 is roughly equivalent (but is not identical) to qemu 
from kvm-84.


http://www.linux-kvm.org

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] Userspace changes for KVM HPET (v3)

2009-05-12 Thread Beth Kon

Beth Kon wrote:

Avi Kivity wrote:

Beth Kon wrote:

Signed-off-by: Beth Kon 


diff --git a/hw/hpet.c b/hw/hpet.c
index c7945ec..100abf5 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -30,6 +30,7 @@
 #include "console.h"
 #include "qemu-timer.h"
 #include "hpet_emul.h"
+#include "qemu-kvm.h"
 
 //#define HPET_DEBUG

 #ifdef HPET_DEBUG
@@ -48,6 +49,28 @@ uint32_t hpet_in_legacy_mode(void)
 return 0;
 }
 
+static void hpet_legacy_enable(void)

+{
+if (qemu_kvm_pit_in_kernel()) {
+kvm_kpit_disable();
+dprintf("qemu: hpet disabled kernel pit\n");
+} else {
+hpet_pit_disable();
+dprintf("qemu: hpet disabled userspace pit\n");
+}
+}
+
+static void hpet_legacy_disable(void)
+{
+if (qemu_kvm_pit_in_kernel()) {
+kvm_kpit_enable();
+dprintf("qemu: hpet enabled kernel pit\n");
+} else {
+hpet_pit_enable();
+dprintf("qemu: hpet enabled userspace pit\n");
+}
+}
  
I think it's better to move these into hpet_pit_enable() and 
hpet_pit_enable().  This avoids changing the calls below, and puts 
pit stuff in i8254.c instead of hpet.c.


Might also need to be called from hpet_load(); probably a problem in 
upstream as well.


My assumption about hpet_load was that the correct pit state would be 
established via pit_load (since all saves/loads are done together).  
But when I wrote this, I was thinking only about the userspace pit 
(for qemu). I'm not sure how the "load" concept applies to kernel 
state.  Do I need to explicitly re-enable or disable the kernel pit 
during load?
Looking further at the code, it looks like kvm_pit_load should take care 
of this. Agree?





--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv4] virtio: find_vqs/del_vqs virtio operations

2009-05-12 Thread Michael S. Tsirkin
This replaces find_vq/del_vq with find_vqs/del_vqs virtio operations,
and updates all drivers. This is needed for MSI support, because MSI
needs to know the total number of vectors upfront.

Signed-off-by: Michael S. Tsirkin 
---

Untested - Rusty could you pls take a look before I
go ahead and rebase the rest on top of this?

 drivers/block/virtio_blk.c  |   12 
 drivers/char/hw_random/virtio-rng.c |   12 +
 drivers/char/virtio_console.c   |   26 ---
 drivers/lguest/lguest_device.c  |   36 ++-
 drivers/net/virtio_net.c|   45 ++-
 drivers/s390/kvm/kvm_virtio.c   |   37 +++-
 drivers/virtio/virtio_balloon.c |   27 
 drivers/virtio/virtio_pci.c |   37 +++-
 include/linux/virtio_config.h   |   33 +
 net/9p/trans_virtio.c   |8 +++---
 10 files changed, 178 insertions(+), 95 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 8f7c956..ec7a47a 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -197,6 +197,8 @@ static int virtblk_probe(struct virtio_device *vdev)
u64 cap;
u32 v;
u32 blk_size, sg_elems;
+   vq_callback_t *callback[] = { blk_done };
+   const char *name[] = { "requests" };
 
if (index_to_minor(index) >= 1 << MINORBITS)
return -ENOSPC;
@@ -224,11 +226,9 @@ static int virtblk_probe(struct virtio_device *vdev)
sg_init_table(vblk->sg, vblk->sg_elems);
 
/* We expect one virtqueue, for output. */
-   vblk->vq = vdev->config->find_vq(vdev, 0, blk_done, "requests");
-   if (IS_ERR(vblk->vq)) {
-   err = PTR_ERR(vblk->vq);
+   err = vdev->config->find_vqs(vdev, 1, &vblk->vq, callback, name);
+   if (err)
goto out_free_vblk;
-   }
 
vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
if (!vblk->pool) {
@@ -323,7 +323,7 @@ out_put_disk:
 out_mempool:
mempool_destroy(vblk->pool);
 out_free_vq:
-   vdev->config->del_vq(vblk->vq);
+   vdev->config->del_vqs(vdev);
 out_free_vblk:
kfree(vblk);
 out:
@@ -344,7 +344,7 @@ static void virtblk_remove(struct virtio_device *vdev)
blk_cleanup_queue(vblk->disk->queue);
put_disk(vblk->disk);
mempool_destroy(vblk->pool);
-   vdev->config->del_vq(vblk->vq);
+   vdev->config->del_vqs(vdev);
kfree(vblk);
 }
 
diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
index 2aeafce..65ee25a 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -91,16 +91,18 @@ static struct hwrng virtio_hwrng = {
 
 static int virtrng_probe(struct virtio_device *vdev)
 {
+   vq_callback_t *callback[] = { random_recv_done };
+   const char *name[] = { "input" };
int err;
 
/* We expect a single virtqueue. */
-   vq = vdev->config->find_vq(vdev, 0, random_recv_done, "input");
-   if (IS_ERR(vq))
-   return PTR_ERR(vq);
+   err = vdev->config->find_vqs(vdev, 1, &vq, callback, name);
+   if (err)
+   return err;
 
err = hwrng_register(&virtio_hwrng);
if (err) {
-   vdev->config->del_vq(vq);
+   vdev->config->del_vqs(vdev);
return err;
}
 
@@ -112,7 +114,7 @@ static void virtrng_remove(struct virtio_device *vdev)
 {
vdev->config->reset(vdev);
hwrng_unregister(&virtio_hwrng);
-   vdev->config->del_vq(vq);
+   vdev->config->del_vqs(vdev);
 }
 
 static struct virtio_device_id id_table[] = {
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 58684e4..144d1b7 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -188,6 +188,9 @@ static void hvc_handle_input(struct virtqueue *vq)
  * Finally we put our input buffer in the input queue, ready to receive. */
 static int __devinit virtcons_probe(struct virtio_device *dev)
 {
+   vq_callback_t *callbacks[] = { hvc_handle_input, NULL};
+   const char *names[] = { "input", "output" };
+   struct virtqueue *vqs[2];
int err;
 
vdev = dev;
@@ -199,20 +202,15 @@ static int __devinit virtcons_probe(struct virtio_device 
*dev)
goto fail;
}
 
-   /* Find the input queue. */
+   /* Find the queues. */
/* FIXME: This is why we want to wean off hvc: we do nothing
 * when input comes in. */
-   in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input, "input");
-   if (IS_ERR(in_vq)) {
-   err = PTR_ERR(in_vq);
+   err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+   if (err)
goto free;
-   }
 
-   out_vq = vdev->config->find_vq(vdev, 1, NULL, "o

Re: [PATCH 1/3] virtio: find_vqs/del_vqs virtio operations

2009-05-12 Thread Michael S. Tsirkin
On Wed, May 13, 2009 at 12:00:02AM +0930, Rusty Russell wrote:
> On Tue, 12 May 2009 07:49:32 am Michael S. Tsirkin wrote:
> > This replaces find_vq/del_vq with find_vqs/del_vqs virtio operations,
> > and updates all drivers. This is needed for MSI support, because MSI
> > needs to know the total number of vectors upfront.
> 
> Sorry, is this not on top of my virtio_device vq linked list patch?

Not yet, working on that.

> Two other things: prefer vq_callback_t as the type name,

ok

> and perhaps consider 
> varargs for the callbacks (or would that be too horrible at the 
> implementation 
> end?)
> 
> Thanks,
> Rusty.

Ugh ... I think it will be. And AFAIK gcc generates a lot of code
for varargs - not something we want to do in each interrupt handler.

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH][KVM-AUTOTEST] Add custom install option for kvm_install

2009-05-12 Thread Mike Burns
From: Michael Burns 


Signed-off-by: Michael Burns 
---
 client/tests/kvm_runtest_2/control|   18 +-
 client/tests/kvm_runtest_2/kvm_install.py |   15 +++
 2 files changed, 32 insertions(+), 1 deletions(-)

diff --git a/client/tests/kvm_runtest_2/control 
b/client/tests/kvm_runtest_2/control
index fd68e94..d6e26bc 100644
--- a/client/tests/kvm_runtest_2/control
+++ b/client/tests/kvm_runtest_2/control
@@ -41,6 +41,19 @@ link_if_not_exist(pwd, qemu_img, 'qemu-img')
 
 # -
 # Build and install kvm
+#
+# Details of Install options
+#   Mode: custom
+#   Description:  install from custom install script
+#   Parameters needed:
+# install_script: 
+#   location of script relative to the kvm-runtest_2 directory.
+#   Script will be executed from test.bindir (generally kvm_runtest_2)
+#   parameters for the script can be passed either as environment variables
+#   in the params array below or in the definition of install_script.
+#   If they are passed as part of params, then they will be accessible as
+#   KVM_INSTALL_ in the OS Environment when your script runs.  
+#
 # -
 params = {
 "name": "kvm_install",
@@ -57,7 +70,10 @@ params = {
 
 ## Install from git
 "git_repo": 'git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git',
-"user_git_repo": 
'git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm-userspace.git'
+"user_git_repo": 
'git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm-userspace.git',
+
+## Custom install
+"install_script": 'custom_kvm_install.sh param1'
 }
 
 # Comment the job.run_test line if you do not want to install kvm on the host.
diff --git a/client/tests/kvm_runtest_2/kvm_install.py 
b/client/tests/kvm_runtest_2/kvm_install.py
index 8be5a93..234c77a 100755
--- a/client/tests/kvm_runtest_2/kvm_install.py
+++ b/client/tests/kvm_runtest_2/kvm_install.py
@@ -77,6 +77,21 @@ def run_kvm_install(test, params, env):
 elif install_mode == "localsrc":
 __install_kvm(test, srcdir)
 
+# install from custom script
+elif install_mode == "custom":
+install_script = params.get("install_script")
+script = os.path.join(test.bindir,install_script)
+if not install_script:
+message = "Custom script filename not specified"
+kvm_log.error(message)
+raise error.TestError, message
+for k in params.keys():
+ kvm_log.info("Adding KVM_INSTALL_%s to Environment" % (k))
+  os.putenv("KVM_INSTALL_%s" % (k), str(params[k]))
+   kvm_log.info("Running " + script + " to install kvm")
+os.system("cd %s; %s" % (test.bindir, script))
+   kvm_log.info("Completed %s" % (script))
+
 # invalid installation mode
 else:
 message = "Invalid installation mode: '%s'" % install_mode
-- 
1.6.0.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][KVM-AUTOTEST] Add custom install option for kvm_install

2009-05-12 Thread Mike Burns

Eduardo Pereira Habkost wrote:

Excerpts from Michael Goldish's message of Mon May 11 16:00:29 -0300 2009:
  

- "Eduardo Habkost"  wrote:




  

Also, can't the install rule be used on the cartesian configuration
file? In this case, the install parameters may be specified on a
different config rule.
  

It's certainly possible. We chose to put kvm_install in the control
file because currently it's the only test we run only once per job.
There is no natural place for kvm_install in kvm_tests.cfg because
everything else there gets multiplied.



True. I haven't thought of that.

  

For example, the Fedora project could use it like this:

cvs_server = "cvs.fedoraproject.org:/..."
variants:
  - CVSF10:
cvs_branch = F10
  - CVSF11:
cvs_branch = F11
  - CVSRawhide:
cvs_branch = devel


variants:
  - install:
type = kvm_install
mode = custom
install_command = "install_from_fedora_cvs.sh"
  - ...

I used Fedora CVS as an example, but the user may use anything we can
imagine, to store KVM code (or pointer to its), possibly having
different branches.
  

This can be done, though the order and syntax of the 'variants' blocks
should be different, more like:

variants:
  - kvm_install:
cvs_server = "cvs.fedoraproject.org:/..."
type = kvm_install
mode = custom
install_command = "install_from_fedora_cvs.sh"
  - everything_else_in_kvm_tests.cfg:

...

...

...

...



We could have something to make it easier to build test runs like ,
without having to nest the whole original file. But I can't think of a
good solution.


  

variants:
  - CVSF10:
kvm_install:
cvs_branch = F10
  - CVSF11:
kvm_install:
cvs_branch = F11
  - CVSRawhide:
kvm_install:
cvs_branch = devel

# test sets
variants:
- nightly:
...

That is, assuming you want to
- install KVM from F10 branch
- run all tests
- install KVM from F11 branch
- run all tests
- install KVM from devel branch
- run all tests

If you meant something different please correct me.



Yeah. I am not really used to the configuration file format, but that
was the idea.  :)
  


I've reworked the patch to use test.bindir and force it to run from 
kvm_runtest_2.  I also doubled checked that both parameters and 
variables defined in the params object are available to the script.  The 
updated past should go out shortly. 

Right now, nothing from the kvm_tests.cfg file is available.  I don't 
see an obvious way to do that, but we can put that into a separate patch 
if needed later.


Mike
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Enable IRQ windows after exception injection if there are pending virq

2009-05-12 Thread Gleb Natapov
On Tue, May 12, 2009 at 11:06:39PM +0800, Dong, Eddie wrote:
> 
> I didn't take many test since our PTS system stop working now due to KVM 
> userspace
> build changes. But since the logic is pretty simple, so I want to post here 
> to see comments.
> Thx, eddie
> 
> 
> 
> 
> If there is pending irq after an virtual exception is injected,
> KVM needs to enable IRQ window to trap back earlier once 
> the exception is handled.
> 
I already posted patch to do that http://patchwork.kernel.org/patch/21830/
Is you patch different?

> Signed-off-by: Eddie Dong 
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 308d8e9..f8ceaea 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3154,15 +3154,18 @@ static void inject_irq(struct kvm_vcpu *vcpu)
>   }
>  }
>  
> -static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run 
> *kvm_run)
> +static void inject_pending_irq(struct kvm_vcpu *vcpu)
>  {
> - bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
> - kvm_run->request_interrupt_window;
> -
>   if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
>   kvm_x86_ops->drop_interrupt_shadow(vcpu);
>  
>   inject_irq(vcpu);
> +}
> +
> +static void set_pending_virq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +{
> + bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
> + kvm_run->request_interrupt_window;
>  
>   /* enable NMI/IRQ window open exits if needed */
>   if (vcpu->arch.nmi_pending)
> @@ -3229,7 +3232,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, 
> struct kvm_run *kvm_run)
>   if (vcpu->arch.exception.pending)
>   __queue_exception(vcpu);
>   else
> - inject_pending_irq(vcpu, kvm_run);
> + inject_pending_irq(vcpu);
> +
> + set_pending_virq(vcpu, kvm_run);
>  
>   if (kvm_lapic_enabled(vcpu)) {
>   if (!vcpu->arch.apic->vapic_addr)


--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/6] kvm-s390: use hrtimer for clock wakeup from idle - v2

2009-05-12 Thread ehrhardt
From: Christian Borntraeger 

This patch reworks the s390 clock comparator wakeup to hrtimer. The clock
comparator is a per-cpu value that is compared against the TOD clock. If
ckc <= TOD an external interrupt 1004 is triggered. Since the clock comparator
and the TOD clock have a much higher resolution than jiffies we should use
hrtimers to trigger the wakeup. This speeds up guest nanosleep for small
values.

Since hrtimers callbacks run in hard-irq context, I added a tasklet to do
the actual work with enabled interrupts. 

Signed-off-by: Christian Borntraeger 
Signed-off-by: Carsten Otte 
Signed-off-by: Christian Ehrhardt 
---
 arch/s390/include/asm/kvm_host.h |5 -
 arch/s390/kvm/interrupt.c|   33 +++--
 arch/s390/kvm/kvm-s390.c |7 +--
 arch/s390/kvm/kvm-s390.h |4 +++-
 4 files changed, 35 insertions(+), 14 deletions(-)

Index: kvm/arch/s390/include/asm/kvm_host.h
===
--- kvm.orig/arch/s390/include/asm/kvm_host.h
+++ kvm/arch/s390/include/asm/kvm_host.h
@@ -13,6 +13,8 @@
 
 #ifndef ASM_KVM_HOST_H
 #define ASM_KVM_HOST_H
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -210,7 +212,8 @@ struct kvm_vcpu_arch {
s390_fp_regs  guest_fpregs;
unsigned int  guest_acrs[NUM_ACRS];
struct kvm_s390_local_interrupt local_int;
-   struct timer_list ckc_timer;
+   struct hrtimerckc_timer;
+   struct tasklet_struct tasklet;
union  {
cpuid_t   cpu_id;
u64   stidp_data;
Index: kvm/arch/s390/kvm/interrupt.c
===
--- kvm.orig/arch/s390/kvm/interrupt.c
+++ kvm/arch/s390/kvm/interrupt.c
@@ -12,6 +12,8 @@
 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include "kvm-s390.h"
@@ -361,12 +363,10 @@ int kvm_s390_handle_wait(struct kvm_vcpu
return 0;
}
 
-   sltime = (vcpu->arch.sie_block->ckc - now) / (0xf424ul / HZ) + 1;
+   sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9;
 
-   vcpu->arch.ckc_timer.expires = jiffies + sltime;
-
-   add_timer(&vcpu->arch.ckc_timer);
-   VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime);
+   hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , 
HRTIMER_MODE_REL);
+   VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", 
sltime);
 no_timer:
spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
@@ -389,21 +389,34 @@ no_timer:
remove_wait_queue(&vcpu->wq, &wait);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
-   del_timer(&vcpu->arch.ckc_timer);
+   hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
return 0;
 }
 
-void kvm_s390_idle_wakeup(unsigned long data)
+void kvm_s390_tasklet(unsigned long parm)
 {
-   struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+   struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;
 
-   spin_lock_bh(&vcpu->arch.local_int.lock);
+   spin_lock(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.timer_due = 1;
if (waitqueue_active(&vcpu->arch.local_int.wq))
wake_up_interruptible(&vcpu->arch.local_int.wq);
-   spin_unlock_bh(&vcpu->arch.local_int.lock);
+   spin_unlock(&vcpu->arch.local_int.lock);
 }
 
+/*
+ * low level hrtimer wake routine. Because this runs in hardirq context
+ * we schedule a tasklet to do the real work.
+ */
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
+{
+   struct kvm_vcpu *vcpu;
+
+   vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+   tasklet_schedule(&vcpu->arch.tasklet);
+
+   return HRTIMER_NORESTART;
+}
 
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
Index: kvm/arch/s390/kvm/kvm-s390.c
===
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -287,8 +288,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu 
vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
vcpu->arch.sie_block->ecb   = 2;
vcpu->arch.sie_block->eca   = 0xC1002001U;
-   setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
-(unsigned long) vcpu);
+   hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+   tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
+(unsigned long) vcpu);
+   vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
get_cpu_id(&vcpu->arch.cpu_id);
vcpu->arch.cpu_id.version = 0xff;
return 0;
Index: kvm/arch/s390/kvm/kvm-s390.h
===

[PATCH 3/6] kvm-s390: optimize float int lock: spin_lock_bh --> spin_lock

2009-05-12 Thread ehrhardt
From: Christian Borntraeger 

The floating interrupt lock is only taken in process context. We can
replace all spin_lock_bh with standard spin_lock calls.

Signed-off-by: Christian Borntraeger 
Signed-off-by: Christian Ehrhardt 
---
 arch/s390/kvm/interrupt.c |   20 ++--
 arch/s390/kvm/kvm-s390.c  |4 ++--
 arch/s390/kvm/priv.c  |4 ++--
 arch/s390/kvm/sigp.c  |   16 
 4 files changed, 22 insertions(+), 22 deletions(-)

Index: kvm/arch/s390/kvm/interrupt.c
===
--- kvm.orig/arch/s390/kvm/interrupt.c
+++ kvm/arch/s390/kvm/interrupt.c
@@ -301,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcp
}
 
if ((!rc) && atomic_read(&fi->active)) {
-   spin_lock_bh(&fi->lock);
+   spin_lock(&fi->lock);
list_for_each_entry(inti, &fi->list, list)
if (__interrupt_is_deliverable(vcpu, inti)) {
rc = 1;
break;
}
-   spin_unlock_bh(&fi->lock);
+   spin_unlock(&fi->lock);
}
 
if ((!rc) && (vcpu->arch.sie_block->ckc <
@@ -368,7 +368,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , 
HRTIMER_MODE_REL);
VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", 
sltime);
 no_timer:
-   spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+   spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
add_wait_queue(&vcpu->arch.local_int.wq, &wait);
while (list_empty(&vcpu->arch.local_int.list) &&
@@ -377,18 +377,18 @@ no_timer:
!signal_pending(current)) {
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_bh(&vcpu->arch.local_int.lock);
-   spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+   spin_unlock(&vcpu->arch.local_int.float_int->lock);
vcpu_put(vcpu);
schedule();
vcpu_load(vcpu);
-   spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+   spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
}
__unset_cpu_idle(vcpu);
__set_current_state(TASK_RUNNING);
remove_wait_queue(&vcpu->wq, &wait);
spin_unlock_bh(&vcpu->arch.local_int.lock);
-   spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+   spin_unlock(&vcpu->arch.local_int.float_int->lock);
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
return 0;
 }
@@ -455,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts
if (atomic_read(&fi->active)) {
do {
deliver = 0;
-   spin_lock_bh(&fi->lock);
+   spin_lock(&fi->lock);
list_for_each_entry_safe(inti, n, &fi->list, list) {
if (__interrupt_is_deliverable(vcpu, inti)) {
list_del(&inti->list);
@@ -466,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts
}
if (list_empty(&fi->list))
atomic_set(&fi->active, 0);
-   spin_unlock_bh(&fi->lock);
+   spin_unlock(&fi->lock);
if (deliver) {
__do_deliver_interrupt(vcpu, inti);
kfree(inti);
@@ -531,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 
mutex_lock(&kvm->lock);
fi = &kvm->arch.float_int;
-   spin_lock_bh(&fi->lock);
+   spin_lock(&fi->lock);
list_add_tail(&inti->list, &fi->list);
atomic_set(&fi->active, 1);
sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
@@ -548,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
if (waitqueue_active(&li->wq))
wake_up_interruptible(&li->wq);
spin_unlock_bh(&li->lock);
-   spin_unlock_bh(&fi->lock);
+   spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
return 0;
 }
Index: kvm/arch/s390/kvm/kvm-s390.c
===
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -323,11 +323,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
spin_lock_init(&vcpu->arch.local_int.lock);
INIT_LIST_HEAD(&vcpu->arch.local_int.list);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
-   spin_lock_bh(&kvm->arch.float_int.lock);
+   spin_lock(&kvm->arch.float_int.lock);
kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
init_waitqueue_head(&vcpu->arch.local_int.wq);
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_bloc

[PATCH 6/6] kvm-s390: Verify memory in kvm run

2009-05-12 Thread ehrhardt
From: Carsten Otte 

This check verifies that the guest we're trying to run in KVM_RUN
has some memory assigned to it. It enters an endless exception
loop if this is not the case.

Reported-by: Mijo Safradin 
Signed-off-by: Carsten Otte 
Signed-off-by: Christian Ehrhardt 
---
 arch/s390/kvm/kvm-s390.c |6 ++
 1 file changed, 6 insertions(+)

Index: kvm/arch/s390/kvm/kvm-s390.c
===
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -478,6 +478,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
 
vcpu_load(vcpu);
 
+   /* verify, that memory has been registered */
+   if (!vcpu->kvm->arch.guest_memsize) {
+   vcpu_put(vcpu);
+   return -EINVAL;
+   }
+
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/6] kvm-s390: Sanity check on validity intercept

2009-05-12 Thread ehrhardt
From: Carsten Otte 

This patch adds a sanity check for the content of the guest
prefix register content before faulting in the cpu lowcore
that it refers to. The guest might end up in an endless loop
where SIE complains about missing lowcore with incorrect
content of the prefix register without this fix.

Reported-by: Mijo Safradin 
Signed-off-by: Carsten Otte 
Signed-off-by: Christian Ehrhardt 
---
 arch/s390/kvm/intercept.c |   28 ++--
 1 file changed, 18 insertions(+), 10 deletions(-)

Index: kvm/arch/s390/kvm/intercept.c
===
--- kvm.orig/arch/s390/kvm/intercept.c
+++ kvm/arch/s390/kvm/intercept.c
@@ -154,17 +154,25 @@ static int handle_stop(struct kvm_vcpu *
 static int handle_validity(struct kvm_vcpu *vcpu)
 {
int viwhy = vcpu->arch.sie_block->ipb >> 16;
+   int rc;
+
vcpu->stat.exit_validity++;
-   if (viwhy == 0x37) {
-   fault_in_pages_writeable((char __user *)
-vcpu->kvm->arch.guest_origin +
-vcpu->arch.sie_block->prefix,
-PAGE_SIZE);
-   return 0;
-   }
-   VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
-  viwhy);
-   return -ENOTSUPP;
+   if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
+   <= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
+   rc = fault_in_pages_writeable((char __user *)
+vcpu->kvm->arch.guest_origin +
+vcpu->arch.sie_block->prefix,
+2*PAGE_SIZE);
+   if (rc)
+   /* user will receive sigsegv, exit to user */
+   rc = -ENOTSUPP;
+   } else
+   rc = -ENOTSUPP;
+
+   if (rc)
+   VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
+  viwhy);
+   return rc;
 }
 
 static int handle_instruction(struct kvm_vcpu *vcpu)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/6] kvm-s390: collection of kvm-s390 fixes - v3

2009-05-12 Thread ehrhardt
From: Christian Ehrhardt 

*updates in v3*
- fix memory slot vs. run uses trylock to avoid a potential livelock
- fix memory slot vs. run checks if it is the first and only memslot registered

*updates in v2*
- hrtimer wakeup use a more accurate calculation
- unlink vcpu uses smb_mb so the pointer is really zero when the page is freed

This is a collection of fixes for kvm-s390 that originate from several tests
made in the last few months. They are now tested a while and should be ready
to be merged.

Patches included:
[PATCH 1/6] kvm-s390: Fix memory slot versus run' - v3
[PATCH 2/6] kvm-s390: use hrtimer for clock wakeup from idle' - v2
[PATCH 3/6] kvm-s390: optimize float int lock: spin_lock_bh --> spin_lock'
[PATCH 4/6] kvm-s390: Unlink vcpu on destroy' - v2
[PATCH 5/6] kvm-s390: Sanity check on validity intercept'
[PATCH 6/6] kvm-s390: Verify memory in kvm run'

Overall-Diffstat:
 arch/s390/include/asm/kvm_host.h |5 ++-
 arch/s390/kvm/intercept.c|   28 +++--
 arch/s390/kvm/interrupt.c|   53 
 arch/s390/kvm/kvm-s390.c |   63 ---
 arch/s390/kvm/kvm-s390.h |4 +-
 arch/s390/kvm/priv.c |4 +-
 arch/s390/kvm/sigp.c |   16 -
 7 files changed, 120 insertions(+), 53 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/6]: kvm-s390: Unlink vcpu on destroy - v2

2009-05-12 Thread ehrhardt
From: Carsten Otte 

This patch makes sure we do unlink a vcpu's sie control block
from the system control area in kvm_arch_vcpu_destroy. This
prevents illegal accesses to the sie control block from other
virtual cpus after free.

Reported-by: Mijo Safradin 
Signed-off-by: Carsten Otte 
Signed-off-by: Christian Ehrhardt 
---
 kvm-s390.c |   10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

Index: kvm/arch/s390/kvm/kvm-s390.c
===
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -195,6 +195,10 @@ out_nokvm:
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+   if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
+   (__u64) vcpu->arch.sie_block)
+   vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
+   smp_mb();
free_page((unsigned long)(vcpu->arch.sie_block));
kvm_vcpu_uninit(vcpu);
kfree(vcpu);
@@ -307,8 +311,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
 
vcpu->arch.sie_block->icpua = id;
BUG_ON(!kvm->arch.sca);
-   BUG_ON(kvm->arch.sca->cpu[id].sda);
-   kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+   if (!kvm->arch.sca->cpu[id].sda)
+   kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+   else
+   BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/6] kvm-s390: Fix memory slot versus run - v3

2009-05-12 Thread ehrhardt
From: Carsten Otte 

This patch fixes an incorrectness in the kvm backend for s390.
In case virtual cpus are being created before the corresponding
memory slot is being registered, we need to update the sie
control blocks for the virtual cpus.

*updates in v3*
In consideration of the s390 memslot constraints locking was changed
to trylock. These locks should never be held, as vcpu's can't run without
the single memslot we just assign when running this code. To ensure this
never deadlocks in case other code changes the code uses trylocks and bail
out if it can't get all locks.

Additionally most of the discussed special conditions for s390 like
only one memslot and no user_alloc are now checked for validity in
kvm_arch_set_memory_region.

Reported-by: Mijo Safradin 
Signed-off-by: Carsten Otte 
Signed-off-by: Christian Ehrhardt 
---
 kvm-s390.c |   36 +++-
 1 file changed, 31 insertions(+), 5 deletions(-)

Index: kvm/arch/s390/kvm/kvm-s390.c
===
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -657,6 +657,8 @@ int kvm_arch_set_memory_region(struct kv
struct kvm_memory_slot old,
int user_alloc)
 {
+   int i;
+
/* A few sanity checks. We can have exactly one memory slot which has
   to start at guest virtual zero and which has to be located at a
   page boundary in userland and which has to end at a page boundary.
@@ -664,7 +666,7 @@ int kvm_arch_set_memory_region(struct kv
   vmas. It is okay to mmap() and munmap() stuff in this slot after
   doing this call at any time */
 
-   if (mem->slot)
+   if (mem->slot || kvm->arch.guest_memsize)
return -EINVAL;
 
if (mem->guest_phys_addr)
@@ -676,15 +678,39 @@ int kvm_arch_set_memory_region(struct kv
if (mem->memory_size & (PAGE_SIZE - 1))
return -EINVAL;
 
+   if (!user_alloc)
+   return -EINVAL;
+
+   /* lock all vcpus */
+   for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+   if (!kvm->vcpus[i])
+   continue;
+   if (!mutex_trylock(&kvm->vcpus[i]->mutex))
+   goto fail_out;
+   }
+
kvm->arch.guest_origin = mem->userspace_addr;
kvm->arch.guest_memsize = mem->memory_size;
 
-   /* FIXME: we do want to interrupt running CPUs and update their memory
-  configuration now to avoid race conditions. But hey, changing the
-  memory layout while virtual CPUs are running is usually bad
-  programming practice. */
+   /* update sie control blocks, and unlock all vcpus */
+   for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+   if (kvm->vcpus[i]) {
+   kvm->vcpus[i]->arch.sie_block->gmsor =
+   kvm->arch.guest_origin;
+   kvm->vcpus[i]->arch.sie_block->gmslm =
+   kvm->arch.guest_memsize +
+   kvm->arch.guest_origin +
+   VIRTIODESCSPACE - 1ul;
+   mutex_unlock(&kvm->vcpus[i]->mutex);
+   }
+   }
 
return 0;
+
+fail_out:
+   for (; i >= 0; i--)
+   mutex_unlock(&kvm->vcpus[i]->mutex);
+   return -EINVAL;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Problem doing pci passthrough of the network card without VT-d

2009-05-12 Thread Passera, Pablo R
Hi List,
I am having problems to do pci passthrough to a network card without 
using VT-d. The card is present in the guest but with a different model (Intel 
Corporation 82801I Gigabit Ethernet Controller (rev 2)) and it does not work. 
The qemu line that I used is:

./devel/bin/qemu-system-x86_64 -hda ./dm.img -m 256 -pcidevice 
host=00:19.0,dma=none -net none

Before running qemu I did

echo "8086 294c" > /sys/bus/pci/drivers/pci-stub/new_id
echo :00:19.0 > /sys/bus/pci/drivers/e1000e/unbind
echo :00:19.0 > /sys/bus/pci/drivers/pci-stub/bind

This is the lspci -tv output

-[:00]-+-00.0  Intel Corporation 82X38/X48 Express DRAM Controller
   +-01.0-[:01]00.0  nVidia Corporation G80 [GeForce 8800 GTX]
   +-19.0  Intel Corporation 82566DC-2 Gigabit Network Connection
   +-1a.0  Intel Corporation 82801I (ICH9 Family) USB UHCI Controller #4
   +-1a.1  Intel Corporation 82801I (ICH9 Family) USB UHCI Controller #5
   +-1a.2  Intel Corporation 82801I (ICH9 Family) USB UHCI Controller #6
   +-1a.7  Intel Corporation 82801I (ICH9 Family) USB2 EHCI Controller 
#2
   +-1b.0  Intel Corporation 82801I (ICH9 Family) HD Audio Controller
   +-1c.0-[:02]--
   +-1c.4-[:03]00.0  Marvell Technology Group Ltd. 88SE6121 
SATA II Controller
   +-1d.0  Intel Corporation 82801I (ICH9 Family) USB UHCI Controller #1
   +-1d.1  Intel Corporation 82801I (ICH9 Family) USB UHCI Controller #2
   +-1d.2  Intel Corporation 82801I (ICH9 Family) USB UHCI Controller #3
   +-1d.7  Intel Corporation 82801I (ICH9 Family) USB2 EHCI Controller 
#1
   +-1e.0-[:04]03.0  Texas Instruments TSB43AB22/A 
IEEE-1394a-2000 Controller (PHY/Link)
   +-1f.0  Intel Corporation 82801IR (ICH9R) LPC Interface Controller
   +-1f.2  Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 4 port SATA 
IDE Controller
   +-1f.3  Intel Corporation 82801I (ICH9 Family) SMBus Controller
   \-1f.5  Intel Corporation 82801I (ICH9 Family) 2 port SATA IDE 
Controller


I am getting the following error in host dmesg

e1000e :00:19.0: PCI INT A disabled
pci-stub :00:19.0: PCI INT A -> GSI 20 (level, low) -> IRQ 20
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
DMAR:[DMA Read] Request device [00:19.0] fault addr baee000
DMAR:[fault reason 02] Present bit in context entry is clear
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
DMAR:[DMA Read] Request device [00:19.0] fault addr b90c000
DMAR:[fault reason 02] Present bit in context entry is clear

And in a second run I got

e1000e :00:19.0: PCI INT A disabled
pci-stub :00:19.0: PCI INT A -> GSI 20 (level, low) -> IRQ 20
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
DMAR:[DMA Read] Request device [00:19.0] fault addr ba8
DMAR:[fault reason 06] PTE Read access is not set
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
pci-stub :00:19.0: irq 29 for MSI/MSI-X
DMAR:[DMA Read] Request device [00:19.0] fault addr baa4000
DMAR:[fault reason 06] PTE Read access is not set

It does work if I enable VT-d, even though the network card is recognized with 
a different model. However, I need this to work without VT-d. Which could be 
the problem here? Is this supported?

Thanks a lot,
Pablo


More information and logs...

Kernel, kvm and network driver versions:

Host Kernel 2.6.29.2
Guest Kernel 2.6.23.1-42.fc8
KVM-85
Network card driver (guest and host) e1000e 0.5.18.3

Device assignment log
init_assigned_device: Registering real physical device 00:19.0 (bus=0 dev=19 
func=0)
get_real_device: region 0 size 131072 start 0x9320 type 512 resource_fd 12
get_real_device: region 1 size 4096 start 0x93224000 type 512 resource_fd 13
get_real_device: region 2 size 32 start 0x30e0 type 256 resource_fd 0
assigned_dev_pci_read_config: (4.0): address= val=0x8086 len=2
assigned_dev_pci_read_config: (4.0): address=0002 val=0x294c len=2
assigned_dev_pci_read_config: (4.0): address= val=0x8086 len=2
assigned_dev_pci_read_config: (4.0): address=0002 val=0x294c len=2
assigned_dev_pci_read_config: (4.0): address= val=0x8086 len=2
assigned_dev_pci_read_config: (4.0): address=0002 val=0x294c len=2
assigned_dev_pci_read_config: (4.0): address=000a val=0x0200 len=2
assigned_dev_pci_r

[PATCH 1/2] replace drop_interrupt_shadow by set_interrupt_shadow

2009-05-12 Thread Glauber Costa
This patch replaces drop_interrupt_shadow with the more
general set_interrupt_shadow, that can either drop or raise
it, depending on its parameter.

Signed-off-by: Glauber Costa 
CC: H. Peter Anvin 
CC: Avi Kivity 
CC: Gleb Natapov 
---
 arch/x86/include/asm/kvm_host.h|3 +-
 arch/x86/include/asm/kvm_x86_emulate.h |3 ++
 arch/x86/kvm/svm.c |   32 +++-
 arch/x86/kvm/vmx.c |   49 +--
 arch/x86/kvm/x86.c |2 +-
 5 files changed, 63 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8e680c3..3d933cf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -510,6 +510,8 @@ struct kvm_x86_ops {
void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+   void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
+   u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr);
void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
@@ -521,7 +523,6 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-   void (*drop_interrupt_shadow)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h 
b/arch/x86/include/asm/kvm_x86_emulate.h
index 6a15973..be40d6e 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -143,6 +143,9 @@ struct decode_cache {
struct fetch_cache fetch;
 };
 
+#define X86_SHADOW_INT_MOV_SS  1
+#define X86_SHADOW_INT_STI 2
+
 struct x86_emulate_ctxt {
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ef43a18..ae29a95 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -202,6 +202,27 @@ static int is_external_interrupt(u32 info)
return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
 }
 
+static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+   u32 ret = 0;
+
+   if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
+   ret |= X86_SHADOW_INT_STI && X86_SHADOW_INT_MOV_SS;
+   return ret & mask;
+}
+
+static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (mask == 0)
+   svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+   else
+   svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
+
+}
+
 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -215,7 +236,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
   __func__, kvm_rip_read(vcpu), svm->next_rip);
 
kvm_rip_write(vcpu, svm->next_rip);
-   svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+   svm_set_interrupt_shadow(vcpu, 0);
 }
 
 static int has_svm(void)
@@ -2229,12 +2250,6 @@ static void pre_svm_run(struct vcpu_svm *svm)
new_asid(svm, svm_data);
 }
 
-static void svm_drop_interrupt_shadow(struct kvm_vcpu *vcpu)
-{
-   struct vcpu_svm *svm = to_svm(vcpu);
-   svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
-}
-
 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -2637,6 +2652,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
+   .set_interrupt_shadow = svm_set_interrupt_shadow,
+   .get_interrupt_shadow = svm_get_interrupt_shadow,
.patch_hypercall = svm_patch_hypercall,
.set_irq = svm_set_irq,
.set_nmi = svm_inject_nmi,
@@ -2646,7 +2663,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
-   .drop_interrupt_shadow = svm_drop_interrupt_shadow,
 
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e8a5649..c28baf8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -736,23 +736,45 @@ static void vmx_set_rflags(struct kvm_

[PATCH 0/2] Deal with shadow interrupts after emulated instructions

2009-05-12 Thread Glauber Costa
Same as before, addressing avi's comments


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] deal with interrupt shadow state for emulated instruction

2009-05-12 Thread Glauber Costa
we currently unblock shadow interrupt state when we skip an instruction,
but failing to do so when we actually emulate one. This blocks interrupts
in key instruction blocks, in particular sti; hlt; sequences

If the instruction emulated is an sti, we have to block shadow interrupts.
The same goes for mov ss. pop ss also needs it, but we don't currently
emulate it.

Without this patch, I cannot boot gpxe option roms at vmx machines.
This is described at https://bugzilla.redhat.com/show_bug.cgi?id=494469

Signed-off-by: Glauber Costa 
CC: H. Peter Anvin 
CC: Avi Kivity 
CC: Gleb Natapov 
---
 arch/x86/include/asm/kvm_x86_emulate.h |3 +++
 arch/x86/kvm/x86.c |6 +-
 arch/x86/kvm/x86_emulate.c |   26 +-
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_x86_emulate.h 
b/arch/x86/include/asm/kvm_x86_emulate.h
index be40d6e..b7ed2c4 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -155,6 +155,9 @@ struct x86_emulate_ctxt {
int mode;
u32 cs_base;
 
+   /* interruptibility state, as a result of execution of STI or MOV SS */
+   int interruptibility;
+
/* decode cache */
struct decode_cache decode;
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3d8fcc5..b45baff 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2362,7 +2362,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
u16 error_code,
int emulation_type)
 {
-   int r;
+   int r, shadow_mask;
struct decode_cache *c;
 
kvm_clear_exception_queue(vcpu);
@@ -2416,6 +2416,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
}
 
r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
+   shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
+
+   if (r == 0)
+   kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
 
if (vcpu->arch.pio.string)
return EMULATE_DO_MMIO;
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d2664fc..b847523 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1372,6 +1372,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
int io_dir_in;
int rc = 0;
 
+   ctxt->interruptibility = 0;
+
/* Shadow copy of register state. Committed on successful emulation.
 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
 * modify them.
@@ -1618,6 +1620,15 @@ special_insn:
int err;
 
sel = c->src.val;
+   if (c->modrm_reg == VCPU_SREG_SS) {
+   u32 int_shadow =
+   kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu,
+ 
X86_SHADOW_INT_MOV_SS);
+   /* See sti emulation for an explanation of this */
+   if (!(int_shadow & X86_SHADOW_INT_MOV_SS))
+   ctxt->interruptibility = X86_SHADOW_INT_MOV_SS;
+   }
+
if (c->modrm_reg <= 5) {
type_bits = (c->modrm_reg == 1) ? 9 : 1;
err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
@@ -1846,10 +1857,23 @@ special_insn:
ctxt->eflags &= ~X86_EFLAGS_IF;
c->dst.type = OP_NONE;  /* Disable writeback. */
break;
-   case 0xfb: /* sti */
+   case 0xfb: { /* sti */
+   u32 int_shadow =
+   kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu,
+ X86_SHADOW_INT_STI);
+   /*
+* an sti; sti; sequence only disable interrupts for the first
+* instruction. So, if the last instruction, be it emulated or
+* not, left the system with the INT_STI flag enabled, it
+* means that the last instruction is an sti. We should not
+* leave the flag on in this case
+*/
+   if (!(int_shadow & X86_SHADOW_INT_STI))
+   ctxt->interruptibility = X86_SHADOW_INT_STI;
ctxt->eflags |= X86_EFLAGS_IF;
c->dst.type = OP_NONE;  /* Disable writeback. */
break;
+   }
case 0xfc: /* cld */
ctxt->eflags &= ~EFLG_DF;
c->dst.type = OP_NONE;  /* Disable writeback. */
-- 
1.5.6.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Enable IRQ windows after exception injection if there are pending virq

2009-05-12 Thread Dong, Eddie

I didn't take many test since our PTS system stop working now due to KVM 
userspace
build changes. But since the logic is pretty simple, so I want to post here to 
see comments.
Thx, eddie




If there is pending irq after an virtual exception is injected,
KVM needs to enable IRQ window to trap back earlier once 
the exception is handled.

Signed-off-by: Eddie Dong 

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 308d8e9..f8ceaea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3154,15 +3154,18 @@ static void inject_irq(struct kvm_vcpu *vcpu)
}
 }
 
-static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void inject_pending_irq(struct kvm_vcpu *vcpu)
 {
-   bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-   kvm_run->request_interrupt_window;
-
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
kvm_x86_ops->drop_interrupt_shadow(vcpu);
 
inject_irq(vcpu);
+}
+
+static void set_pending_virq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+   bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+   kvm_run->request_interrupt_window;
 
/* enable NMI/IRQ window open exits if needed */
if (vcpu->arch.nmi_pending)
@@ -3229,7 +3232,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
if (vcpu->arch.exception.pending)
__queue_exception(vcpu);
else
-   inject_pending_irq(vcpu, kvm_run);
+   inject_pending_irq(vcpu);
+
+   set_pending_virq(vcpu, kvm_run);
 
if (kvm_lapic_enabled(vcpu)) {
if (!vcpu->arch.apic->vapic_addr)

irq_windows.patch
Description: irq_windows.patch


Re: PowerPC page faults

2009-05-12 Thread Hollis Blanchard
On Monday 11 May 2009 17:17:53 Anthony Liguori wrote:
> Hollis Blanchard wrote:
> > On Mon, 2009-05-11 at 12:54 -0500, Anthony Liguori wrote:
> >   
> >> For future ppcemb's, do you know if there is an equivalent of a PF exit 
> >> type?  Does the hardware squirrel away the faulting address somewhere 
> >> and set PC to the start of the instruction?  If so, no guest memory load 
> >> should be required.
> >> 
> >
> > Ahhh... you're saying that the address itself (or offset within a page)
> > is the hypercall token, totally separate from IO emulation, and so we
> > could ignore the access size.
> 
> No, I'm not being nearly that clever.

I started thinking you weren't, but then I realized you must be working 
several levels above me and just forgot to explain... :)

> I was suggesting that hardware virtualization support in future PPC 
> systems might contain a mechanism to intercept a guest-mode TLB miss.  
> If it did, it would be useful if that guest-mode TLB miss "exit" 
> contained extra information somewhere that included the PC of the 
> faulting instruction, the address response for the fault, and enough 
> information to handle the fault without instruction decoding.
> 
> I assume all MMIO comes from the same set of instructions in PPC?  
> Something like ld/st instructions?  Presumably all you need to know from 
> instruction decoding is the destination register and whether it was a 
> read or write?

In addition to register source/target, we also need to know the access size of 
the memory reference. That information isn't stuffed into registers for us by 
hardware, and it's not in published specifications for future hardware.

Now, if you wanted to define a hypercall as a byte access within a particular 
4K page, where the register source/target is ignored, that could be 
interesting, but I don't know if that's relevant to this "hypercall vs MMIO" 
discussion.

-- 
Hollis Blanchard
IBM Linux Technology Center
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] KVM: Fix potentially recursively get kvm lock

2009-05-12 Thread Marcelo Tosatti
On Tue, May 12, 2009 at 10:13:36PM +0800, Yang, Sheng wrote:
> > > + mutex_unlock(&kvm->lock);
> >
> > assigned_dev list is protected by kvm->lock. So you could have another
> > ioctl adding to it at the same time you're searching.
> 
> Oh, yes... My fault... 
> 
> > Could either have a separate kvm->assigned_devs_lock, to protect
> > kvm->arch.assigned_dev_head (users are ioctls that manipulate it), or
> > change the IRQ injection to use a separate spinlock, kill the workqueue
> > and call kvm_set_irq from the assigned device interrupt handler.
> 
> Peferred the latter, though needs more work. But the only reason for put a 
> workqueue here is because kvm->lock is a mutex? I can't believe... If so, I 
> think we had made a big mistake - we have to fix all kinds of racy problem 
> caused by this, but finally find it's unnecessary... 

One issue is that kvm_set_irq can take too long while interrupts are
blocked, and you'd have to disable interrupts in other contexes that
inject interrupts (say qemu->ioctl(SET_INTERRUPT)->...->), so all i can
see is a tradeoff.



But the interrupt injection path seems to be pretty short and efficient
to happen in host interrupt context.



Avi, Gleb?

> Maybe another reason is kvm_kick_vcpu(), but have already fixed by you.

Note you tested the spinlock_irq patch with GigE and there was no
significant performance regression right?

> 
> Continue to check the code...
> 
> -- 
> regards
> Yang, Sheng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] virtio: find_vqs/del_vqs virtio operations

2009-05-12 Thread Rusty Russell
On Tue, 12 May 2009 07:49:32 am Michael S. Tsirkin wrote:
> This replaces find_vq/del_vq with find_vqs/del_vqs virtio operations,
> and updates all drivers. This is needed for MSI support, because MSI
> needs to know the total number of vectors upfront.

Sorry, is this not on top of my virtio_device vq linked list patch?

Two other things: prefer vq_callback_t as the type name, and perhaps consider 
varargs for the callbacks (or would that be too horrible at the implementation 
end?)

Thanks,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] make migration work again

2009-05-12 Thread Glauber Costa
Due to a small messup in version checking, migration was not working.
fix it.

Signed-off-by: Glauber Costa 
---
 target-i386/machine.c |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/target-i386/machine.c b/target-i386/machine.c
index 7f75d31..730f364 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -193,9 +193,6 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
 uint16_t fpus, fpuc, fptag, fpregs_format;
 int32_t a20_mask;
 
-if (version_id != 3 && version_id != 4 && version_id != 5
-&& version_id != 6 && version_id != 7 && version_id != 8)
-return -EINVAL;
 /* KVM cannot accept migrations from QEMU today */
 if (version_id != 9)
 return -EINVAL;
-- 
1.6.0.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] Userspace changes for KVM HPET (v3)

2009-05-12 Thread Beth Kon

Avi Kivity wrote:

Beth Kon wrote:

Signed-off-by: Beth Kon 


diff --git a/hw/hpet.c b/hw/hpet.c
index c7945ec..100abf5 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -30,6 +30,7 @@
 #include "console.h"
 #include "qemu-timer.h"
 #include "hpet_emul.h"
+#include "qemu-kvm.h"
 
 //#define HPET_DEBUG

 #ifdef HPET_DEBUG
@@ -48,6 +49,28 @@ uint32_t hpet_in_legacy_mode(void)
 return 0;
 }
 
+static void hpet_legacy_enable(void)

+{
+if (qemu_kvm_pit_in_kernel()) {
+kvm_kpit_disable();
+dprintf("qemu: hpet disabled kernel pit\n");
+} else {
+hpet_pit_disable();
+dprintf("qemu: hpet disabled userspace pit\n");
+}
+}
+
+static void hpet_legacy_disable(void)
+{
+if (qemu_kvm_pit_in_kernel()) {
+kvm_kpit_enable();
+dprintf("qemu: hpet enabled kernel pit\n");
+} else {
+hpet_pit_enable();
+dprintf("qemu: hpet enabled userspace pit\n");
+}
+}
  
I think it's better to move these into hpet_pit_enable() and 
hpet_pit_enable().  This avoids changing the calls below, and puts pit 
stuff in i8254.c instead of hpet.c.


Might also need to be called from hpet_load(); probably a problem in 
upstream as well.


My assumption about hpet_load was that the correct pit state would be 
established via pit_load (since all saves/loads are done together).  But 
when I wrote this, I was thinking only about the userspace pit (for 
qemu). I'm not sure how the "load" concept applies to kernel state.  Do 
I need to explicitly re-enable or disable the kernel pit during load?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH, RFC] virtio_blk: add cache flush command

2009-05-12 Thread Christian Borntraeger
Am Tuesday 12 May 2009 15:54:14 schrieb Rusty Russell:
> On Mon, 11 May 2009 06:09:08 pm Christoph Hellwig wrote:
> > Do we need a new feature flag for this command or can we expect that
> > all previous barrier support was buggy enough anyway?
> 
> You mean reuse the VIRTIO_BLK_F_BARRIER for this as well?  Seems fine.
> 
> AFAIK only lguest offered that, and lguest has no ABI.  Best would be to 
> implement VIRTIO_BLK_T_FLUSH as well; it's supposed to be demo code, and it 
> should be easy).

It is also used by kuli 
(http://www.ibm.com/developerworks/linux/linux390/kuli.html)
and kuli used fdatasync. Since kuli is on offical webpages it takes a while
to update that code. When you reuse the VIRTIO_BLK_F_BARRIER flag, that would 
trigger some VIRTIO_BLK_T_FLUSH commands sent to the kuli host, right?
I think the if/else logic of kuli would interpret that as a read requestI 
am voting for a new feature flag :-)

Christian



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] KVM: Fix potentially recursively get kvm lock

2009-05-12 Thread Yang, Sheng
On Tuesday 12 May 2009 19:55:24 Marcelo Tosatti wrote:
> On Tue, May 12, 2009 at 05:32:09PM +0800, Sheng Yang wrote:
> > kvm_vm_ioctl_deassign_dev_irq() would potentially recursively get
> > kvm->lock, because it called kvm_deassigned_irq() which implicit hold
> > kvm->lock by calling deassign_host_irq().
> >
> > Fix it by move kvm_deassign_irq() out of critial region. And add the
> > missing lock for deassign_guest_irq().
> >
> > Reported-by: Alex Williamson 
> > Signed-off-by: Sheng Yang 
> > ---
> >  virt/kvm/kvm_main.c |   14 +++---
> >  1 files changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 4d00942..3c69655 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -215,6 +215,8 @@ static void kvm_assigned_dev_ack_irq(struct
> > kvm_irq_ack_notifier *kian) static void deassign_guest_irq(struct kvm
> > *kvm,
> >struct kvm_assigned_dev_kernel *assigned_dev)
> >  {
> > +   mutex_lock(&kvm->lock);
> > +
> > kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
> > assigned_dev->ack_notifier.gsi = -1;
> >
> > @@ -222,6 +224,8 @@ static void deassign_guest_irq(struct kvm *kvm,
> > kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
> > assigned_dev->irq_source_id = -1;
> > assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
> > +
> > +   mutex_unlock(&kvm->lock);
> >  }
> >
> >  /* The function implicit hold kvm->lock mutex due to cancel_work_sync()
> > */ @@ -558,20 +562,16 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct
> > kvm *kvm, struct kvm_assigned_irq
> >  *assigned_irq)
> >  {
> > -   int r = -ENODEV;
> > struct kvm_assigned_dev_kernel *match;
> >
> > mutex_lock(&kvm->lock);
> > -
> > match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> >   assigned_irq->assigned_dev_id);
> > +   mutex_unlock(&kvm->lock);
>
> assigned_dev list is protected by kvm->lock. So you could have another
> ioctl adding to it at the same time you're searching.

Oh, yes... My fault... 

> Could either have a separate kvm->assigned_devs_lock, to protect
> kvm->arch.assigned_dev_head (users are ioctls that manipulate it), or
> change the IRQ injection to use a separate spinlock, kill the workqueue
> and call kvm_set_irq from the assigned device interrupt handler.

Peferred the latter, though needs more work. But the only reason for put a 
workqueue here is because kvm->lock is a mutex? I can't believe... If so, I 
think we had made a big mistake - we have to fix all kinds of racy problem 
caused by this, but finally find it's unnecessary... 

Maybe another reason is kvm_kick_vcpu(), but have already fixed by you.

Continue to check the code...

-- 
regards
Yang, Sheng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/4] BIOS changes for configuring irq0->inti2 override(v3)

2009-05-12 Thread Beth Kon

Gleb Natapov wrote:

On Mon, May 11, 2009 at 01:29:43PM -0400, Beth Kon wrote:
  

Signed-off-by: Beth Kon 

diff --git a/kvm/bios/rombios32.c b/kvm/bios/rombios32.c
index cbd5f15..53359b8 100755
--- a/kvm/bios/rombios32.c
+++ b/kvm/bios/rombios32.c
@@ -444,6 +444,9 @@ uint32_t cpuid_features;
 uint32_t cpuid_ext_features;
 unsigned long ram_size;
 uint64_t ram_end;
+#ifdef BX_QEMU
+uint8_t irq0_override;
+#endif
 #ifdef BX_USE_EBDA_TABLES
 unsigned long ebda_cur_addr;
 #endif
@@ -485,6 +488,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
 #define QEMU_CFG_ARCH_LOCAL 0x8000
 #define QEMU_CFG_ACPI_TABLES  (QEMU_CFG_ARCH_LOCAL + 0)
 #define QEMU_CFG_SMBIOS_ENTRIES  (QEMU_CFG_ARCH_LOCAL + 1)
+#define QEMU_CFG_IRQ0_OVERRIDE   (QEMU_CFG_ARCH_LOCAL + 2)
 
 int qemu_cfg_port;
 
@@ -553,6 +557,18 @@ uint64_t qemu_cfg_get64 (void)

 }
 #endif
 
+#ifdef BX_QEMU

+void irq0_override_probe(void)
+{
+if(qemu_cfg_port) {
+qemu_cfg_select(QEMU_CFG_IRQ0_OVERRIDE);
+qemu_cfg_read(&irq0_override, 1);
+return;
+}
+memset(&irq0_override, 0, 1);
+}


Why memset and not irq0_override = 0, actually it should zero already.

  
This was an oversight, left over from some early cut-and-paste coding I 
was doing. You're right - not necessary. Thanks.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
  


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >