Re: [PATCH 5/5] kvm: expose MSI capability to guest

2008-11-26 Thread Sheng Yang
On Monday 24 November 2008 19:50:35 Sheng Yang wrote:
> Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>

Oh, hold this one for a moment...

I don't want to deal with compatible problem of deliver msi_msg, so I would 
send out gsi->msi mapping patch and update the userspace patch.

-- 
regards
Yang, Sheng

> ---
>  qemu/hw/device-assignment.c |   90
> +++--- qemu/hw/device-assignment.h |   
> 2 +
>  2 files changed, 85 insertions(+), 7 deletions(-)
>
> diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
> index d3105bc..67bd6b3 100644
> --- a/qemu/hw/device-assignment.c
> +++ b/qemu/hw/device-assignment.c
> @@ -262,7 +262,8 @@ static void assigned_dev_pci_write_config(PCIDevice *d,
> uint32_t address, }
>
>  if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
> -address == 0x3c || address == 0x3d) {
> +address == 0x3c || address == 0x3d ||
> +pci_access_cap_config(d, address, len)) {
>  /* used for update-mappings (BAR emulation) */
>  pci_default_write_config(d, address, val, len);
>  return;
> @@ -296,7 +297,8 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice
> *d, uint32_t address, AssignedDevice *pci_dev = container_of(d,
> AssignedDevice, dev);
>
>  if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
> -address == 0x3c || address == 0x3d) {
> +address == 0x3c || address == 0x3d ||
> +pci_access_cap_config(d, address, len)) {
>  val = pci_default_read_config(d, address, len);
>  DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
>(d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val,
> len); @@ -325,11 +327,13 @@ do_log:
>  DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
>(d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
>
> -/* kill the special capabilities */
> -if (address == 4 && len == 4)
> -val &= ~0x10;
> -else if (address == 6)
> -val &= ~0x10;
> +if (!pci_dev->cap.available) {
> +/* kill the special capabilities */
> +if (address == 4 && len == 4)
> +val &= ~0x10;
> +else if (address == 6)
> +val &= ~0x10;
> +}
>
>  return val;
>  }
> @@ -537,6 +541,73 @@ void assigned_dev_update_irq(PCIDevice *d)
>  }
>  }
>
> +#ifdef KVM_CAP_DEVICE_MSI
> +static void assigned_dev_enable_msi(PCIDevice *pci_dev)
> +{
> +int r;
> +struct kvm_assigned_irq assigned_irq_data;
> +AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice,
> dev); +
> +memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
> +assigned_irq_data.assigned_dev_id  =
> +calc_assigned_dev_id(assigned_dev->h_busnr,
> +(uint8_t)assigned_dev->h_devfn);
> +assigned_irq_data.guest_msi.addr_lo = *(uint32_t *)
> +(pci_dev->cap.config + 4);
> +assigned_irq_data.guest_msi.data = *(uint16_t *)
> +(pci_dev->cap.config + 8);
> +assigned_irq_data.flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
> +r = kvm_assign_irq(kvm_context, &assigned_irq_data);
> +if (r < 0) {
> +perror("assigned_dev_enable_msi");
> +assigned_dev->cap.enabled &= ~ASSIGNED_DEVICE_MSI_ENABLED;
> +/* Fail to enable MSI, enable INTx instead */
> +assigned_dev_update_irq(pci_dev);
> +}
> +}
> +#endif
> +
> +void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t
> address, +  uint32_t val, int len)
> +{
> +AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice,
> dev); +uint32_t pos = pci_dev->cap.start;
> +uint8_t target_byte, target_position;
> +
> +pci_default_cap_write_config(pci_dev, address, val, len);
> +#ifdef KVM_CAP_DEVICE_MSI
> +/* Check if guest want to enable MSI */
> +if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> +target_position = pos + 2;
> +if (address <= target_position && address + len > target_position)
> { +target_byte = (uint8_t)(val >> (target_position - address));
> +if (target_byte == 1) {
> +assigned_dev->cap.enabled |= ASSIGNED_DEVICE_MSI_ENABLED;
> +assigned_dev_enable_msi(pci_dev);
> +if (!assigned_dev->cap.enabled &
> ASSIGNED_DEVICE_MSI_ENABLED) +   
> pci_dev->cap.config[target_position - pos] = 0; +}
> +}
> +pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
> +}
> +#endif
> +return;
> +}
> +
> +void assigned_device_pci_cap_init(PCIDevice *pci_dev)
> +{
> +AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev);
> +
> +#ifdef KVM_CAP_DEVICE_MSI
> +/* Expose MSI capability
> + * MSI capability is the 1st capability in cap.config */
> +if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> +pci_dev->cap.config[0] = 0x5;
> +pci_de

[ kvm-Bugs-2327497 ] NFS copy makes guest network unstable

2008-11-26 Thread SourceForge.net
Bugs item #2327497, was opened at 2008-11-22 07:53
Message generated for change (Comment added) made by jiajun
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2327497&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
>Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Jiajun Xu (jiajun)
Assigned to: Nobody/Anonymous (nobody)
Summary: NFS copy makes guest network unstable

Initial Comment:
The NFS network of KVM guest is very unstable. When we copy a >600M file to the 
guest by NFS mount. The guest's network will down after finishing at about 500M 
size. 
Then, guest's network is down. Host also can not use "ping" or "scp". And 
sometimes, host also complains: ping: sendmsg: No buffer space available. I see 
memory by 'free', there is only 69MB free (While totally 8GB on the machine!).

Using scp to copy file can not reproduce it. This issue is very easy to be 
reproduced (>50%). 


Reproduce steps:

1. Create a guest and config NFS sharing folder on it
2. Mount the nfs folder to local folder --- /media
3. cp xxx /media
4. After some time, guest network is down


--

>Comment By: Jiajun Xu (jiajun)
Date: 2008-11-26 19:43

Message:
We tried e1000, virtio and 8139cp driver. No such issue found.
Seems the issue only occur with 8139too driver.

--

Comment By: Avi Kivity (avik)
Date: 2008-11-25 01:47

Message:
I tested overnight with e1000, 2.5TB and still going.  Please retest with
e1000.

--

Comment By: Fabio Coatti (cova)
Date: 2008-11-24 07:47

Message:
I wouldn't be so sure of 8139 culprit. We are seeing this with e1000 and
virtio driver...

--

Comment By: Avi Kivity (avik)
Date: 2008-11-24 07:31

Message:
Seems to be a bug in the 8139too driver.  Please try with the 8139cp driver
(which has much better performance).

--

Comment By: Jiajun Xu (jiajun)
Date: 2008-11-23 23:01

Message:
We did not test such case before.
I think the issue also exists before.

--

Comment By: Avi Kivity (avik)
Date: 2008-11-23 13:13

Message:
It's almost certainly a problem with the qemu process, not the bridge.

--

Comment By: Fabio Coatti (cova)
Date: 2008-11-23 12:15

Message:
I can't find out easily wich kvm version worked (nor be sure that is kvm
executable itself to have issues), as the subsystems involved are quite a
lot an some time passet prior to spot the problem. (kvm itself, network
birdge, host kernel may be involved, of course). Now I'm trying to find out
the combination that worked, but at the same time I'll be willing to do
some tests to discover (on the actual non working setup) some hints, as the
bisection can be a very daunting task. (this issue has been noticed after
several upgrades).


--

Comment By: Avi Kivity (avik)
Date: 2008-11-23 10:40

Message:
Is this a regression, or a new test?

It it is a regression, what was the last version that worked?

--

Comment By: Fabio Coatti (cova)
Date: 2008-11-23 07:12

Message:
I can confirm a similar behaviour: a kvm machines gets large amounts of
data via http protocol and saves that files over NFS. (file sizes are in
the range of 4-20 MB approx and the machine downloads several of that
files.) After some time (I don't have a precise figure, but some hundreds
of MB) the guest nework goes down. No answers even to ping coming from
outside.
the guest uses virtio network drivers (as normal drivers are way too
slow)
host machine: 64 bit AMD dual quad core 16GB, tried with several kernels
ranging from 2.6.27.4 to 2.6.25.19
guest: 32 bit kvm machines (tried 76/77/78 ). both UP and SMP
configuration. kernels: same as host machine
network setup:
bridged network with br0 device on host machine. We are using 2 vlans for
guest and we have tried all the configuration (single tap and vlans
resolved on guest side,then two tap so two interfaces on guest machine and
so on) without any improvement. I can exclude MTU issues, as we have seen
that and solved, this issue is completely different.
At some point, sniffing traffic on host interfaces we are able to see only
ARP requests coming from guest, nothing more.

I understand that data is in no way complete, but I'm willing to do any
debug if someone gives me any hint on how to do so correctly. Thank

RE: [PATCH 1/2] VT-d: Support multiple device assignment for KVM

2008-11-26 Thread Han, Weidong
Yu, Fenghua wrote:
> Avi Kivity wrote:
>>> Han, Weidong wrote:
 In order to support multiple device assignment for KVM, this patch
does following main changes: - extend dmar_domain to own
 multiple devices from different iommus, use a bitmap of iommus to
replace iommu pointer in dmar_domain. - add a flag
 DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d usage. Many
 functions (e.g. intel_map_single() and intel_unmap_single()) won't
 be used by KVM VT-d. Let them return directly when this flag is
 set. 
 
>>> 
>>> 
>>> This seems brittle.  An API that has some functions shorted out
>>> depending on some flag is hard to understand and use.
> 
>> This flag just helps identify kvm VT-d usage, and let kvm VT-d APIs
>> reuse >native VT-d code, it needn't duplicate code for kvm VT-d
>> APIs, and it won't >impact existed native VT-d code.  
> 
>>> 
>>> We should either implement the functions, or split the API into a
>>> basic version that talks only to one device, and an expanded
>>> versions that talks to multiple devices, and is implemented by the
>>> using the lower level API.  This may require more changes due to
>>> the need to share io pagetables.
> 
>> The expanded versions that supports multiple devices will need to
>> change >dmar_domain, this will cause lots of changes, almost
>> duplicate the main >functions.  
> 
> I would agree with Avi. It's wired to merge the KVM api and native
> api together while they are logically separate. If we have two sets
> of interfaces for KVM and native iommu each, code is clean and robust
> and easy to extend in the future. If you well organize the code, we
> should have both good interface and shared code. While it makes sense
> to share code at beginning of KVM like this, it will get harder to
> maintain and evolve both KVM and native later, e.g. changing one part
> may break the other.

I will remove the flag judgement, and implement independent functions for KVM. 
   
> 
> Some comments on the code in this patch:
> 
> 1. You don't need to check DOMAIN_FLAG_VIRTUAL_MACHINE in the DMA
> functions. When the DMA functions are called, the domain should not
> have the DOMAIN_FLAG_VIRTUAL_MACHINE flag. Otherwise, the domain
> allocation code is wrong. 

Yes,  I will remove these meaningless checks.
  
> 
> 2. You need to initialize domain->flags=0 for DMA domain. It's random
> number after the domain is allocated by kmem_cache_alloc. 

Right. Thanks.

Regards,
Weidong

> 
> Thanks.
> 
> -Fenghua

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/5][v2] Userspace for MSI support of KVM

2008-11-26 Thread Sheng Yang
On Monday 24 November 2008 19:50:30 Sheng Yang wrote:
> Hi Avi & Anthony
>
> Here is the userspace for MSI support of KVM.
>
> Main change from v1:
> Make device assignment depends on libpci.
> Move capability framework to pci.c (this patch may can be accepted by
> QEmu).

Avi & Anthony

Any comments?

Can QEmu upstream accept "[PATCH 4/5] Support for device capability"?

Thanks!
-- 
regards
Yang, Sheng

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] markers: comment marker_synchronize_unregister() on data dependency

2008-11-26 Thread Wu Fengguang
On Thu, Nov 27, 2008 at 03:23:06AM +0200, Lai Jiangshan wrote:
> Wu Fengguang wrote:
> > [updated patch to include Documentation/markers.txt changes]
> > 
> > Add document and comments on marker_synchronize_unregister(): it
> > should be called before freeing resources that the probes depend on.
> > 
> > Based on comments from Lai Jiangshan and Mathieu Desnoyers.
> > 
> > Cc: Lai Jiangshan <[EMAIL PROTECTED]>
> > Cc: Mathieu Desnoyers <[EMAIL PROTECTED]>
> > Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
> > ---
> > diff --git a/Documentation/markers.txt b/Documentation/markers.txt
> > index 089f613..8bf6afe 100644
> > --- a/Documentation/markers.txt
> > +++ b/Documentation/markers.txt
> > @@ -51,11 +51,15 @@ to call) for the specific marker through 
> > marker_probe_register() and can be
> >  activated by calling marker_arm(). Marker deactivation can be done by 
> > calling
> >  marker_disarm() as many times as marker_arm() has been called. Removing a 
> > probe
> >  is done through marker_probe_unregister(); it will disarm the probe.
> > -marker_synchronize_unregister() must be called before the end of the 
> > module exit
> > -function to make sure there is no caller left using the probe. This, and 
> > the
> > -fact that preemption is disabled around the probe call, make sure that 
> > probe
> > -removal and module unload are safe. See the "Probe example" section below 
> > for a
> > -sample probe module.
> > +
> > +marker_synchronize_unregister() must be called before the first occurrence 
> > of
> > +- the end of the module exit function,
> > +  to make sure there is no caller left using the probe;
> > +- the free of any resource used by the probes,
> > +  to make sure the probes wont be accessing destructed data.
> > +This, and the fact that preemption is disabled around the probe call, make 
> > sure
> > +that probe removal and module unload are safe. See the "Probe example" 
> > section
> > +below for a sample probe module.
> >  
> >  The marker mechanism supports inserting multiple instances of the same 
> > marker.
> >  Markers can be put in inline functions, inlined static functions, and
> > diff --git a/include/linux/marker.h b/include/linux/marker.h
> > index 889196c..32ce4f2 100644
> > --- a/include/linux/marker.h
> > +++ b/include/linux/marker.h
> > @@ -162,8 +162,10 @@ extern void *marker_get_private_data(const char *name, 
> > marker_probe_func *probe,
> >  
> >  /*
> >   * marker_synchronize_unregister must be called between the last marker 
> > probe
> > - * unregistration and the end of module exit to make sure there is no 
> > caller
> > - * executing a probe when it is freed.
> > + * unregistration and the first one of
> > + * - the end of module exit function
> > + * - the free of any resource used by the probes
> 
> Does "destruction" contain the meaning of "free" and other destruction 
> behavior?

Ahh, better to use "invalid data" instead of "destructed data"?

> It's every good job, thank you.
> 
> Reviewed-by: Lai Jiangshan <[EMAIL PROTECTED]>

Thank you,
Fengguang
---
markers: comment marker_synchronize_unregister() on data dependency

Add document and comments on marker_synchronize_unregister(): it
should be called before freeing resources that the probes depend on.

Based on comments from Lai Jiangshan and Mathieu Desnoyers.

Cc: Mathieu Desnoyers <[EMAIL PROTECTED]>
Reviewed-by: Lai Jiangshan <[EMAIL PROTECTED]>
Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
---
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index 089f613..8bf6afe 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -51,11 +51,15 @@ to call) for the specific marker through 
marker_probe_register() and can be
 activated by calling marker_arm(). Marker deactivation can be done by calling
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
 is done through marker_probe_unregister(); it will disarm the probe.
-marker_synchronize_unregister() must be called before the end of the module 
exit
-function to make sure there is no caller left using the probe. This, and the
-fact that preemption is disabled around the probe call, make sure that probe
-removal and module unload are safe. See the "Probe example" section below for a
-sample probe module.
+
+marker_synchronize_unregister() must be called before the first occurrence of
+- the end of the module exit function,
+  to make sure there is no caller left using the probe;
+- the free of any resource used by the probes,
+  to make sure the probes wont be accessing invalid data.
+This, and the fact that preemption is disabled around the probe call, make sure
+that probe removal and module unload are safe. See the "Probe example" section
+below for a sample probe module.
 
 The marker mechanism supports inserting multiple instances of the same marker.
 Markers can be put in inline functions, inlined static functions, and
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c..

Re: [PATCH] markers: comment marker_synchronize_unregister() on data dependency

2008-11-26 Thread Lai Jiangshan
Wu Fengguang wrote:
> [updated patch to include Documentation/markers.txt changes]
> 
> Add document and comments on marker_synchronize_unregister(): it
> should be called before freeing resources that the probes depend on.
> 
> Based on comments from Lai Jiangshan and Mathieu Desnoyers.
> 
> Cc: Lai Jiangshan <[EMAIL PROTECTED]>
> Cc: Mathieu Desnoyers <[EMAIL PROTECTED]>
> Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
> ---
> diff --git a/Documentation/markers.txt b/Documentation/markers.txt
> index 089f613..8bf6afe 100644
> --- a/Documentation/markers.txt
> +++ b/Documentation/markers.txt
> @@ -51,11 +51,15 @@ to call) for the specific marker through 
> marker_probe_register() and can be
>  activated by calling marker_arm(). Marker deactivation can be done by calling
>  marker_disarm() as many times as marker_arm() has been called. Removing a 
> probe
>  is done through marker_probe_unregister(); it will disarm the probe.
> -marker_synchronize_unregister() must be called before the end of the module 
> exit
> -function to make sure there is no caller left using the probe. This, and the
> -fact that preemption is disabled around the probe call, make sure that probe
> -removal and module unload are safe. See the "Probe example" section below 
> for a
> -sample probe module.
> +
> +marker_synchronize_unregister() must be called before the first occurrence of
> +- the end of the module exit function,
> +  to make sure there is no caller left using the probe;
> +- the free of any resource used by the probes,
> +  to make sure the probes wont be accessing destructed data.
> +This, and the fact that preemption is disabled around the probe call, make 
> sure
> +that probe removal and module unload are safe. See the "Probe example" 
> section
> +below for a sample probe module.
>  
>  The marker mechanism supports inserting multiple instances of the same 
> marker.
>  Markers can be put in inline functions, inlined static functions, and
> diff --git a/include/linux/marker.h b/include/linux/marker.h
> index 889196c..32ce4f2 100644
> --- a/include/linux/marker.h
> +++ b/include/linux/marker.h
> @@ -162,8 +162,10 @@ extern void *marker_get_private_data(const char *name, 
> marker_probe_func *probe,
>  
>  /*
>   * marker_synchronize_unregister must be called between the last marker probe
> - * unregistration and the end of module exit to make sure there is no caller
> - * executing a probe when it is freed.
> + * unregistration and the first one of
> + * - the end of module exit function
> + * - the free of any resource used by the probes

Does "destruction" contain the meaning of "free" and other destruction behavior?

It's every good job, thank you.

Reviewed-by: Lai Jiangshan <[EMAIL PROTECTED]>

> + * to ensure the code and data are valid for any possibly running probes.
>   */
>  #define marker_synchronize_unregister() synchronize_sched()
>  
> 


> 
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] markers: comment marker_synchronize_unregister() on data dependency

2008-11-26 Thread Wu Fengguang
[updated patch to include Documentation/markers.txt changes]

Add document and comments on marker_synchronize_unregister(): it
should be called before freeing resources that the probes depend on.

Based on comments from Lai Jiangshan and Mathieu Desnoyers.

Cc: Lai Jiangshan <[EMAIL PROTECTED]>
Cc: Mathieu Desnoyers <[EMAIL PROTECTED]>
Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
---
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index 089f613..8bf6afe 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -51,11 +51,15 @@ to call) for the specific marker through 
marker_probe_register() and can be
 activated by calling marker_arm(). Marker deactivation can be done by calling
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
 is done through marker_probe_unregister(); it will disarm the probe.
-marker_synchronize_unregister() must be called before the end of the module 
exit
-function to make sure there is no caller left using the probe. This, and the
-fact that preemption is disabled around the probe call, make sure that probe
-removal and module unload are safe. See the "Probe example" section below for a
-sample probe module.
+
+marker_synchronize_unregister() must be called before the first occurrence of
+- the end of the module exit function,
+  to make sure there is no caller left using the probe;
+- the free of any resource used by the probes,
+  to make sure the probes wont be accessing destructed data.
+This, and the fact that preemption is disabled around the probe call, make sure
+that probe removal and module unload are safe. See the "Probe example" section
+below for a sample probe module.
 
 The marker mechanism supports inserting multiple instances of the same marker.
 Markers can be put in inline functions, inlined static functions, and
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c..32ce4f2 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -162,8 +162,10 @@ extern void *marker_get_private_data(const char *name, 
marker_probe_func *probe,
 
 /*
  * marker_synchronize_unregister must be called between the last marker probe
- * unregistration and the end of module exit to make sure there is no caller
- * executing a probe when it is freed.
+ * unregistration and the first one of
+ * - the end of module exit function
+ * - the free of any resource used by the probes
+ * to ensure the code and data are valid for any possibly running probes.
  */
 #define marker_synchronize_unregister() synchronize_sched()
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] PCI: allow pci driver to support only dynids

2008-11-26 Thread Chris Wright
* Jean Delvare ([EMAIL PROTECTED]) wrote:
> Ah, OK, then your patch makes full sense and mine doesn't, sorry for
> the noise. Feel free to add my Acked-by on your patch.

Thanks Jean.
-chris
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] PCI: allow pci driver to support only dynids

2008-11-26 Thread Jean Delvare
Hi Chris,

On Wed, 26 Nov 2008 07:19:40 -0800, Chris Wright wrote:
> * Jean Delvare ([EMAIL PROTECTED]) wrote:
> > As a side note, I am curious what PCI driver we do have which has
> > driver->probe defined but no driver->id_table. Did you hit an actual
> > issue or are you fixing a theoretical one?
> 
> Such a driver was patch 2/2.
> 
> http://lkml.org/lkml/2008/11/26/11
> 
> It's meant to have only dynamic ids, worked fine on 2.6.27 and oopsed
> from NULL id-> deref on current git.

Ah, OK, then your patch makes full sense and mine doesn't, sorry for
the noise. Feel free to add my Acked-by on your patch.

-- 
Jean Delvare
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [SR-IOV driver example 0/3] introduction

2008-11-26 Thread Jeff Garzik

Yu Zhao wrote:

SR-IOV drivers of Intel 82576 NIC are available. There are two parts
of the drivers: Physical Function driver and Virtual Function driver.
The PF driver is based on the IGB driver and is used to control PF to
allocate hardware specific resources and interface with the SR-IOV core.
The VF driver is a new NIC driver that is same as the traditional PCI
device driver. It works in both the host and the guest (Xen and KVM)
environment.

These two drivers are testing versions and they are *only* intended to
show how to use SR-IOV API.

Intel 82576 NIC specification can be found at:
http://download.intel.com/design/network/datashts/82576_Datasheet_v2p1.pdf

[SR-IOV driver example 1/3] PF driver: allocate hardware specific resource
[SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core
[SR-IOV driver example 3/3] VF driver tar ball


Please copy [EMAIL PROTECTED] on all network-related patches.  This 
is where the network developers live, and all patches on this list are 
automatically archived for review and handling at 
http://patchwork.ozlabs.org/project/netdev/list/


Jeff



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core

2008-11-26 Thread Greg KH
On Wed, Nov 26, 2008 at 11:27:10AM -0800, Nakajima, Jun wrote:
> On 11/26/2008 8:58:59 AM, Greg KH wrote:
> > On Wed, Nov 26, 2008 at 10:21:56PM +0800, Yu Zhao wrote:
> > > This patch integrates the IGB driver with the SR-IOV core. It shows
> > > how the SR-IOV API is used to support the capability. Obviously
> > > people does not need to put much effort to integrate the PF driver
> > > with SR-IOV core. All SR-IOV standard stuff are handled by SR-IOV
> > > core and PF driver once it gets the necessary information (i.e.
> > > number of Virtual
> > > Functions) from the callback function.
> > >
> > > ---
> > >  drivers/net/igb/igb_main.c |   30 ++
> > >  1 files changed, 30 insertions(+), 0 deletions(-)
> > >
> > > diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
> > > index bc063d4..b8c7dc6 100644
> > > --- a/drivers/net/igb/igb_main.c
> > > +++ b/drivers/net/igb/igb_main.c
> > > @@ -139,6 +139,7 @@ void igb_set_mc_list_pools(struct igb_adapter *,
> > > struct e1000_hw *, int, u16);  static int igb_vmm_control(struct
> > > igb_adapter *, bool);  static int igb_set_vf_mac(struct net_device
> > > *, int, u8*);  static void igb_mbox_handler(struct igb_adapter *);
> > > +static int igb_virtual(struct pci_dev *, int);
> > >  #endif
> > >
> > >  static int igb_suspend(struct pci_dev *, pm_message_t); @@ -184,6
> > > +185,9 @@ static struct pci_driver igb_driver = {  #endif
> > > .shutdown = igb_shutdown,
> > > .err_handler = &igb_err_handler,
> > > +#ifdef CONFIG_PCI_IOV
> > > +   .virtual = igb_virtual
> > > +#endif
> >
> > #ifdef should not be needed, right?
> >
> 
> Good point. I think this is because the driver is expected to build on
> older kernels also,

That should not be an issue for patches that are being submitted, right?

And if this is the case, shouldn't it be called out in the changelog
entry?

> but the problem is that the driver (and probably others) is broken
> unless the kernel is built with CONFIG_PCI_IOV because of the
> following hunk, for example.
> 
> However, we don't want to use #ifdef for the (*virtual) field in the
> header. One option would be to define a constant like the following
> along with those changes.
> #define PCI_DEV_IOV
> 
> Any better idea?

Just always declare it in your driver, which will be added _after_ this
field gets added to the kernel tree as well.  It's not a big deal, just
an ordering of patches issue.

Because remember, don't add #ifdefs to drivers, they should not be
needed at all.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core

2008-11-26 Thread Nakajima, Jun
On 11/26/2008 8:58:59 AM, Greg KH wrote:
> On Wed, Nov 26, 2008 at 10:21:56PM +0800, Yu Zhao wrote:
> > This patch integrates the IGB driver with the SR-IOV core. It shows
> > how the SR-IOV API is used to support the capability. Obviously
> > people does not need to put much effort to integrate the PF driver
> > with SR-IOV core. All SR-IOV standard stuff are handled by SR-IOV
> > core and PF driver once it gets the necessary information (i.e.
> > number of Virtual
> > Functions) from the callback function.
> >
> > ---
> >  drivers/net/igb/igb_main.c |   30 ++
> >  1 files changed, 30 insertions(+), 0 deletions(-)
> >
> > diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
> > index bc063d4..b8c7dc6 100644
> > --- a/drivers/net/igb/igb_main.c
> > +++ b/drivers/net/igb/igb_main.c
> > @@ -139,6 +139,7 @@ void igb_set_mc_list_pools(struct igb_adapter *,
> > struct e1000_hw *, int, u16);  static int igb_vmm_control(struct
> > igb_adapter *, bool);  static int igb_set_vf_mac(struct net_device
> > *, int, u8*);  static void igb_mbox_handler(struct igb_adapter *);
> > +static int igb_virtual(struct pci_dev *, int);
> >  #endif
> >
> >  static int igb_suspend(struct pci_dev *, pm_message_t); @@ -184,6
> > +185,9 @@ static struct pci_driver igb_driver = {  #endif
> > .shutdown = igb_shutdown,
> > .err_handler = &igb_err_handler,
> > +#ifdef CONFIG_PCI_IOV
> > +   .virtual = igb_virtual
> > +#endif
>
> #ifdef should not be needed, right?
>

Good point. I think this is because the driver is expected to build on older 
kernels also, but the problem is that the driver (and probably others) is 
broken unless the kernel is built with CONFIG_PCI_IOV because of the following 
hunk, for example.

However, we don't want to use #ifdef for the (*virtual) field in the header. 
One option would be to define a constant like the following along with those 
changes.
#define PCI_DEV_IOV

Any better idea?

Thanks,
 .
Jun Nakajima | Intel Open Source Technology Center


@@ -259,6 +266,7 @@ struct pci_dev {
struct list_head msi_list;
 #endif
struct pci_vpd *vpd;
+   struct pci_iov *iov;
 };

 extern struct pci_dev *alloc_pci_dev(void); @@ -426,6 +434,7 @@ struct 
pci_driver {
int  (*resume_early) (struct pci_dev *dev);
int  (*resume) (struct pci_dev *dev);   /* Device woken 
up */
void (*shutdown) (struct pci_dev *dev);
+   int (*virtual) (struct pci_dev *dev, int nr_virtfn);
struct pm_ext_ops *pm;
struct pci_error_handlers *err_handler;
struct device_driverdriver;

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/2] VT-d: Support multiple device assignment for KVM

2008-11-26 Thread Yu, Fenghua
Avi Kivity wrote:
>> Han, Weidong wrote:
>>> In order to support multiple device assignment for KVM, this patch
>>> does following main changes: 
>>>- extend dmar_domain to own multiple devices from different
>>> iommus, use a bitmap of iommus to replace iommu pointer in
>>> dmar_domain.  
>>>- add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d
>>> usage. Many functions (e.g. intel_map_single() and
>>> intel_unmap_single()) won't be used by KVM VT-d. Let them return
>>> directly when this flag is set.   
>>> 
>> 
>> 
>> This seems brittle.  An API that has some functions shorted out
>> depending on some flag is hard to understand and use.

>This flag just helps identify kvm VT-d usage, and let kvm VT-d APIs reuse 
>>native VT-d code, it needn't duplicate code for kvm VT-d APIs, and it won't 
>>impact existed native VT-d code. 

>> 
>> We should either implement the functions, or split the API into a
>> basic version that talks only to one device, and an expanded versions
>> that talks to multiple devices, and is implemented by the using the
>> lower level API.  This may require more changes due to the need to
>> share io pagetables.

>The expanded versions that supports multiple devices will need to change 
>>dmar_domain, this will cause lots of changes, almost duplicate the main 
>>functions.

I would agree with Avi. It's wired to merge the KVM api and native api together 
while they are logically separate. If we have two sets of interfaces for KVM 
and native iommu each, code is clean and robust and easy to extend in the 
future. If you well organize the code, we should have both good interface and 
shared code. While it makes sense to share code at beginning of KVM like this, 
it will get harder to maintain and evolve both KVM and native later, e.g. 
changing one part may break the other.

Some comments on the code in this patch:

1. You don't need to check DOMAIN_FLAG_VIRTUAL_MACHINE in the DMA functions. 
When the DMA functions are called, the domain should not have the 
DOMAIN_FLAG_VIRTUAL_MACHINE flag. Otherwise, the domain allocation code is 
wrong.

2. You need to initialize domain->flags=0 for DMA domain. It's random number 
after the domain is allocated by kmem_cache_alloc.

Thanks.

-Fenghua
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Remove TARGET_PAGE_SIZE from virtio interface

2008-11-26 Thread Hollis Blanchard
On Wed, 2008-11-26 at 12:22 -0600, Hollis Blanchard wrote:
> 
> diff --git a/hw/virtio.h b/hw/virtio.h
> index 1df8f83..c23f38c 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -47,6 +47,11 @@
>  /* This means don't interrupt guest when buffer consumed. */
>  #define VRING_AVAIL_F_NO_INTERRUPT1
> 
> +static inline vring_align(unsigned long addr, unsigned long align)
> +{
> +return (addr + align - 1) & ~(align - 1);
> +}
> +
>  typedef struct VirtQueue VirtQueue;
>  typedef struct VirtIODevice VirtIODevice;

OK, obviously this doesn't need to be named "vring_align". I was going
to just build VIRTIO_PCI_VRING_ALIGN into this function, but in the
future we'll need to accommodate KVM_S390_VIRTIO_RING_ALIGN, so we would
need to pass in a parameter from virtqueue_init(). Of course, I'm not
sure how the S390 code will fit here anyways, since virtio.c is both
virtio ring and virtio PCI.

I haven't found an existing "align" function in qemu though...

-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH 2/2] Virtio block device support

2008-11-26 Thread Hollis Blanchard
On Tue, 2008-11-25 at 15:57 -0600, Anthony Liguori wrote:
> This has been posted before but I believe it now has addressed all outstanding
> concerns.  I'd like to apply it if there are no objections.
> 
> This patch adds virtio-blk support to QEMU.  virtio-blk is a paravirtual disk
> controller that can achieve good performance when using KVM.
> 
> Since virtio is based on a scatter/gather API, we don't have a linear buffer
> for each request.  This forces us to allocate a linear buffer since the 
> current
> block driver API does not have a scatter/gather operation.  This allocation
> can never exceed the maximum data limit on the ring queue so it isn't
> unbounded.
> 
> posix-aio cannot support a scatter/gather asynchronous operation so we'll need
> to introduce our own thread pool to eliminate this limitation.  There is work
> underway to do this.
> 
> Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>

Tested and working (in conjunction with the virtio page size patch I
just posted).

-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Remove TARGET_PAGE_SIZE from virtio interface

2008-11-26 Thread Hollis Blanchard
TARGET_PAGE_SIZE should only be used internal to qemu, not in guest/host
interfaces. The virtio frontend code in Linux uses two constants (PFN shift
and vring alignment) for the interface, so update qemu to match.

I've tested this with PowerPC KVM and confirmed that it fixes virtio problems
when using non-TARGET_PAGE_SIZE pages in the guest.

Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]>
---
 hw/virtio.c |   16 +---
 hw/virtio.h |5 +
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/hw/virtio.c b/hw/virtio.c
index e4224ab..0134b0b 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -51,6 +51,14 @@
 /* Virtio ABI version, if we increment this, we break the guest driver. */
 #define VIRTIO_PCI_ABI_VERSION   0
 
+/* How many bits to shift physical queue address written to QUEUE_PFN.
+ * 12 is historical, and due to x86 page size. */
+#define VIRTIO_PCI_QUEUE_ADDR_SHIFT12
+
+/* The alignment to use between consumer and producer parts of vring.
+ * x86 pagesize again. */
+#define VIRTIO_PCI_VRING_ALIGN 4096
+
 /* QEMU doesn't strictly need write barriers since everything runs in
  * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
  * KVM or if kqemu gets SMP support.
@@ -110,7 +118,9 @@ static void virtqueue_init(VirtQueue *vq, 
target_phys_addr_t pa)
 {
 vq->vring.desc = pa;
 vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
-vq->vring.used = TARGET_PAGE_ALIGN(vq->vring.avail + offsetof(VRingAvail, 
ring[vq->vring.num]));
+vq->vring.used = vring_align(vq->vring.avail +
+ offsetof(VRingAvail, ring[vq->vring.num]),
+ VIRTIO_PCI_VRING_ALIGN);
 }
 
 static inline uint64_t vring_desc_addr(VirtQueue *vq, int i)
@@ -386,7 +396,7 @@ static void virtio_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
 vdev->features = val;
 break;
 case VIRTIO_PCI_QUEUE_PFN:
-pa = (ram_addr_t)val << TARGET_PAGE_BITS;
+pa = (ram_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
 vdev->vq[vdev->queue_sel].pfn = val;
 if (pa == 0)
 virtio_reset(vdev);
@@ -660,7 +670,7 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
 if (vdev->vq[i].pfn) {
 target_phys_addr_t pa;
 
-pa = (ram_addr_t)vdev->vq[i].pfn << TARGET_PAGE_BITS;
+pa = (ram_addr_t)vdev->vq[i].pfn << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
 virtqueue_init(&vdev->vq[i], pa);
 }
 }
diff --git a/hw/virtio.h b/hw/virtio.h
index 1df8f83..c23f38c 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -47,6 +47,11 @@
 /* This means don't interrupt guest when buffer consumed. */
 #define VRING_AVAIL_F_NO_INTERRUPT1
 
+static inline vring_align(unsigned long addr, unsigned long align)
+{
+return (addr + align - 1) & ~(align - 1);
+}
+
 typedef struct VirtQueue VirtQueue;
 typedef struct VirtIODevice VirtIODevice;
 
-- 
1.5.6.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [PATCH 1/1] Virtio block device support

2008-11-26 Thread Hollis Blanchard
On Wed, 2008-11-26 at 12:11 -0600, Hollis Blanchard wrote:
> From: Anthony Liguori <[EMAIL PROTECTED]>

Hmm, this is not my patch. :) Let me try again...

-- 
Hollis Blanchard
IBM Linux Technology Center

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/1] Virtio block device support

2008-11-26 Thread Hollis Blanchard
From: Anthony Liguori <[EMAIL PROTECTED]>

This has been posted before but I believe it now has addressed all outstanding
concerns.  I'd like to apply it if there are no objections.

This patch adds virtio-blk support to QEMU.  virtio-blk is a paravirtual disk
controller that can achieve good performance when using KVM.

Since virtio is based on a scatter/gather API, we don't have a linear buffer
for each request.  This forces us to allocate a linear buffer since the current
block driver API does not have a scatter/gather operation.  This allocation
can never exceed the maximum data limit on the ring queue so it isn't
unbounded.

posix-aio cannot support a scatter/gather asynchronous operation so we'll need
to introduce our own thread pool to eliminate this limitation.  There is work
underway to do this.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
---
 Makefile.target |2 +-
 hw/pc.c |   12 ++
 hw/pc.h |4 +
 hw/virtio-blk.c |  303 +++
 sysemu.h|2 +-
 vl.c|4 +
 6 files changed, 325 insertions(+), 2 deletions(-)
 create mode 100644 hw/virtio-blk.c

diff --git a/Makefile.target b/Makefile.target
index f7fc3a2..773b615 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -588,7 +588,7 @@ endif #CONFIG_BSD_USER
 ifndef CONFIG_USER_ONLY
 
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o
-OBJS+=fw_cfg.o virtio.o
+OBJS+=fw_cfg.o virtio.o virtio-blk.o
 ifdef CONFIG_KVM
 OBJS+=kvm.o kvm-all.o
 endif
diff --git a/hw/pc.c b/hw/pc.c
index 1486b68..8651e49 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -1092,6 +1092,18 @@ static void pc_init1(ram_addr_t ram_size, int 
vga_ram_size,
}
 }
 }
+
+/* Add virtio block devices */
+if (pci_enabled) {
+int index;
+int unit_id = 0;
+
+while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
+virtio_blk_init(pci_bus, 0x1AF4, 0x1001,
+drives_table[index].bdrv);
+unit_id++;
+}
+}
 }
 
 static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
diff --git a/hw/pc.h b/hw/pc.h
index f156b9e..edbc5e8 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -152,4 +152,8 @@ void pci_piix4_ide_init(PCIBus *bus, BlockDriverState 
**hd_table, int devfn,
 
 void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd);
 
+/* virtio-blk.c */
+void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device,
+  BlockDriverState *bs);
+
 #endif
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
new file mode 100644
index 000..da87b66
--- /dev/null
+++ b/hw/virtio-blk.c
@@ -0,0 +1,303 @@
+/*
+ * Virtio Block Device
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ *  Anthony Liguori   <[EMAIL PROTECTED]>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "virtio.h"
+#include "block.h"
+#include "block_int.h"
+#include "pc.h"
+
+/* from Linux's linux/virtio_blk.h */
+
+/* The ID for virtio_block */
+#define VIRTIO_ID_BLOCK2
+
+/* Feature bits */
+#define VIRTIO_BLK_F_BARRIER   0  /* Does host support barriers? */
+#define VIRTIO_BLK_F_SIZE_MAX  1 /* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX   2 /* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY  4 /* Indicates support of legacy geometry */
+
+struct virtio_blk_config
+{
+uint64_t capacity;
+uint32_t size_max;
+uint32_t seg_max;
+uint16_t cylinders;
+uint8_t heads;
+uint8_t sectors;
+} __attribute__((packed));
+
+/* These two define direction. */
+#define VIRTIO_BLK_T_IN0
+#define VIRTIO_BLK_T_OUT   1
+
+/* This bit says it's a scsi command, not an actual read or write. */
+#define VIRTIO_BLK_T_SCSI_CMD  2
+
+/* Barrier before this op. */
+#define VIRTIO_BLK_T_BARRIER   0x8000
+
+/* This is the first element of the read scatter-gather list. */
+struct virtio_blk_outhdr
+{
+/* VIRTIO_BLK_T* */
+uint32_t type;
+/* io priority. */
+uint32_t ioprio;
+/* Sector (ie. 512 byte offset) */
+uint64_t sector;
+};
+
+#define VIRTIO_BLK_S_OK0
+#define VIRTIO_BLK_S_IOERR 1
+#define VIRTIO_BLK_S_UNSUPP2
+
+/* This is the first element of the write scatter-gather list */
+struct virtio_blk_inhdr
+{
+unsigned char status;
+};
+
+typedef struct VirtIOBlock
+{
+VirtIODevice vdev;
+BlockDriverState *bs;
+VirtQueue *vq;
+} VirtIOBlock;
+
+static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
+{
+return (VirtIOBlock *)vdev;
+}
+
+typedef struct VirtIOBlockReq
+{
+VirtIOBlock *dev;
+VirtQueueElement elem;
+struct virtio_blk_inhdr *in;
+struct virtio_blk_outhdr *out;
+size_t size;
+uint8_t *buffer;
+} VirtIOBlockReq;
+
+static void virtio_blk_rw_complete(void *opaque, int ret)
+{
+VirtIOBlockReq *req = opaque;
+VirtIOBlock *s = req->d

[ kvm-Bugs-2351433 ] unhandled vm exit: 0x80000021 running UnixWare guest

2008-11-26 Thread SourceForge.net
Bugs item #2351433, was opened at 2008-11-26 17:39
Message generated for change (Comment added) made by hughesj
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: intel
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: John Hughes (hughesj)
Assigned to: Nobody/Anonymous (nobody)
Summary: unhandled vm exit: 0x8021 running UnixWare guest

Initial Comment:
CPU: Xeon 3050
Vendor: Intel
Bits: 64
Host distribution: Debian Lenny, kernel kernel 2.6.26-1-amd64
Guest: SCO UnixWare 7.1.1 (SVR5), 32 bits

Running a UnixWare (SVR5) guest with kvm 79,  debian build kernel 
2.6.26-1-amd64 on an Intel Xeon 3050 it crashes during boot with:

$ qemu-system-x86_64 -net nic,model=ne2k_isa -net user -hda UnixWare.img
unhandled vm exit: 0x8021 vcpu_id 0
rax b101 rbx  rcx  rdx 

rsi  rdi  rsp 0ff8 rbp 

r8   r9   r10  r11 

r12  r13  r14  r15 

rip fe6e rflags 00023002
cs 0100 (/ p 1 dpl 0 db 1 s 1 type b l 0 g 1 avl 0)
ds 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
es  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
ss 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
fs  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
gs  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
tr 0148 (c04405c0/2067 p 1 dpl 0 db 0 s 0 type b l 0 g 0 avl 0)
ldt  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
gdt 5020/2cf
idt 52f0/7ff
cr0 8001003b cr2 0 cr3 1005000 cr4 2d4 cr8 0 efer 0
Aborted

--

Comment By: John Hughes (hughesj)
Date: 2008-11-26 19:09

Message:
Using kvm 72 in 32 bit mode on the same hardware I get

unhandled vm exit: 0x9 vcpu_id 0
rax 8001003b rbx 0ff8 rcx f000 rdx
f000
rsi fffda500 rdi 0001 rsp fffda4b4 rbp
c043d818
r8   r9   r10  r11

r12  r13  r14  r15

rip c005f980 rflags 0202
cs 0100 (/ p 1 dpl 0 db 1 s 1 type b l 0 g 1 avl 0)
ds 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
es 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
ss 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
fs  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
gs  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
tr 0110 (5af0/0067 p 1 dpl 0 db 0 s 0 type b l 0 g 0 avl 0)
ldt 0118 (c0812bf8/0047 p 1 dpl 0 db 0 s 0 type 2 l 0 g 0 avl 0)
gdt 5020/2cf
idt 52f0/7ff
cr0 8001003b cr2 0 cr3 1005000 cr4 2d4 cr8 0 efer 0
Aborted


--

Comment By: John Hughes (hughesj)
Date: 2008-11-26 17:41

Message:
(Works with -no-kvm).

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2351676 ] Guests hang periodically on Ubuntu-8.10

2008-11-26 Thread SourceForge.net
Bugs item #2351676, was opened at 2008-11-26 12:59
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351676&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Chris Jones (c_jones)
Assigned to: Nobody/Anonymous (nobody)
Summary: Guests hang periodically on Ubuntu-8.10

Initial Comment:
I'm seeing periodic hangs on my guests.  I've been unable so far to find a 
trigger - they always boot fine, but after anywhere from 10 minutes to 24 hours 
they eventually hang completely.

My setup:
  * AMD Athlon X2 4850e (2500 MHz dual core)
  * 4Gig memory
  * Ubuntu 8.10 server, 64-bit
  * KVMs tried:
: kvm-72 (shipped with ubuntu)
: kvm-79 (built myself, --patched-kernel option)
  * Kernels tried:
: 2.6.27.7 (kernel.org, self built)
: 2.6.27-7-server from Ubuntu 8.10 distribution

  In guests
  * Ubuntu 8.10 server, 64-bit (virtual machine install)
  * kernel 2.6.27-7-server from Ubuntu 8.10

I'm running the guests like:
  sudo /usr/local/bin/qemu-system-x86_64\
 -daemonize \
 -no-kvm-irqchip\
 -hda Imgs/ndev_root.img\
 -m 1024\
 -cdrom ISOs/ubuntu-8.10-server-amd64.iso   \
 -vnc :4\
 -net nic,macaddr=DE:AD:BE:EF:04:04,model=e1000 \
 -net tap,ifname=tap4,script=/home/chris/kvm/qemu-ifup.sh 

The problem does not happen if I use -no-kvm.

I've tried some other options that have no effect:
  -no-kvm-pit
  -no-acpi

The disk images are raw format.

When the guests hang, I cannot ping them, and the vnc console us hung.  The 
qemu monitor is still accessible, and the guests recover if I issue a 
system_reset command from the monitor.  However, often, the console will not 
take keyboard after doing so.

When the guest is hung, kvm_stat shows all 0s for the counters:

efer_relo  exits  fpu_reloa  halt_exit  halt_wake  host_stat  hypercall
+insn_emul  insn_emul invlpg   io_exits  irq_exits  irq_windo  largepage
+mmio_exit  mmu_cache  mmu_flood  mmu_pde_z  mmu_pte_u  mmu_pte_w  mmu_recyc
+mmu_shado  nmi_windo   pf_fixed   pf_guest  remote_tl  request_i  signal_ex
+tlb_flush
>  0  0  0  0  0  0  0
+0  0  0  0  0  0  0  0
+0  0  0  0  0  0  0  0
+0  0  0  0  0  0

gdb shows two threads - both waiting:

c(gdb) info threads
  2 Thread 0x414f1950 (LWP 422)  0x7f36f07a03e1 in sigtimedwait ()
   from /lib/libc.so.6
  1 Thread 0x7f36f1f306e0 (LWP 414)  0x7f36f084b482 in select ()
   from /lib/libc.so.6
(gdb) thread 1
[Switching to thread 1 (Thread 0x7f36f1f306e0 (LWP 414))]#0  0x7f36f084b482
+in select () from /lib/libc.so.6
(gdb) bt
#0  0x7f36f084b482 in select () from /lib/libc.so.6
#1  0x004094cb in main_loop_wait (timeout=0)
at /home/chris/pkgs/kvm/kvm-79/qemu/vl.c:4719
#2  0x0050a7ea in kvm_main_loop ()
at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:619
#3  0x0040fafc in main (argc=,
argv=0x79f41948) at /home/chris/pkgs/kvm/kvm-79/qemu/vl.c:4871
(gdb) thread 2
[Switching to thread 2 (Thread 0x414f1950 (LWP 422))]#0  0x7f36f07a03e1 in
+sigtimedwait () from /lib/libc.so.6
(gdb) bt
#0  0x7f36f07a03e1 in sigtimedwait () from /lib/libc.so.6
#1  0x0050a560 in kvm_main_loop_wait (env=0xc319e0, timeout=0)
at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:284
#2  0x0050aaf7 in ap_main_loop (_env=)
at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:425
#3  0x7f36f11ba3ea in start_thread () from /lib/libpthread.so.0
#4  0x7f36f0852c6d in clone () from /lib/libc.so.6
#5  0x in ?? ()


Any clues to help me resolve this would be much appreciated.


--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351676&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core

2008-11-26 Thread Chris Wright
* Greg KH ([EMAIL PROTECTED]) wrote:
> > +static int
> > +igb_virtual(struct pci_dev *pdev, int nr_virtfn)
> > +{
> > +   unsigned char my_mac_addr[6] = {0x00, 0xDE, 0xAD, 0xBE, 0xEF, 0xFF};
> > +   struct net_device *netdev = pci_get_drvdata(pdev);
> > +   struct igb_adapter *adapter = netdev_priv(netdev);
> > +   int i;
> > +
> > +   if (nr_virtfn > 7)
> > +   return -EINVAL;
> 
> Why the check for 7?  Is that the max virtual functions for this card?
> Shouldn't that be a define somewhere so it's easier to fix in future
> versions of this hardware?  :)

IIRC it's 8 for the card, 1 reserved for PF.  I think both notions
should be captured w/ commented constants.

thanks,
-chris
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [SR-IOV driver example 3/3] VF driver tar ball

2008-11-26 Thread Greg KH
On Wed, Nov 26, 2008 at 10:40:43PM +0800, Yu Zhao wrote:
> The attachment is the VF driver for Intel 82576 NIC.

Please don't attach things as tarballs, we can't review or easily read
them at all.

Care to resend it?

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [SR-IOV driver example 0/3] introduction

2008-11-26 Thread Greg KH
On Wed, Nov 26, 2008 at 10:03:03PM +0800, Yu Zhao wrote:
> SR-IOV drivers of Intel 82576 NIC are available. There are two parts
> of the drivers: Physical Function driver and Virtual Function driver.
> The PF driver is based on the IGB driver and is used to control PF to
> allocate hardware specific resources and interface with the SR-IOV core.
> The VF driver is a new NIC driver that is same as the traditional PCI
> device driver. It works in both the host and the guest (Xen and KVM)
> environment.
> 
> These two drivers are testing versions and they are *only* intended to
> show how to use SR-IOV API.

That's funny, as some distros are already shipping this driver.  You
might want to tell them that this is an "example only" driver and not to
be used "for real"... :(

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core

2008-11-26 Thread Greg KH
On Wed, Nov 26, 2008 at 10:21:56PM +0800, Yu Zhao wrote:
> This patch integrates the IGB driver with the SR-IOV core. It shows how
> the SR-IOV API is used to support the capability. Obviously people does
> not need to put much effort to integrate the PF driver with SR-IOV core.
> All SR-IOV standard stuff are handled by SR-IOV core and PF driver only
> concerns the device specific resource allocation and deallocation once it
> gets the necessary information (i.e. number of Virtual Functions) from
> the callback function.
> 
> ---
>  drivers/net/igb/igb_main.c |   30 ++
>  1 files changed, 30 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
> index bc063d4..b8c7dc6 100644
> --- a/drivers/net/igb/igb_main.c
> +++ b/drivers/net/igb/igb_main.c
> @@ -139,6 +139,7 @@ void igb_set_mc_list_pools(struct igb_adapter *, struct 
> e1000_hw *, int, u16);
>  static int igb_vmm_control(struct igb_adapter *, bool);
>  static int igb_set_vf_mac(struct net_device *, int, u8*);
>  static void igb_mbox_handler(struct igb_adapter *);
> +static int igb_virtual(struct pci_dev *, int);
>  #endif
>  
>  static int igb_suspend(struct pci_dev *, pm_message_t);
> @@ -184,6 +185,9 @@ static struct pci_driver igb_driver = {
>  #endif
>   .shutdown = igb_shutdown,
>   .err_handler = &igb_err_handler,
> +#ifdef CONFIG_PCI_IOV
> + .virtual = igb_virtual
> +#endif

#ifdef should not be needed, right?

>  };
>  
>  static int global_quad_port_a; /* global quad port a indication */
> @@ -5107,6 +5111,32 @@ void igb_set_mc_list_pools(struct igb_adapter *adapter,
>   reg_data |= (1 << 25);
>   wr32(E1000_VMOLR(pool), reg_data);
>  }
> +
> +static   int
> +igb_virtual(struct pci_dev *pdev, int nr_virtfn)
> +{
> + unsigned char my_mac_addr[6] = {0x00, 0xDE, 0xAD, 0xBE, 0xEF, 0xFF};
> + struct net_device *netdev = pci_get_drvdata(pdev);
> + struct igb_adapter *adapter = netdev_priv(netdev);
> + int i;
> +
> + if (nr_virtfn > 7)
> + return -EINVAL;

Why the check for 7?  Is that the max virtual functions for this card?
Shouldn't that be a define somewhere so it's easier to fix in future
versions of this hardware?  :)

> +
> + if (nr_virtfn) {
> + for (i = 0; i < nr_virtfn; i++) {
> + printk(KERN_INFO "SR-IOV: VF %d is enabled\n", i);

Use dev_info() please, that shows the exact pci device and driver that
emitted the message.

> + my_mac_addr[5] = (unsigned char)i;
> + igb_set_vf_mac(netdev, i, my_mac_addr);
> + igb_set_vf_vmolr(adapter, i);
> + }
> + } else
> + printk(KERN_INFO "SR-IOV is disabled\n");

Is that really true?  (oh, use dev_info as well.)  What happens if you
had called this with "5" and then later with "0", you never destroyed
those existing virtual functions, yet the code does:

> + adapter->vfs_allocated_count = nr_virtfn;

Which makes the driver think they are not present.  What happens when
the driver later goes to shut down?  Are those resources freed up
properly?

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: Using virtio drivers under Windows 2003 R2 SP2 guest

2008-11-26 Thread Martin Maurer
> Subject: Re: Using virtio drivers under Windows 2003 R2 SP2 guest
> 
> On Wed, Nov 26, 2008 at 1:47 PM, carlopmart <[EMAIL PROTECTED]>
> wrote:
> > Hi all,
> >
> >  Recently I have installed a ubuntu server to use as a virtual host. I
> need
> > to install two Windows 2003 R2 SP2 guests on it, and I would like to
> use
> > virtio drivers for networking but I see that these drivers only exists
> for
> > Windows XP and Windows 2000, is it correct?? can I use it for Windows
> 2003?
> 
> I have been using the XP driver in a 2003 VM and it works correctly,
> and IIRC Dor and Avi have said that XP drivers work in 2003.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

I just confirm that the XP driver virtio drivers works also here on win 2003 
without any issues.
(we run several win2003 with exchange in production).

Best Regards,

Martin Maurer

[EMAIL PROTECTED]
http://www.proxmox.com

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2351433 ] unhandled vm exit: 0x80000021 running UnixWare guest

2008-11-26 Thread SourceForge.net
Bugs item #2351433, was opened at 2008-11-26 17:39
Message generated for change (Comment added) made by hughesj
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: intel
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: John Hughes (hughesj)
Assigned to: Nobody/Anonymous (nobody)
Summary: unhandled vm exit: 0x8021 running UnixWare guest

Initial Comment:
CPU: Xeon 3050
Vendor: Intel
Bits: 64
Host distribution: Debian Lenny, kernel kernel 2.6.26-1-amd64
Guest: SCO UnixWare 7.1.1 (SVR5), 32 bits

Running a UnixWare (SVR5) guest with kvm 79,  debian build kernel 
2.6.26-1-amd64 on an Intel Xeon 3050 it crashes during boot with:

$ qemu-system-x86_64 -net nic,model=ne2k_isa -net user -hda UnixWare.img
unhandled vm exit: 0x8021 vcpu_id 0
rax b101 rbx  rcx  rdx 

rsi  rdi  rsp 0ff8 rbp 

r8   r9   r10  r11 

r12  r13  r14  r15 

rip fe6e rflags 00023002
cs 0100 (/ p 1 dpl 0 db 1 s 1 type b l 0 g 1 avl 0)
ds 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
es  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
ss 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
fs  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
gs  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
tr 0148 (c04405c0/2067 p 1 dpl 0 db 0 s 0 type b l 0 g 0 avl 0)
ldt  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
gdt 5020/2cf
idt 52f0/7ff
cr0 8001003b cr2 0 cr3 1005000 cr4 2d4 cr8 0 efer 0
Aborted

--

Comment By: John Hughes (hughesj)
Date: 2008-11-26 17:41

Message:
(Works with -no-kvm).

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2351433 ] unhandled vm exit: 0x80000021 running UnixWare guest

2008-11-26 Thread SourceForge.net
Bugs item #2351433, was opened at 2008-11-26 17:39
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: intel
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: John Hughes (hughesj)
Assigned to: Nobody/Anonymous (nobody)
Summary: unhandled vm exit: 0x8021 running UnixWare guest

Initial Comment:
CPU: Xeon 3050
Vendor: Intel
Bits: 64
Host distribution: Debian Lenny, kernel kernel 2.6.26-1-amd64
Guest: SCO UnixWare 7.1.1 (SVR5), 32 bits

Running a UnixWare (SVR5) guest with kvm 79,  debian build kernel 
2.6.26-1-amd64 on an Intel Xeon 3050 it crashes during boot with:

$ qemu-system-x86_64 -net nic,model=ne2k_isa -net user -hda UnixWare.img
unhandled vm exit: 0x8021 vcpu_id 0
rax b101 rbx  rcx  rdx 

rsi  rdi  rsp 0ff8 rbp 

r8   r9   r10  r11 

r12  r13  r14  r15 

rip fe6e rflags 00023002
cs 0100 (/ p 1 dpl 0 db 1 s 1 type b l 0 g 1 avl 0)
ds 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
es  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
ss 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
fs  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
gs  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
tr 0148 (c04405c0/2067 p 1 dpl 0 db 0 s 0 type b l 0 g 0 avl 0)
ldt  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
gdt 5020/2cf
idt 52f0/7ff
cr0 8001003b cr2 0 cr3 1005000 cr4 2d4 cr8 0 efer 0
Aborted

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: header-sync: fix to work with 2.6.28 kernel

2008-11-26 Thread Avi Kivity

Mark McLoughlin wrote:

If you run header-sync against a kernel which has asm/kvm.h but not
e.g. asm/vmx.h then $(wildcard $(headers-new)) returns a string with
expanded kvm.h path and the vmx.h glob removed. We then pass the
original globs to rsync causing that to fail when vmx.h can't be
found:

  rsync: link_stat ".../asm/vmx*.h" failed: No such file or directory (2)

Fix by passing the expanded paths returned by the wildcard function to
rsync.
  


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-userspace: Cleanup user space NMI injection

2008-11-26 Thread Avi Kivity

Jan Kiszka wrote:

Hm.  For interrupts we need to check, since the interrupt might be
deasserted or masked while the window is closed.

Is there no way that this can happen for NMIs?



In our emulation, but I also think in real life, there is no way to
deassert an NMI. Therefore, our user space API just sets the nmi_pending
flag. Masking of NMIs (due to mov ss etc.) is handled by the NMI
injection code, and this has to happen in kernel anyway as the other NMI
sources (APIC and IOAPIC) do not bother about the NMI windows as well.
  


I think you're right, and applied the patch.  Thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-userspace: fix module build with --kerneldir

2008-11-26 Thread Avi Kivity

Maik Hentsche wrote:
  
If the user specified $kerneldir, it should be in the form 
/lib/modules/$version/build, no? 



Ah sorry, I did not know of this requirement. My kerneldir
is /tmp/linux (I build inside chroot).
  


I took it from the depmod manpage, but if you're building from 
/tmp/linux, clearly it isn't a strict requirement.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-userspace: fix module build with --kerneldir

2008-11-26 Thread Avi Kivity

Jan Kiszka wrote:

Avi Kivity wrote:
  

Maik Hentsche wrote:


Avi Kivity <[EMAIL PROTECTED]> wrote:

 
  

Can't the version be determined directly from kernedir itself?  e.g.
kerneldir is /lib/modules/$version/build?



How do you get $version if you are crosscompiling? In this case the
usual way of asking uname doesn't work. Instead you somehow need to get
the version of the kernel you're building for from the kernel source
provided in --kerneldir. I don't know any better way than evaluating
UTSRELEASE.
  
  

If the user specified $kerneldir, it should be in the form
/lib/modules/$version/build, no?



This is not sufficiently generic. KVM should to accept arbitrary paths
to kernel directories, not just those special links (which could cause
problems anyway if the link is set up for the target and makes no sense
on the build host).

  


Fair enough.  This seems to work:

   awk ' / = / { a[$1] = $3 } END { printf("%s.%s.%s%s\n", 
a["VERSION"], a["PATCHLEVEL"], a["SUBLEVEL"], a["EXTRAVERSION"]) }' 
"$kerneldir/Makefile"


Or alternatively

awk '/Linux kernel version/ { print $NF }' "$kerneldir/.config"

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Using virtio drivers under Windows 2003 R2 SP2 guest

2008-11-26 Thread Jorge Lucángeli Obes
On Wed, Nov 26, 2008 at 1:47 PM, carlopmart <[EMAIL PROTECTED]> wrote:
> Hi all,
>
>  Recently I have installed a ubuntu server to use as a virtual host. I need
> to install two Windows 2003 R2 SP2 guests on it, and I would like to use
> virtio drivers for networking but I see that these drivers only exists for
> Windows XP and Windows 2000, is it correct?? can I use it for Windows 2003?

I have been using the XP driver in a 2003 VM and it works correctly,
and IIRC Dor and Avi have said that XP drivers work in 2003.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Using virtio drivers under Windows 2003 R2 SP2 guest

2008-11-26 Thread carlopmart

Hi all,

 Recently I have installed a ubuntu server to use as a virtual host. I need to 
install two Windows 2003 R2 SP2 guests on it, and I would like to use virtio 
drivers for networking but I see that these drivers only exists for Windows XP 
and Windows 2000, is it correct?? can I use it for Windows 2003?


Many thanks.

--
CL Martinez
carlopmart {at} gmail {d0t} com
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] PCI: allow pci driver to support only dynids

2008-11-26 Thread Chris Wright
* Jean Delvare ([EMAIL PROTECTED]) wrote:
> As a side note, I am curious what PCI driver we do have which has
> driver->probe defined but no driver->id_table. Did you hit an actual
> issue or are you fixing a theoretical one?

Such a driver was patch 2/2.

http://lkml.org/lkml/2008/11/26/11

It's meant to have only dynamic ids, worked fine on 2.6.27 and oopsed
from NULL id-> deref on current git.

thanks,
-chris
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core

2008-11-26 Thread Yu Zhao
This patch integrates the IGB driver with the SR-IOV core. It shows how
the SR-IOV API is used to support the capability. Obviously people does
not need to put much effort to integrate the PF driver with SR-IOV core.
All SR-IOV standard stuff are handled by SR-IOV core and PF driver only
concerns the device specific resource allocation and deallocation once it
gets the necessary information (i.e. number of Virtual Functions) from
the callback function.

---
 drivers/net/igb/igb_main.c |   30 ++
 1 files changed, 30 insertions(+), 0 deletions(-)

diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index bc063d4..b8c7dc6 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -139,6 +139,7 @@ void igb_set_mc_list_pools(struct igb_adapter *, struct 
e1000_hw *, int, u16);
 static int igb_vmm_control(struct igb_adapter *, bool);
 static int igb_set_vf_mac(struct net_device *, int, u8*);
 static void igb_mbox_handler(struct igb_adapter *);
+static int igb_virtual(struct pci_dev *, int);
 #endif
 
 static int igb_suspend(struct pci_dev *, pm_message_t);
@@ -184,6 +185,9 @@ static struct pci_driver igb_driver = {
 #endif
.shutdown = igb_shutdown,
.err_handler = &igb_err_handler,
+#ifdef CONFIG_PCI_IOV
+   .virtual = igb_virtual
+#endif
 };
 
 static int global_quad_port_a; /* global quad port a indication */
@@ -5107,6 +5111,32 @@ void igb_set_mc_list_pools(struct igb_adapter *adapter,
reg_data |= (1 << 25);
wr32(E1000_VMOLR(pool), reg_data);
 }
+
+static int
+igb_virtual(struct pci_dev *pdev, int nr_virtfn)
+{
+   unsigned char my_mac_addr[6] = {0x00, 0xDE, 0xAD, 0xBE, 0xEF, 0xFF};
+   struct net_device *netdev = pci_get_drvdata(pdev);
+   struct igb_adapter *adapter = netdev_priv(netdev);
+   int i;
+
+   if (nr_virtfn > 7)
+   return -EINVAL;
+
+   if (nr_virtfn) {
+   for (i = 0; i < nr_virtfn; i++) {
+   printk(KERN_INFO "SR-IOV: VF %d is enabled\n", i);
+   my_mac_addr[5] = (unsigned char)i;
+   igb_set_vf_mac(netdev, i, my_mac_addr);
+   igb_set_vf_vmolr(adapter, i);
+   }
+   } else
+   printk(KERN_INFO "SR-IOV is disabled\n");
+
+   adapter->vfs_allocated_count = nr_virtfn;
+
+   return 0;
+}
 #endif
 
 /* igb_main.c */
-- 
1.5.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[SR-IOV driver example 1/3] PF driver: allocate hardware specific resource

2008-11-26 Thread Yu Zhao
This patch makes the IGB driver allocate hardware resource (rx/tx queues)
for Virtual Functions. All operations in this patch are hardware specific.

---
 drivers/net/igb/Makefile|2 +-
 drivers/net/igb/e1000_82575.c   |1 +
 drivers/net/igb/e1000_82575.h   |   61 
 drivers/net/igb/e1000_defines.h |7 +
 drivers/net/igb/e1000_hw.h  |2 +
 drivers/net/igb/e1000_regs.h|   13 +
 drivers/net/igb/e1000_vf.c  |  223 ++
 drivers/net/igb/igb.h   |   10 +
 drivers/net/igb/igb_main.c  |  604 ++-
 9 files changed, 910 insertions(+), 13 deletions(-)
 create mode 100644 drivers/net/igb/e1000_vf.c

diff --git a/drivers/net/igb/Makefile b/drivers/net/igb/Makefile
index 1927b3f..ab3944c 100644
--- a/drivers/net/igb/Makefile
+++ b/drivers/net/igb/Makefile
@@ -33,5 +33,5 @@
 obj-$(CONFIG_IGB) += igb.o
 
 igb-objs := igb_main.o igb_ethtool.o e1000_82575.o \
-   e1000_mac.o e1000_nvm.o e1000_phy.o
+   e1000_mac.o e1000_nvm.o e1000_phy.o e1000_vf.o
 
diff --git a/drivers/net/igb/e1000_82575.c b/drivers/net/igb/e1000_82575.c
index f5e2e72..bb823ac 100644
--- a/drivers/net/igb/e1000_82575.c
+++ b/drivers/net/igb/e1000_82575.c
@@ -87,6 +87,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw)
case E1000_DEV_ID_82576:
case E1000_DEV_ID_82576_FIBER:
case E1000_DEV_ID_82576_SERDES:
+   case E1000_DEV_ID_82576_QUAD_COPPER:
mac->type = e1000_82576;
break;
default:
diff --git a/drivers/net/igb/e1000_82575.h b/drivers/net/igb/e1000_82575.h
index c1928b5..8c488ab 100644
--- a/drivers/net/igb/e1000_82575.h
+++ b/drivers/net/igb/e1000_82575.h
@@ -170,4 +170,65 @@ struct e1000_adv_tx_context_desc {
 #define E1000_DCA_TXCTRL_CPUID_SHIFT 24 /* Tx CPUID now in the last byte */
 #define E1000_DCA_RXCTRL_CPUID_SHIFT 24 /* Rx CPUID now in the last byte */
 
+#define MAX_NUM_VFS   8
+
+#define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31)  /* global VF LB enable */
+
+/* Easy defines for setting default pool, would normally be left a zero */
+#define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7
+#define E1000_VT_CTL_DEFAULT_POOL_MASK  (0x7 << 
E1000_VT_CTL_DEFAULT_POOL_SHIFT)
+
+/* Other useful VMD_CTL register defines */
+#define E1000_VT_CTL_DISABLE_DEF_POOL   (1 << 29)
+#define E1000_VT_CTL_VM_REPL_EN (1 << 30)
+
+/* Per VM Offload register setup */
+#define E1000_VMOLR_LPE0x0001 /* Accept Long packet */
+#define E1000_VMOLR_AUPE   0x0100 /* Accept untagged packets */
+#define E1000_VMOLR_BAM0x0800 /* Accept Broadcast packets */
+#define E1000_VMOLR_MPME   0x1000 /* Multicast promiscuous mode */
+#define E1000_VMOLR_STRVLAN0x4000 /* Vlan stripping enable */
+
+#define E1000_P2VMAILBOX_STS   0x0001 /* Initiate message send to VF */
+#define E1000_P2VMAILBOX_ACK   0x0002 /* Ack message recv'd from VF */
+#define E1000_P2VMAILBOX_VFU   0x0004 /* VF owns the mailbox buffer */
+#define E1000_P2VMAILBOX_PFU   0x0008 /* PF owns the mailbox buffer */
+
+#define E1000_VLVF_ARRAY_SIZE 32
+#define E1000_VLVF_VLANID_MASK0x0FFF
+#define E1000_VLVF_POOLSEL_SHIFT  12
+#define E1000_VLVF_POOLSEL_MASK   (0xFF << E1000_VLVF_POOLSEL_SHIFT)
+#define E1000_VLVF_VLANID_ENABLE  0x8000
+
+#define E1000_VFMAILBOX_SIZE   16 /* 16 32 bit words - 64 bytes */
+
+/* If it's a E1000_VF_* msg then it originates in the VF and is sent to the
+ * PF.  The reverse is true if it is E1000_PF_*.
+ * Message ACK's are the value or'd with 0xF000
+ */
+#define E1000_VT_MSGTYPE_ACK  0xF000  /* Messages below or'd with
+   * this are the ACK */
+#define E1000_VT_MSGTYPE_NACK 0xFF00  /* Messages below or'd with
+   * this are the NACK */
+#define E1000_VT_MSGINFO_SHIFT16
+/* bits 23:16 are used for exra info for certain messages */
+#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT)
+
+#define E1000_VF_MSGTYPE_REQ_MAC  1 /* VF needs to know its MAC */
+#define E1000_VF_MSGTYPE_VFLR 2 /* VF notifies VFLR to PF */
+#define E1000_VF_SET_MULTICAST3 /* VF requests PF to set MC addr */
+#define E1000_VF_SET_VLAN 4 /* VF requests PF to set VLAN */
+#define E1000_VF_SET_LPE  5 /* VF requests PF to set VMOLR.LPE */
+
+s32  e1000_send_mail_to_vf(struct e1000_hw *hw, u32 *msg,
+   u32 vf_number, s16 size);
+s32  e1000_receive_mail_from_vf(struct e1000_hw *hw, u32 *msg,
+u32 vf_number, s16 size);
+void e1000_vmdq_loopback_enable_vf(struct e1000_hw *hw);
+void e1000_vmdq_loopback_disable_vf(struct e1000_hw *hw);
+void e1000_vmdq_replication_enable_vf(struct e1000_hw *hw, u32 enables);
+void e1000_vmdq_replication_disable_vf(struct e1000_hw *hw);
+bool e1000_check_for_pf_ack_vf(struct e1000_hw *hw);
+bool e1000_check_for_pf_mail

[SR-IOV driver example 0/3] introduction

2008-11-26 Thread Yu Zhao
SR-IOV drivers of Intel 82576 NIC are available. There are two parts
of the drivers: Physical Function driver and Virtual Function driver.
The PF driver is based on the IGB driver and is used to control PF to
allocate hardware specific resources and interface with the SR-IOV core.
The VF driver is a new NIC driver that is same as the traditional PCI
device driver. It works in both the host and the guest (Xen and KVM)
environment.

These two drivers are testing versions and they are *only* intended to
show how to use SR-IOV API.

Intel 82576 NIC specification can be found at:
http://download.intel.com/design/network/datashts/82576_Datasheet_v2p1.pdf

[SR-IOV driver example 1/3] PF driver: allocate hardware specific resource
[SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core
[SR-IOV driver example 3/3] VF driver tar ball
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/5] kvm: qemu: virtio: introduce virtqueue_fill() and virtqueue_flush()

2008-11-26 Thread Mark McLoughlin
Split virtqueue_push() into two logical steps - adding an element
to the used ring and notifying the other side of added elements.

This is needed because with the mergeable receive buffers scheme we
will add buffers to the used ring as we copy the packet data into them
but we only want to notify the guest of the new buffers once all the
packet buffers are available.

Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]>
---
 qemu/hw/virtio.c |   23 ++-
 qemu/hw/virtio.h |3 +++
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index fe0a120..18ad3ed 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -107,19 +107,32 @@ static void virtqueue_init(VirtQueue *vq, void *p)
 vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned 
long)&vq->vring.avail->ring[vq->vring.num]);
 }
 
-void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
-   unsigned int len)
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+   unsigned int len, unsigned int idx)
 {
 VRingUsedElem *used;
 
+idx += vq->vring.used->idx;
+
 /* Get a pointer to the next entry in the used ring. */
-used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
+used = &vq->vring.used->ring[idx % vq->vring.num];
 used->id = elem->index;
 used->len = len;
+}
+
+void virtqueue_flush(VirtQueue *vq, unsigned int count)
+{
 /* Make sure buffer is written before we update index. */
 wmb();
-vq->vring.used->idx++;
-vq->inuse--;
+vq->vring.used->idx += count;
+vq->inuse -= count;
+}
+
+void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
+   unsigned int len)
+{
+virtqueue_fill(vq, elem, len, 0);
+virtqueue_flush(vq, 1);
 }
 
 static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h
index 0dcedbf..87a15cc 100644
--- a/qemu/hw/virtio.h
+++ b/qemu/hw/virtio.h
@@ -139,6 +139,9 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int 
queue_size,
 
 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len);
+void virtqueue_flush(VirtQueue *vq, unsigned int count);
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+   unsigned int len, unsigned int idx);
 
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
 
-- 
1.5.4.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/5] kvm: qemu: virtio-net: add a new virtio-net receive buffer scheme

2008-11-26 Thread Mark McLoughlin
Currently, in order to receive large packets, the guest must allocate
max-sized packet buffers and pass them to the host. Each of these
max-sized packets occupy 20 ring entries, which means we can only
transfer a maximum of 12 packets in a single batch with a 256 entry
ring.

When receiving packets from external networks, we only receive MTU
sized packets and so the throughput observed is throttled by the
number of packets the ring can hold.

Implement the VIRTIO_NET_F_MRG_RXBUF feature to let guests know that
we can merge smaller buffers together in order to handle large packets.

This scheme allows us to be efficient in our use of ring entries
while still supporting large packets. Benchmarking using netperf from
an external machine to a guest over a 10Gb/s network shows a 100%
improvement from ~1Gb/s to ~2Gb/s. With a local host->guest benchmark
with GSO disabled on the host side, throughput was seen to increase
from 700Mb/s to 1.7Gb/s.

Based on a patch from Herbert Xu.

Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]>
---
 qemu/hw/virtio-net.c |  133 +++--
 qemu/hw/virtio.c |   32 
 qemu/hw/virtio.h |1 +
 3 files changed, 139 insertions(+), 27 deletions(-)

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 2a52536..b5d5f9e 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -34,9 +34,13 @@
 #define VIRTIO_NET_F_HOST_TSO6 12  /* Host can handle TSOv6 in. */
 #define VIRTIO_NET_F_HOST_ECN  13  /* Host can handle TSO[6] w/ ECN in. */
 #define VIRTIO_NET_F_HOST_UFO  14  /* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF 15  /* Host can merge receive buffers. */
 
 #define TX_TIMER_INTERVAL 15 /* 150 us */
 
+/* Maximum packet size we can receive from tap device: header + 64k */
+#define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10))
+
 /* The config defining mac address (6 bytes) */
 struct virtio_net_config
 {
@@ -61,6 +65,14 @@ struct virtio_net_hdr
 uint16_t csum_offset;
 };
 
+/* This is the version of the header to use when the MRG_RXBUF
+ * feature has been negotiated. */
+struct virtio_net_hdr_mrg_rxbuf
+{
+struct virtio_net_hdr hdr;
+uint16_t num_buffers;   /* Number of merged rx buffers */
+};
+
 typedef struct VirtIONet
 {
 VirtIODevice vdev;
@@ -70,6 +82,7 @@ typedef struct VirtIONet
 VLANClientState *vc;
 QEMUTimer *tx_timer;
 int tx_timer_active;
+int mergeable_rx_bufs;
 } VirtIONet;
 
 /* TODO
@@ -106,6 +119,7 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev)
features |= (1 << VIRTIO_NET_F_HOST_TSO4);
features |= (1 << VIRTIO_NET_F_HOST_TSO6);
features |= (1 << VIRTIO_NET_F_HOST_ECN);
+   features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
/* Kernel can't actually handle UFO in software currently. */
 }
 
@@ -117,6 +131,8 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint32_t features)
 VirtIONet *n = to_virtio_net(vdev);
 VLANClientState *host = n->vc->vlan->first_client;
 
+n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));
+
 if (!tap_has_vnet_hdr(host) || !host->set_offload)
return;
 
@@ -145,7 +161,9 @@ static int virtio_net_can_receive(void *opaque)
!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return 0;
 
-if (n->rx_vq->vring.avail->idx == n->rx_vq->last_avail_idx) {
+if (n->rx_vq->vring.avail->idx == n->rx_vq->last_avail_idx ||
+   (n->mergeable_rx_bufs &&
+!virtqueue_avail_bytes(n->rx_vq, VIRTIO_NET_MAX_BUFSIZE, 0))) {
n->rx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
return 0;
 }
@@ -197,41 +215,90 @@ static int iov_fill(struct iovec *iov, int iovcnt, const 
void *buf, int count)
 return offset;
 }
 
-static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
+static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
+ const void *buf, int size, int hdr_len)
 {
-VirtIONet *n = opaque;
-VirtQueueElement elem;
-struct virtio_net_hdr *hdr;
+struct virtio_net_hdr *hdr = iov[0].iov_base;
 int offset;
-int total;
-
-if (virtqueue_pop(n->rx_vq, &elem) == 0)
-   return;
-
-if (elem.in_num < 1 || elem.in_sg[0].iov_len != sizeof(*hdr)) {
-   fprintf(stderr, "virtio-net header not in first element\n");
-   exit(1);
-}
 
-hdr = (void *)elem.in_sg[0].iov_base;
 hdr->flags = 0;
 hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
 
-offset = 0;
-total = sizeof(*hdr);
-
 if (tap_has_vnet_hdr(n->vc->vlan->first_client)) {
memcpy(hdr, buf, sizeof(*hdr));
-   offset += total;
-work_around_broken_dhclient(hdr, buf + offset, size - offset);
+   offset = sizeof(*hdr);
+   work_around_broken_dhclient(hdr, buf + offset, size - offset);
 }
 
-/* copy in packet.  ugh */
-total += iov_fill(&elem.in_sg[1], elem.in_num 

[PATCH 1/5] kvm: qemu: virtio: move virtqueue_next_desc() around

2008-11-26 Thread Mark McLoughlin
virtio_next_desc() is only used in virtqueue_pop(), so move them
alongside one another.

Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]>
---
 qemu/hw/virtio.c |   30 +++---
 1 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index 8fac354..fe0a120 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -107,6 +107,21 @@ static void virtqueue_init(VirtQueue *vq, void *p)
 vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned 
long)&vq->vring.avail->ring[vq->vring.num]);
 }
 
+void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
+   unsigned int len)
+{
+VRingUsedElem *used;
+
+/* Get a pointer to the next entry in the used ring. */
+used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
+used->id = elem->index;
+used->len = len;
+/* Make sure buffer is written before we update index. */
+wmb();
+vq->vring.used->idx++;
+vq->inuse--;
+}
+
 static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
 {
 unsigned int next;
@@ -126,21 +141,6 @@ static unsigned virtqueue_next_desc(VirtQueue *vq, 
unsigned int i)
 return next;
 }
 
-void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
-   unsigned int len)
-{
-VRingUsedElem *used;
-
-/* Get a pointer to the next entry in the used ring. */
-used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
-used->id = elem->index;
-used->len = len;
-/* Make sure buffer is written before we update index. */
-wmb();
-vq->vring.used->idx++;
-vq->inuse--;
-}
-
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 {
 unsigned int i, head;
-- 
1.5.4.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/5] kvm: qemu: virtio: split some helpers out of virtqueue_pop()

2008-11-26 Thread Mark McLoughlin
The mergeable receive buffer scheme will introduce a new function
which peeks at how much buffer space is available in the queue.

Split out some helper functions from virtqueue_pop() for that purpose.

Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]>
---
 qemu/hw/virtio.c |   45 +
 1 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index 18ad3ed..42022d4 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -135,6 +135,33 @@ void virtqueue_push(VirtQueue *vq, const VirtQueueElement 
*elem,
 virtqueue_flush(vq, 1);
 }
 
+static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
+{
+uint16_t num_heads = vq->vring.avail->idx - idx;
+
+/* Check it isn't doing very strange things with descriptor numbers. */
+if (num_heads > vq->vring.num)
+   errx(1, "Guest moved used index from %u to %u",
+idx, vq->vring.avail->idx);
+
+return num_heads;
+}
+
+static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
+{
+unsigned int head;
+
+/* Grab the next descriptor number they're advertising, and increment
+ * the index we've seen. */
+head = vq->vring.avail->ring[idx % vq->vring.num];
+
+/* If their number is silly, that's a fatal mistake. */
+if (head >= vq->vring.num)
+   errx(1, "Guest says index %u is available", head);
+
+return head;
+}
+
 static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
 {
 unsigned int next;
@@ -158,27 +185,13 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 {
 unsigned int i, head;
 
-/* Check it isn't doing very strange things with descriptor numbers. */
-if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num)
-   errx(1, "Guest moved used index from %u to %u",
-vq->last_avail_idx, vq->vring.avail->idx);
-
-/* If there's nothing new since last we looked, return invalid. */
-if (vq->vring.avail->idx == vq->last_avail_idx)
+if (!virtqueue_num_heads(vq, vq->last_avail_idx))
return 0;
 
-/* Grab the next descriptor number they're advertising, and increment
- * the index we've seen. */
-head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num];
-
-/* If their number is silly, that's a fatal mistake. */
-if (head >= vq->vring.num)
-   errx(1, "Guest says index %u is available", head);
-
 /* When we start there are none of either input nor output. */
 elem->out_num = elem->in_num = 0;
 
-i = head;
+i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
 do {
struct iovec *sg;
 
-- 
1.5.4.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/5] kvm: qemu: virtio_net: add support for mergeable rx buffers

2008-11-26 Thread Mark McLoughlin

Hi,
The mergeable receive buffers scheme has been in the Linux
virtio_net driver in net-next-2.6 for a while now, so it's probably
safe to assume what's there now is the final iteration of the ABI.

The following patches implement support for the scheme in
KVM. Only the 5/5 patch contains functional changes.

Changes since last time:

  + We now peak at how much buffer space is actually available in the
ring and no longer assume that buffers are at least 4k

  + The new header is now 'struct virtio_net_hdr_mrg_rxbuf' rather
than 'struct virtio_net_hdr2'

  + The new header no longer is padded to 32 bytes

Cheers,
Mark.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/5] kvm: qemu: virtio-net: split iov_fill() out from virtio_net_receive()

2008-11-26 Thread Mark McLoughlin
Simplifies the current code, but more especially, simplifies the
implementation of the mergeable receive buffers scheme.

Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]>
---
 qemu/hw/virtio-net.c |   27 ++-
 1 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index bc2ede6..2a52536 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -182,12 +182,27 @@ static void work_around_broken_dhclient(struct 
virtio_net_hdr *hdr,
 }
 }
 
+static int iov_fill(struct iovec *iov, int iovcnt, const void *buf, int count)
+{
+int offset, i;
+
+offset = i = 0;
+while (offset < count && i < iovcnt) {
+   int len = MIN(iov[i].iov_len, count - offset);
+   memcpy(iov[i].iov_base, buf + offset, len);
+   offset += len;
+   i++;
+}
+
+return offset;
+}
+
 static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
 {
 VirtIONet *n = opaque;
 VirtQueueElement elem;
 struct virtio_net_hdr *hdr;
-int offset, i;
+int offset;
 int total;
 
 if (virtqueue_pop(n->rx_vq, &elem) == 0)
@@ -212,14 +227,8 @@ static void virtio_net_receive(void *opaque, const uint8_t 
*buf, int size)
 }
 
 /* copy in packet.  ugh */
-i = 1;
-while (offset < size && i < elem.in_num) {
-   int len = MIN(elem.in_sg[i].iov_len, size - offset);
-   memcpy(elem.in_sg[i].iov_base, buf + offset, len);
-   offset += len;
-   total += len;
-   i++;
-}
+total += iov_fill(&elem.in_sg[1], elem.in_num - 1,
+  buf + offset, size - offset);
 
 /* signal other side */
 virtqueue_push(n->rx_vq, &elem, total);
-- 
1.5.4.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2215532 ] SMP IA32e RHEL5.1 guest can not boot up

2008-11-26 Thread SourceForge.net
Bugs item #2215532, was opened at 2008-11-01 22:58
Message generated for change (Comment added) made by jiajun
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2215532&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
>Status: Closed
>Resolution: Fixed
Priority: 5
Private: No
Submitted By: Jiajun Xu (jiajun)
Assigned to: Nobody/Anonymous (nobody)
Summary: SMP IA32e RHEL5.1 guest can not boot up

Initial Comment:
Against kernel.git:20d09323d7b91819e35194773ea000e9d79badd2
user-space.git: ccabb53f1a84f11896c840391f7ade3756214086, SMP IA32e RHEL5/RHEL4 
Guest can not boot up.

UP IA32e Guest can boot up well. The issue disappeare if nmi_watchdog=0 in 
guest kernel parameter.

--

>Comment By: Jiajun Xu (jiajun)
Date: 2008-11-26 06:49

Message:
Verified on kernel.git: 0f7d3ee64e9324b265d5b988c2e81ac1784fc59d
userspace.git: 6e63ba19476753595e508713eb9daf559dc50bf6, the issue is
fixed.

--

Comment By: Avi Kivity (avik)
Date: 2008-11-23 11:12

Message:
Should be fixed now.  Please retest.

--

Comment By: Jiajun Xu (jiajun)
Date: 2008-11-04 01:21

Message:
File Added: config-2.6.18-53.el5

--

Comment By: Jiajun Xu (jiajun)
Date: 2008-11-04 01:20

Message:
Please see attachement, I uploaded all the call trace messages.
I also attched my RHEL5.1 config. The kernel version is 2.6.18-53.
File Added: call_trace

--

Comment By: Jan Kiszka (kiszka)
Date: 2008-11-04 00:08

Message:
Please use a (virtual) serial console or a netconsole to catch the guest
output. The screenshot only tells the tail of the report.

Is there a chance to get my hands on some image to reproduce? I don't have
that RHEL version at hand. Which .config does RHEL use? Which kernel
version? Any special patches that may make a difference?

--

Comment By: Jiajun Xu (jiajun)
Date: 2008-11-03 23:34

Message:
You can find snapshot of guest call trace in attachement. 
Guest call trace after loading kernel and start udev. There are a lot of
messages printed on guest console, most are related to NMI.
There is no any error message on host console.

--

Comment By: Jan Kiszka (kiszka)
Date: 2008-11-02 01:31

Message:
Could you be more specific: Where does it hang? What does the guest kernel
console say? Also, what is the nmi_watchdog default otherwise? And is there
any message in the host kernel's log?

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2215532&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 2/2] KVM: Change to use new APIs for KVM VT-d

2008-11-26 Thread Han, Weidong
Avi Kivity wrote:
> Han, Weidong wrote:
>> This patch changes to use new APIs for KVM VT-d, and add device
>> deassignment for hotplug. 
>> 
>> index 44fd7fa..558bc32 100644
>> --- a/include/linux/kvm.h
>> +++ b/include/linux/kvm.h
>> @@ -425,6 +425,8 @@ struct kvm_trace_rec {
>> struct kvm_assigned_pci_dev)
>>  #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
>>  struct kvm_assigned_irq)
>> +#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x71, \
>> + struct kvm_assigned_pci_dev)
>> 
> 
> Need to advertise using KVM_CAP_DEVICE_DEASSIGNMENT.

Okay, will add KVM_CAP_DEVICE_DEASSIGNMENT. Thanks.

Regards,
Weidong

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/2] VT-d: Support multiple device assignment for KVM

2008-11-26 Thread Han, Weidong
Avi Kivity wrote:
> Han, Weidong wrote:
>> In order to support multiple device assignment for KVM, this patch
>> does following main changes: 
>>- extend dmar_domain to own multiple devices from different
>> iommus, use a bitmap of iommus to replace iommu pointer in
>> dmar_domain.  
>>- add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d
>> usage. Many functions (e.g. intel_map_single() and
>> intel_unmap_single()) won't be used by KVM VT-d. Let them return
>> directly when this flag is set.   
>> 
> 
> 
> This seems brittle.  An API that has some functions shorted out
> depending on some flag is hard to understand and use.

This flag just helps identify kvm VT-d usage, and let kvm VT-d APIs reuse 
native VT-d code, it needn't duplicate code for kvm VT-d APIs, and it won't 
impact existed native VT-d code. 

> 
> We should either implement the functions, or split the API into a
> basic version that talks only to one device, and an expanded versions
> that talks to multiple devices, and is implemented by the using the
> lower level API.  This may require more changes due to the need to
> share io pagetables.

The expanded versions that supports multiple devices will need to change 
dmar_domain, this will cause lots of changes, almost duplicate the main 
functions.

> 
>>- "SAGAW" capability may be different across iommus, that's to
>> say the VT-d page table levels may be different among iommus. This
>> patch uses a defaut agaw, and skip top levels of page tables for
>> iommus which have smaller agaw than default.   
>> 
> 
> Neat trick.
> 
>>  void free_dmar_iommu(struct intel_iommu *iommu)
>>  {
>> struct dmar_domain *domain;
>> @@ -960,7 +1054,14 @@ void free_dmar_iommu(struct intel_iommu *iommu)
>> for (; i < cap_ndoms(iommu->cap); ) {
>> domain = iommu->domains[i];
>> clear_bit(i, iommu->domain_ids);
>> -   domain_exit(domain);
>> +
>> +   if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
>> +   /* domain may be referenced by other iommus
>> */ +   if (domain_in_other_iommus(domain, iommu)
>> == 0) +   domain_exit(domain); +
>> } +   else
>> +   domain_exit(domain);
>> 
> 
> Things like this are best expressed using reference counts, which
> removes the need for the test as well.

will add a reference count for it.

> 
>> +
>> +   /* Skip top levels of page tables for
>> +* iommu which has less agaw than default. +
>> */ +   for (agaw = domain->agaw; agaw != iommu->agaw;
>> agaw--) { +   pgd =
>> phys_to_virt(dma_pte_addr(*pgd)); +   if
>> (!dma_pte_present(*pgd)) { +  
>> spin_unlock_irqrestore(&iommu->lock, flags); +  
>> return -ENOMEM; +   }
>> +   }
>> +   }
>> 
> 
> Need to check that the agaw is sufficient for mapped memory (and when
> adding a device or mapping more memory, need a similar check).

I think we can check the smallest agaw across iommus for mapped memory when 
mapping memory.

Regards,
Weidong

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] kvm-s390: Fix memory leak of vcpu->run

2008-11-26 Thread Christian Borntraeger
From: Christian Borntraeger <[EMAIL PROTECTED]>

The s390 backend of kvm never calls kvm_vcpu_uninit. This causes
a memory leak of vcpu->run pages.
Lets call kvm_vcpu_uninit in kvm_arch_vcpu_destroy to free
the vcpu->run. 

Signed-off-by: Christian Borntraeger <[EMAIL PROTECTED]>
Acked-by: Carsten Otte <[EMAIL PROTECTED]>
---
 arch/s390/kvm/kvm-s390.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Index: kvm/arch/s390/kvm/kvm-s390.c
===
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -198,6 +198,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vc
 {
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
free_page((unsigned long)(vcpu->arch.sie_block));
+   kvm_vcpu_uninit(vcpu);
kfree(vcpu);
 }
 
@@ -230,8 +231,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
-   /* kvm common code refers to this, but does'nt call it */
-   BUG();
+   /* Nothing todo */
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] kvm-s390: Fix refcounting and allow module unload

2008-11-26 Thread Christian Borntraeger
From: Christian Borntraeger <[EMAIL PROTECTED]>

Currently it is impossible to unload the kvm module on s390.
This patch fixes kvm_arch_destroy_vm to release all cpus.
This make it possible to unload the module.

In addition we stop messing with the module refcount in arch code.

Signed-off-by: Christian Borntraeger <[EMAIL PROTECTED]>
Acked-by: Carsten Otte <[EMAIL PROTECTED]>
---
 arch/s390/kvm/kvm-s390.c |   35 +--
 1 file changed, 21 insertions(+), 14 deletions(-)

Index: kvm/arch/s390/kvm/kvm-s390.c
===
--- kvm.orig/arch/s390/kvm/kvm-s390.c
+++ kvm/arch/s390/kvm/kvm-s390.c
@@ -185,8 +185,6 @@ struct kvm *kvm_arch_create_vm(void)
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
 
-   try_module_get(THIS_MODULE);
-
return kvm;
 out_nodbf:
free_page((unsigned long)(kvm->arch.sca));
@@ -196,13 +194,32 @@ out_nokvm:
return ERR_PTR(rc);
 }
 
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+   VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+   free_page((unsigned long)(vcpu->arch.sie_block));
+   kfree(vcpu);
+}
+
+static void kvm_free_vcpus(struct kvm *kvm)
+{
+   unsigned int i;
+
+   for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+   if (kvm->vcpus[i]) {
+   kvm_arch_vcpu_destroy(kvm->vcpus[i]);
+   kvm->vcpus[i] = NULL;
+   }
+   }
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-   debug_unregister(kvm->arch.dbf);
+   kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
free_page((unsigned long)(kvm->arch.sca));
+   debug_unregister(kvm->arch.dbf);
kfree(kvm);
-   module_put(THIS_MODULE);
 }
 
 /* Section: vcpu related */
@@ -308,8 +325,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
 vcpu->arch.sie_block);
 
-   try_module_get(THIS_MODULE);
-
return vcpu;
 out_free_cpu:
kfree(vcpu);
@@ -317,14 +332,6 @@ out_nomem:
return ERR_PTR(rc);
 }
 
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-   VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
-   free_page((unsigned long)(vcpu->arch.sie_block));
-   kfree(vcpu);
-   module_put(THIS_MODULE);
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
/* kvm common code refers to this, but never calls it */
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] two refcount and leak fixes for kvm on s390

2008-11-26 Thread Christian Borntraeger
Hello Avi,

here are two fixes for kvm on s390.

[PATCH 1/2] kvm-s390: Fix refcounting and allow module unload
[PATCH 2/2] kvm-s390: Fix memory leak of vcpu->run

None of the fixed problems is severe. So I guess this should be scheduled for 
the next merge window. Thanks

Christian


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] markers: comment usage of marker_synchronize_unregister()

2008-11-26 Thread Mathieu Desnoyers
* Wu Fengguang ([EMAIL PROTECTED]) wrote:
> Add more comments to marker_synchronize_unregister() in order to
> reduce the chance of misusing.
> 
> Based on comments from Lai Jiangshan <[EMAIL PROTECTED]>.
> 
> Cc: Lai Jiangshan <[EMAIL PROTECTED]>
> Cc: Mathieu Desnoyers <[EMAIL PROTECTED]>
> Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
> ---
> 
> I'm still not sure about the last sentence. Can anyone clarify on
> this? Thanks!
> 
> diff --git a/include/linux/marker.h b/include/linux/marker.h
> index 889196c..89ce1b8 100644
> --- a/include/linux/marker.h
> +++ b/include/linux/marker.h
> @@ -164,6 +164,12 @@ extern void *marker_get_private_data(const char *name, 
> marker_probe_func *probe,
>   * marker_synchronize_unregister must be called between the last marker probe
>   * unregistration and the end of module exit to make sure there is no caller
>   * executing a probe when it is freed.
> + *
> + * It must be called _also_ between unregistration and destruction the data
> + * that unregistration-ed probes need to make sure there is no caller 
> executing
> + * a probe when it's data is destroyed.

it's -> its

And the way it's written, this last sentence is a bit misleading. One
might think that the synchronize_unregister has to be called two, when
in fact it just has to be called once, but it must be called at a moment
in time between unregister and free of any resource used by the probes,
including the code which is removed by module unload.

> + *
> + * It works reliably only when all probe routines do not sleep and 
> reschedule.

Per definition, preemption is disabled around marker probe execution, so
I don't see why we should add this last sentence ?

Mathieu

>   */
>  #define marker_synchronize_unregister() synchronize_sched()
>  

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Host<->guest channel interface advice needed

2008-11-26 Thread Gleb Natapov
On Wed, Nov 26, 2008 at 04:07:01PM +0300, Evgeniy Polyakov wrote:
> On Wed, Nov 26, 2008 at 02:39:19PM +0200, Gleb Natapov ([EMAIL PROTECTED]) 
> wrote:
> > The interfaces that are being considered are netlink socket (only datagram
> > semantics, linux specific), new socket family or character device with
> > different minor number for each channel. Which one better suits for
> > the purpose?  Is there other kind of interface to consider? New socket
> > family looks like a good choice, but it would be nice to hear other
> > opinions before starting to work on it.
> 
> What about X (or whatever else) protocol running over host-guest network
> device, which are in the kernel already?
> 
I should have mentioned that in my original mail. We don't want to
use IP stack for communication between host and guest for variety of
reasons. User of the VM may interfere with our communication by mis
configuring firewall for instance (and he/she may even not be aware
that an OS running inside a VM). We also want be able to communicate
with agent inside a guest even when guest's network is not yet configured.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Host<->guest channel interface advice needed

2008-11-26 Thread Arnd Bergmann
On Wednesday 26 November 2008, Gleb Natapov wrote:
> The interfaces that are being considered are netlink socket (only datagram
> semantics, linux specific), new socket family or character device with
> different minor number for each channel. Which one better suits for
> the purpose?  Is there other kind of interface to consider? New socket
> family looks like a good choice, but it would be nice to hear other
> opinions before starting to work on it.

I think a socket and a pty both look reasonable here, but one important
aspect IMHO is that you only need a new kernel driver for the guest, if
you just use the regular pty support or Unix domain sockets in the host.

Obviously, there needs to be some control over permissions, as a guest
most not be able to just open any socket or pty of the host, so a
reasonable approach might be that the guest can only create a socket
or pty that can be opened by the host, but not vice versa. Alternatively,
you create the socket/pty in host userspace and then allow passing that
down into the guest, which creates a virtio device from it.

Arnd <><
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] markers: comment usage of marker_synchronize_unregister()

2008-11-26 Thread Wu Fengguang
On Wed, Nov 26, 2008 at 02:46:08PM +0200, Mathieu Desnoyers wrote:
> * Wu Fengguang ([EMAIL PROTECTED]) wrote:
> > Add more comments to marker_synchronize_unregister() in order to
> > reduce the chance of misusing.
> > 
> > Based on comments from Lai Jiangshan <[EMAIL PROTECTED]>.
> > 
> > Cc: Lai Jiangshan <[EMAIL PROTECTED]>
> > Cc: Mathieu Desnoyers <[EMAIL PROTECTED]>
> > Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
> > ---
> > 
> > I'm still not sure about the last sentence. Can anyone clarify on
> > this? Thanks!
> > 
> > diff --git a/include/linux/marker.h b/include/linux/marker.h
> > index 889196c..89ce1b8 100644
> > --- a/include/linux/marker.h
> > +++ b/include/linux/marker.h
> > @@ -164,6 +164,12 @@ extern void *marker_get_private_data(const char *name, 
> > marker_probe_func *probe,
> >   * marker_synchronize_unregister must be called between the last marker 
> > probe
> >   * unregistration and the end of module exit to make sure there is no 
> > caller
> >   * executing a probe when it is freed.
> > + *
> > + * It must be called _also_ between unregistration and destruction the data
> > + * that unregistration-ed probes need to make sure there is no caller 
> > executing
> > + * a probe when it's data is destroyed.
> 
> it's -> its
> 
> And the way it's written, this last sentence is a bit misleading. One
> might think that the synchronize_unregister has to be called two, when
> in fact it just has to be called once, but it must be called at a moment
> in time between unregister and free of any resource used by the probes,
> including the code which is removed by module unload.
> 
> > + *
> > + * It works reliably only when all probe routines do not sleep and 
> > reschedule.
> 
> Per definition, preemption is disabled around marker probe execution, so
> I don't see why we should add this last sentence ?

Thanks, your reminder dismissed my confusion on this last sentence :-)

Updated patch according to your helpful comments.

Thank you,
Fengguang
---
markers: comment usage of marker_synchronize_unregister()

Add more comments to marker_synchronize_unregister() in order to
reduce the chance of misusing.

Based on comments from Lai Jiangshan and Mathieu Desnoyers.

Cc: Lai Jiangshan <[EMAIL PROTECTED]>
Cc: Mathieu Desnoyers <[EMAIL PROTECTED]>
Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
---
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c..32ce4f2 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -162,8 +162,10 @@ extern void *marker_get_private_data(const char *name, 
marker_probe_func *probe,
 
 /*
  * marker_synchronize_unregister must be called between the last marker probe
- * unregistration and the end of module exit to make sure there is no caller
- * executing a probe when it is freed.
+ * unregistration and the first one of
+ * - the end of module exit
+ * - the free of any resource used by the probes
+ * to ensure the code and data are all valid for any possibly running probes.
  */
 #define marker_synchronize_unregister() synchronize_sched()
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Host<->guest channel interface advice needed

2008-11-26 Thread Evgeniy Polyakov
Hi Gleb.

On Wed, Nov 26, 2008 at 02:39:19PM +0200, Gleb Natapov ([EMAIL PROTECTED]) 
wrote:
> The interfaces that are being considered are netlink socket (only datagram
> semantics, linux specific), new socket family or character device with
> different minor number for each channel. Which one better suits for
> the purpose?  Is there other kind of interface to consider? New socket
> family looks like a good choice, but it would be nice to hear other
> opinions before starting to work on it.

What about X (or whatever else) protocol running over host-guest network
device, which are in the kernel already?

-- 
Evgeniy Polyakov
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Patch 4/5] x86_emulator: add the assembler code for three operands

2008-11-26 Thread Avi Kivity

Guillaume Thouvenin wrote:

It's the only code with shld instruction. I don't see how you can have
three different shld instructions here. I'm sure that I'm missing
something here because for me, when we emulate the shld instruction, the
code produced is the same. I mean that src.val and dst.val are always
unsigned long and I don't see why register size are important here. In
fact I don't understand why we need to use the switch ((_dst).bytes)
for the other emulations.
  


The flags vary according to operand size.  For example, shld $1, %bx, 
%ax where %ax == 0x8000 will set eflags.cf, but shld $1, %ebx, %eax , 
with the same arguments, will not.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Weekly KVM Test report, kernel 30d95f ... userspace fc94d1 ...

2008-11-26 Thread Avi Kivity

Xu, Jiajun wrote:

The call trace messages in guest:
###
Kernel BUG at block/elevator.c:560
invalid opcode:  [1] SMP 
last sysfs file: /block/hda/removable
  


This suggests something happened to /dev/hda.  Could be a timeout or 
something.  Are there any messages in the log before the BUG?



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Host<->guest channel interface advice needed

2008-11-26 Thread Gleb Natapov
Hello,

 I'd like to ask what would be the best user space interface for generic
guest<->host communication channel. The channel will be used to pass
mouse events to/from a guest or by managements software to communicate
with agents running in a guests or for something similar.

The interfaces that are being considered are netlink socket (only datagram
semantics, linux specific), new socket family or character device with
different minor number for each channel. Which one better suits for
the purpose?  Is there other kind of interface to consider? New socket
family looks like a good choice, but it would be nice to hear other
opinions before starting to work on it.

Thanks,

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] markers: comment usage of marker_synchronize_unregister()

2008-11-26 Thread Wu Fengguang
Add more comments to marker_synchronize_unregister() in order to
reduce the chance of misusing.

Based on comments from Lai Jiangshan <[EMAIL PROTECTED]>.

Cc: Lai Jiangshan <[EMAIL PROTECTED]>
Cc: Mathieu Desnoyers <[EMAIL PROTECTED]>
Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
---

I'm still not sure about the last sentence. Can anyone clarify on
this? Thanks!

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c..89ce1b8 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -164,6 +164,12 @@ extern void *marker_get_private_data(const char *name, 
marker_probe_func *probe,
  * marker_synchronize_unregister must be called between the last marker probe
  * unregistration and the end of module exit to make sure there is no caller
  * executing a probe when it is freed.
+ *
+ * It must be called _also_ between unregistration and destruction the data
+ * that unregistration-ed probes need to make sure there is no caller executing
+ * a probe when it's data is destroyed.
+ *
+ * It works reliably only when all probe routines do not sleep and reschedule.
  */
 #define marker_synchronize_unregister() synchronize_sched()
 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-userspace: fix module build with --kerneldir

2008-11-26 Thread Jan Kiszka
Avi Kivity wrote:
> Maik Hentsche wrote:
>> Avi Kivity <[EMAIL PROTECTED]> wrote:
>>
>>  
>>> Can't the version be determined directly from kernedir itself?  e.g.
>>> kerneldir is /lib/modules/$version/build?
>>> 
>>
>> How do you get $version if you are crosscompiling? In this case the
>> usual way of asking uname doesn't work. Instead you somehow need to get
>> the version of the kernel you're building for from the kernel source
>> provided in --kerneldir. I don't know any better way than evaluating
>> UTSRELEASE.
>>   
> 
> If the user specified $kerneldir, it should be in the form
> /lib/modules/$version/build, no?

This is not sufficiently generic. KVM should to accept arbitrary paths
to kernel directories, not just those special links (which could cause
problems anyway if the link is set up for the target and makes no sense
on the build host).

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2 ES-OS
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-userspace: fix module build with --kerneldir

2008-11-26 Thread Maik Hentsche
(Sorry, I accidentally sent this mail without CC before. Resend with
correct CC list.)

Avi Kivity <[EMAIL PROTECTED]> wrote:

> If the user specified $kerneldir, it should be in the form 
> /lib/modules/$version/build, no? 

Ah sorry, I did not know of this requirement. My kerneldir
is /tmp/linux (I build inside chroot).

so long
Maik

-- 
   \   AMD Saxony Limited Liability Company & Co. KG
 Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
  System   |  Register Court Dresden: HRA 4896
 Research  |  General Partner authorized to represent:
  Center   | AMD Saxony LLC (Wilmington, Delaware, US)
   / General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe,
Thomas McCoy


signature.asc
Description: PGP signature


Re: [PATCH] Prevent trace call into unloaded module text

2008-11-26 Thread Wu Fengguang
On Wed, Nov 26, 2008 at 07:46:19PM +0800, Wu Fengguang wrote:
> On Wed, Nov 26, 2008 at 01:17:55PM +0200, Avi Kivity wrote:
> > Wu Fengguang wrote:
> > > Add marker_synchronize_unregister() before module unloading.
> > > This prevents possible trace calls into unloaded module text.
> > >
> > > Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
> > > ---
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index a87f45e..64f38b3 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -2102,5 +2102,6 @@ void kvm_exit(void)
> > >   kvm_arch_exit();
> > >   kvm_exit_debug();
> > >   __free_page(bad_page);
> > > + marker_synchronize_unregister();
> > >  }
> > >  EXPORT_SYMBOL_GPL(kvm_exit);
> > >   
> > 
> > What about kvm-intel.ko and kvm-amd.ko?  They also contain markers.
> 
> vmx_exit and svm_exit() all calls into kvm_exit(), so they have been
> handled in a common way.
> 
> > (and, why doesn't module unload do this automatically?)
> 
> Maybe most modules don't need it for now?

OK I got a better answer: because marker_synchronize_unregister() must
be called after marker_probe_unregister() calls and the tear down of
any private data the trace functions rely on. So there are no
automatic way.

Below is the updated patch.

Thanks,
Fengguang
---
Prevent trace call into torn down text or data

Add marker_synchronize_unregister() immediately after probe unregisters.
This prevents possible trace calls into unloaded module text, or the
trace functions accessing invalidated data.

Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
---
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 41dcc84..f598744 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -252,6 +252,7 @@ void kvm_trace_cleanup(void)
struct kvm_trace_probe *p = &kvm_trace_probes[i];
marker_probe_unregister(p->name, p->probe_func, p);
}
+   marker_synchronize_unregister();
 
relay_close(kt->rchan);
debugfs_remove(kt->lost_file);
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Patch 4/5] x86_emulator: add the assembler code for three operands

2008-11-26 Thread Guillaume Thouvenin
On Tue, 25 Nov 2008 16:59:00 +0200
Avi Kivity <[EMAIL PROTECTED]> wrote:

> 
> 
> > +#define __emulate_2op_cl(_op,_src,_src2,_dst,_eflags,_wx,_wy)  \
> > +   do {
> > \
> > +   unsigned long _tmp; 
> > \
> > +   
> >   \
> > +   __asm__ __volatile__ (  \
> > +   _PRE_EFLAGS("0", "5", "2")  
> > \
> > +   "mov %4, %%rcx \n\t"
> > \
> > +   _op" %%cl,%3,%1; \n\t"  
> >\
> > +   _POST_EFLAGS("0", "5", "2") 
> > \
> > +   : "=m" (_eflags), "=m" ((_dst).val),
> > \
> > + "=&r" (_tmp)  
> > \
> > +   : _wy ((_src).val) , _wy ((_src2).val), "i" 
> > (EFLAGS_MASK) \
> > +   : "%rcx" ); 
> > \
> > +   } while (0)
> >
> > I tested the code and it seems to work.
> >   
> 
> That's actually better and could be used for the other emulations. 
> Please disassemble x86_emulate.o and verify that there are three 
> different shld instructions, one for each register size.
> 

I tried with the following code (it's nearly the same as above):

+   __asm__ __volatile__ (  \
+   _PRE_EFLAGS("0", "5", "2")  
\
+   "mov %4, %%rcx \n\t"
\
+   _op" %3,%1; \n\t"   
\
+   _POST_EFLAGS("0", "5", "2") 
\
+   : "=m" (_eflags), "+r" ((_dst).val),
\
+ "=&r" (_tmp)  
\
+   : _x ((_src).val) , _y ((_src2).val), "i" (EFLAGS_MASK) 
\
+   : "%rcx" ); 
\

When I disassemble x86_emulate.o I can see the following produced code:

4787:   ...
4788:   41 8f 44 24 08  popq   0x8(%r12)
478d:   48 89 d1mov%rdx,%rcx
4790:   48 0f a5 f0 shld   %cl,%rsi,%rax
4794:   9c  pushfq
4795:   ... 

It's the only code with shld instruction. I don't see how you can have
three different shld instructions here. I'm sure that I'm missing
something here because for me, when we emulate the shld instruction, the
code produced is the same. I mean that src.val and dst.val are always
unsigned long and I don't see why register size are important here. In
fact I don't understand why we need to use the switch ((_dst).bytes)
for the other emulations.

Guillaume
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Prevent trace call into unloaded module text

2008-11-26 Thread Wu Fengguang
On Wed, Nov 26, 2008 at 01:17:55PM +0200, Avi Kivity wrote:
> Wu Fengguang wrote:
> > Add marker_synchronize_unregister() before module unloading.
> > This prevents possible trace calls into unloaded module text.
> >
> > Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
> > ---
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index a87f45e..64f38b3 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -2102,5 +2102,6 @@ void kvm_exit(void)
> > kvm_arch_exit();
> > kvm_exit_debug();
> > __free_page(bad_page);
> > +   marker_synchronize_unregister();
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_exit);
> >   
> 
> What about kvm-intel.ko and kvm-amd.ko?  They also contain markers.

vmx_exit and svm_exit() all calls into kvm_exit(), so they have been
handled in a common way.

> (and, why doesn't module unload do this automatically?)

Maybe most modules don't need it for now?

Thanks,
Fengguang
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-userspace: Cleanup user space NMI injection

2008-11-26 Thread Jan Kiszka
Avi Kivity wrote:
> Jan Kiszka wrote:
>> Cleanup redundant check for an open NMI window before injecting. This
>> will no longer be supported by the kernel, and it was broken by design
>> anyway.
>>
>> This change still allows to run the user space against older kernel
>> modules.
>>
>>   
> 
> Hm.  For interrupts we need to check, since the interrupt might be
> deasserted or masked while the window is closed.
> 
> Is there no way that this can happen for NMIs?

In our emulation, but I also think in real life, there is no way to
deassert an NMI. Therefore, our user space API just sets the nmi_pending
flag. Masking of NMIs (due to mov ss etc.) is handled by the NMI
injection code, and this has to happen in kernel anyway as the other NMI
sources (APIC and IOAPIC) do not bother about the NMI windows as well.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2 ES-OS
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Add KVM version to monitor "info version"

2008-11-26 Thread Avi Kivity

Glauber Costa wrote:

On Wed, Nov 26, 2008 at 8:55 AM, Daniel P. Berrange <[EMAIL PROTECTED]> wrote:
  

On Wed, Nov 26, 2008 at 11:22:03AM +0100, Bj?rn Mork wrote:


Trying to track the current KVM head as closely as possible, I often
find myself wondering which KVM version a particular guest instance is
running.  The attached patch adds this information to the monitor
command "info version":

 (qemu) info version
 0.9.1 (kvm-79)
  

I'd really rather we didn't touch the 'info version' command since it
means we have different syntax from upstream QEMU. If we want KVM
version info exposed, then I'd suggest 'info kvmversion' or some other
new command.



I believe it fits nicely into info kvm. Instead of enabled/disabled,
we could answer
disabled or kvm-whatever

  


(qemu) info kvm
enabled (kvm-123)

or

(qemu) info kvm
enabled
version kvm-123

May be easier to parse, and more backwards compatible.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-userspace: Cleanup user space NMI injection

2008-11-26 Thread Avi Kivity

Jan Kiszka wrote:

Cleanup redundant check for an open NMI window before injecting. This
will no longer be supported by the kernel, and it was broken by design
anyway.

This change still allows to run the user space against older kernel
modules.

  


Hm.  For interrupts we need to check, since the interrupt might be 
deasserted or masked while the window is closed.


Is there no way that this can happen for NMIs?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Add KVM version to monitor "info version"

2008-11-26 Thread Glauber Costa
On Wed, Nov 26, 2008 at 8:55 AM, Daniel P. Berrange <[EMAIL PROTECTED]> wrote:
> On Wed, Nov 26, 2008 at 11:22:03AM +0100, Bj?rn Mork wrote:
>> Trying to track the current KVM head as closely as possible, I often
>> find myself wondering which KVM version a particular guest instance is
>> running.  The attached patch adds this information to the monitor
>> command "info version":
>>
>>  (qemu) info version
>>  0.9.1 (kvm-79)
>
> I'd really rather we didn't touch the 'info version' command since it
> means we have different syntax from upstream QEMU. If we want KVM
> version info exposed, then I'd suggest 'info kvmversion' or some other
> new command.

I believe it fits nicely into info kvm. Instead of enabled/disabled,
we could answer
disabled or kvm-whatever




-- 
Glauber  Costa.
"Free as in Freedom"
http://glommer.net

"The less confident you are, the more serious you have to act."
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Prevent trace call into unloaded module text

2008-11-26 Thread Avi Kivity

Wu Fengguang wrote:

Add marker_synchronize_unregister() before module unloading.
This prevents possible trace calls into unloaded module text.

Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]>
---
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a87f45e..64f38b3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2102,5 +2102,6 @@ void kvm_exit(void)
kvm_arch_exit();
kvm_exit_debug();
__free_page(bad_page);
+   marker_synchronize_unregister();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
  


What about kvm-intel.ko and kvm-amd.ko?  They also contain markers.


(and, why doesn't module unload do this automatically?)

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-userspace: fix module build with --kerneldir

2008-11-26 Thread Avi Kivity

Maik Hentsche wrote:

Avi Kivity <[EMAIL PROTECTED]> wrote:

  
Can't the version be determined directly from kernedir itself?  e.g. 
kerneldir is /lib/modules/$version/build?



How do you get $version if you are crosscompiling? In this case the
usual way of asking uname doesn't work. Instead you somehow need to get
the version of the kernel you're building for from the kernel source
provided in --kerneldir. I don't know any better way than evaluating
UTSRELEASE.
  


If the user specified $kerneldir, it should be in the form 
/lib/modules/$version/build, no?  If so, then you can extract $version 
by removing the prefix and suffix from the string.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: MMU: optimize set_spte for page sync

2008-11-26 Thread Avi Kivity

Marcelo Tosatti wrote:


Here it goes.

KVM: MMU: optimize set_spte for page sync

The write protect verification in set_spte is unnecessary for page sync.

Its guaranteed that, if the unsync spte was writable, the target page
does not have a write protected shadow (if it had, the spte would have
been write protected under mmu_lock by rmap_write_protect before).

Same reasoning applies to mark_page_dirty: the gfn has been marked as
dirty via the pagefault path.

The cost of hash table and memslot lookups are quite significant if the
workload is pagetable write intensive resulting in increased mmu_lock
contention.

  


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-userspace: fix module build with --kerneldir

2008-11-26 Thread Maik Hentsche
Avi Kivity <[EMAIL PROTECTED]> wrote:

> Can't the version be determined directly from kernedir itself?  e.g. 
> kerneldir is /lib/modules/$version/build?

How do you get $version if you are crosscompiling? In this case the
usual way of asking uname doesn't work. Instead you somehow need to get
the version of the kernel you're building for from the kernel source
provided in --kerneldir. I don't know any better way than evaluating
UTSRELEASE.

so long
Maik

-- 
   \   AMD Saxony Limited Liability Company & Co. KG
 Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
  System   |  Register Court Dresden: HRA 4896
 Research  |  General Partner authorized to represent:
  Center   | AMD Saxony LLC (Wilmington, Delaware, US)
   / General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe,
Thomas McCoy


signature.asc
Description: PGP signature


Re: [PATCH 2/2] KVM: Change to use new APIs for KVM VT-d

2008-11-26 Thread Avi Kivity

Han, Weidong wrote:

This patch changes to use new APIs for KVM VT-d, and add device deassignment 
for hotplug.

index 44fd7fa..558bc32 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -425,6 +425,8 @@ struct kvm_trace_rec {
   struct kvm_assigned_pci_dev)
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
struct kvm_assigned_irq)
+#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x71, \
+struct kvm_assigned_pci_dev)
  


Need to advertise using KVM_CAP_DEVICE_DEASSIGNMENT.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Add KVM version to monitor "info version"

2008-11-26 Thread Daniel P. Berrange
On Wed, Nov 26, 2008 at 11:22:03AM +0100, Bj?rn Mork wrote:
> Trying to track the current KVM head as closely as possible, I often
> find myself wondering which KVM version a particular guest instance is
> running.  The attached patch adds this information to the monitor
> command "info version":
> 
>  (qemu) info version
>  0.9.1 (kvm-79)

I'd really rather we didn't touch the 'info version' command since it
means we have different syntax from upstream QEMU. If we want KVM
version info exposed, then I'd suggest 'info kvmversion' or some other
new command.

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] VT-d: Support multiple device assignment for KVM

2008-11-26 Thread Avi Kivity

Han, Weidong wrote:

In order to support multiple device assignment for KVM, this patch does 
following main changes:
   - extend dmar_domain to own multiple devices from different iommus, use a 
bitmap of iommus to replace iommu pointer in dmar_domain.
   - add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d usage. Many 
functions (e.g. intel_map_single() and intel_unmap_single()) won't be used by 
KVM VT-d. Let them return directly when this flag is set.
  



This seems brittle.  An API that has some functions shorted out 
depending on some flag is hard to understand and use.


We should either implement the functions, or split the API into a basic 
version that talks only to one device, and an expanded versions that 
talks to multiple devices, and is implemented by the using the lower 
level API.  This may require more changes due to the need to share io 
pagetables.



   - "SAGAW" capability may be different across iommus, that's to say the VT-d 
page table levels may be different among iommus. This patch uses a defaut agaw, and skip 
top levels of page tables for iommus which have smaller agaw than default.
  


Neat trick.


 void free_dmar_iommu(struct intel_iommu *iommu)
 {
struct dmar_domain *domain;
@@ -960,7 +1054,14 @@ void free_dmar_iommu(struct intel_iommu *iommu)
for (; i < cap_ndoms(iommu->cap); ) {
domain = iommu->domains[i];
clear_bit(i, iommu->domain_ids);
-   domain_exit(domain);
+
+   if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+   /* domain may be referenced by other iommus */
+   if (domain_in_other_iommus(domain, iommu) == 0)
+   domain_exit(domain);
+   }
+   else
+   domain_exit(domain);
  


Things like this are best expressed using reference counts, which 
removes the need for the test as well.



+
+   /* Skip top levels of page tables for
+* iommu which has less agaw than default.
+*/
+   for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+   pgd = phys_to_virt(dma_pte_addr(*pgd));
+   if (!dma_pte_present(*pgd)) {
+   spin_unlock_irqrestore(&iommu->lock, flags);
+   return -ENOMEM;
+   }
+   }
+   }
  


Need to check that the agaw is sufficient for mapped memory (and when 
adding a device or mapping more memory, need a similar check).



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm-userspace: fix module build with --kerneldir

2008-11-26 Thread Avi Kivity

Joerg Roedel wrote:

From: Maik Hentsche <[EMAIL PROTECTED]>

When kvm-userspace is build with a different kernel version than the
running kernel the depmod at the end will fail. This patch fixed the
problem.

Signed-off-by: Maik Hentsche <[EMAIL PROTECTED]>
@@ -0,0 +1,9 @@
+#include 
+#include 
+
+int main()
+{
+printf("%s",UTS_RELEASE);
+return(0);
+}
+
  


Can't the version be determined directly from kernedir itself?  e.g. 
kerneldir is /lib/modules/$version/build?



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Add KVM version to monitor "info version"

2008-11-26 Thread Bjørn Mork
Trying to track the current KVM head as closely as possible, I often
find myself wondering which KVM version a particular guest instance is
running.  The attached patch adds this information to the monitor
command "info version":

 (qemu) info version
 0.9.1 (kvm-79)


Signed-off-by: Bjørn Mork <[EMAIL PROTECTED]>
---
 qemu/monitor.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu/monitor.c b/qemu/monitor.c
index a58a18e..f61fe56 100644
--- a/qemu/monitor.c
+++ b/qemu/monitor.c
@@ -246,7 +246,7 @@ static void do_info(const char *item)
 
 static void do_info_version(void)
 {
-  term_printf("%s\n", QEMU_VERSION);
+  term_printf("%s\n", QEMU_VERSION " (" KVM_VERSION ")");
 }
 
 static void do_info_name(void)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/9][v6] Enable MSI for KVM

2008-11-26 Thread Avi Kivity

Sheng Yang wrote:

Hi

MSI patchset v6 is coming...

v5->v6

Addressed all comments from Avi on v5. I also set msi2intx=0 in non-x86
architecture machines.
  


Applied all, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFT] Rebased gdb/debug register patches

2008-11-26 Thread Jan Kiszka
Hollis Blanchard wrote:
> On Mon, 2008-11-24 at 16:27 +0100, Jan Kiszka wrote:
>> Hi,
>>
>> this is not yet the official submission, but a request for testing:
>>
>> I'm happy to announce the availability of a rebased patch series to
>> enhance KVM's guest debugging support as well as to add debug register
>> emulation. It was rebased because QEMU mainline recently accepted the
>> core of my corresponding bits and KVM has merged them over. A few
>> patches are still awaiting QEMU merge, and two of them are mandatory to
>> provide a clean foundation for the KVM changes - therefore this
>> intermediate step.
>>
>> To test the series, checkout the kernel bits from
>>
>>  git://git.kiszka.org/linux-kvm.git gdb-queue
>>
>> and the user space part from
>>
>>  git://git.kiszka.org/kvm-userspace.git gdb-queue
>>
>> Early feedback welcome, also before the final submission. And if someone
>> could look into AMD/SVM implementation, this would also be great
>> (unfortunately, there is no customer need for it ATM, thus no resources).
> 
> Would you mind posting the patches here, to make feedback easier for the
> reviewers?

I was waiting for the first two of them getting merged into qemu, what
has happened yesterday, plus there was one open oddity that is
understood and fixed now as well. Will post them later today.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2 ES-OS
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] PCI: allow pci driver to support only dynids

2008-11-26 Thread Jean Delvare
Hi Chris,

On Tue, 25 Nov 2008 19:36:10 -0800, Chris Wright wrote:
> commit b41d6cf38e27 (PCI: Check dynids driver_data value for validity)
> requires all drivers to include an id table to try and match
> driver_data.  Before validating driver_data check driver has an id
> table.

Sorry for missing this case.

> Cc: Jean Delvare <[EMAIL PROTECTED]>
> Cc: Milton Miller <[EMAIL PROTECTED]>
> Signed-off-by: Chris Wright <[EMAIL PROTECTED]>
> ---
>  drivers/pci/pci-driver.c |   20 +++-
>  1 files changed, 11 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index b4cdd69..0a5edbe 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -48,7 +48,7 @@ store_new_id(struct device_driver *driver, const char *buf, 
> size_t count)
>   subdevice=PCI_ANY_ID, class=0, class_mask=0;
>   unsigned long driver_data=0;
>   int fields=0;
> - int retval;
> + int retval=0;
>  
>   fields = sscanf(buf, "%x %x %x %x %x %x %lx",
>   &vendor, &device, &subvendor, &subdevice,
> @@ -58,16 +58,18 @@ store_new_id(struct device_driver *driver, const char 
> *buf, size_t count)
>  
>   /* Only accept driver_data values that match an existing id_table
>  entry */
> - retval = -EINVAL;
> - while (ids->vendor || ids->subvendor || ids->class_mask) {
> - if (driver_data == ids->driver_data) {
> - retval = 0;
> - break;
> + if (ids) {
> + retval = -EINVAL;
> + while (ids->vendor || ids->subvendor || ids->class_mask) {
> + if (driver_data == ids->driver_data) {
> + retval = 0;
> + break;
> + }
> + ids++;
>   }
> - ids++;
> + if (retval) /* No match */
> + return retval;
>   }
> - if (retval) /* No match */
> - return retval;
>  
>   dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
>   if (!dynid)

Do we really want to let the user add a new id to a driver which didn't
have any? Wouldn't it be safer to simply not create the new_id sysfs
file if driver->id_table is NULL? That's a one-line change:

Signed-off-by: Jean Delvare <[EMAIL PROTECTED]>
---
 drivers/pci/pci-driver.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- linux-2.6.28-rc6.orig/drivers/pci/pci-driver.c  2008-10-24 
09:28:14.0 +0200
+++ linux-2.6.28-rc6/drivers/pci/pci-driver.c   2008-11-26 09:23:46.0 
+0100
@@ -113,7 +113,7 @@ static int
 pci_create_newid_file(struct pci_driver *drv)
 {
int error = 0;
-   if (drv->probe != NULL)
+   if (drv->probe != NULL && drv->id_table != NULL)
error = driver_create_file(&drv->driver, &driver_attr_new_id);
return error;
 }

As a side note, I am curious what PCI driver we do have which has
driver->probe defined but no driver->id_table. Did you hit an actual
issue or are you fixing a theoretical one?

Thanks,
-- 
Jean Delvare
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html