Re: [PATCH 5/5] kvm: expose MSI capability to guest
On Monday 24 November 2008 19:50:35 Sheng Yang wrote: > Signed-off-by: Sheng Yang <[EMAIL PROTECTED]> Oh, hold this one for a moment... I don't want to deal with compatible problem of deliver msi_msg, so I would send out gsi->msi mapping patch and update the userspace patch. -- regards Yang, Sheng > --- > qemu/hw/device-assignment.c | 90 > +++--- qemu/hw/device-assignment.h | > 2 + > 2 files changed, 85 insertions(+), 7 deletions(-) > > diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c > index d3105bc..67bd6b3 100644 > --- a/qemu/hw/device-assignment.c > +++ b/qemu/hw/device-assignment.c > @@ -262,7 +262,8 @@ static void assigned_dev_pci_write_config(PCIDevice *d, > uint32_t address, } > > if ((address >= 0x10 && address <= 0x24) || address == 0x34 || > -address == 0x3c || address == 0x3d) { > +address == 0x3c || address == 0x3d || > +pci_access_cap_config(d, address, len)) { > /* used for update-mappings (BAR emulation) */ > pci_default_write_config(d, address, val, len); > return; > @@ -296,7 +297,8 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice > *d, uint32_t address, AssignedDevice *pci_dev = container_of(d, > AssignedDevice, dev); > > if ((address >= 0x10 && address <= 0x24) || address == 0x34 || > -address == 0x3c || address == 0x3d) { > +address == 0x3c || address == 0x3d || > +pci_access_cap_config(d, address, len)) { > val = pci_default_read_config(d, address, len); > DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", >(d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, > len); @@ -325,11 +327,13 @@ do_log: > DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", >(d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); > > -/* kill the special capabilities */ > -if (address == 4 && len == 4) > -val &= ~0x10; > -else if (address == 6) > -val &= ~0x10; > +if (!pci_dev->cap.available) { > +/* kill the special capabilities */ > +if (address == 4 && len == 4) > +val &= ~0x10; > +else if (address == 6) > +val &= ~0x10; > +} > > return val; > } > @@ -537,6 +541,73 @@ void assigned_dev_update_irq(PCIDevice *d) > } > } > > +#ifdef KVM_CAP_DEVICE_MSI > +static void assigned_dev_enable_msi(PCIDevice *pci_dev) > +{ > +int r; > +struct kvm_assigned_irq assigned_irq_data; > +AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, > dev); + > +memset(&assigned_irq_data, 0, sizeof assigned_irq_data); > +assigned_irq_data.assigned_dev_id = > +calc_assigned_dev_id(assigned_dev->h_busnr, > +(uint8_t)assigned_dev->h_devfn); > +assigned_irq_data.guest_msi.addr_lo = *(uint32_t *) > +(pci_dev->cap.config + 4); > +assigned_irq_data.guest_msi.data = *(uint16_t *) > +(pci_dev->cap.config + 8); > +assigned_irq_data.flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI; > +r = kvm_assign_irq(kvm_context, &assigned_irq_data); > +if (r < 0) { > +perror("assigned_dev_enable_msi"); > +assigned_dev->cap.enabled &= ~ASSIGNED_DEVICE_MSI_ENABLED; > +/* Fail to enable MSI, enable INTx instead */ > +assigned_dev_update_irq(pci_dev); > +} > +} > +#endif > + > +void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t > address, + uint32_t val, int len) > +{ > +AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, > dev); +uint32_t pos = pci_dev->cap.start; > +uint8_t target_byte, target_position; > + > +pci_default_cap_write_config(pci_dev, address, val, len); > +#ifdef KVM_CAP_DEVICE_MSI > +/* Check if guest want to enable MSI */ > +if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { > +target_position = pos + 2; > +if (address <= target_position && address + len > target_position) > { +target_byte = (uint8_t)(val >> (target_position - address)); > +if (target_byte == 1) { > +assigned_dev->cap.enabled |= ASSIGNED_DEVICE_MSI_ENABLED; > +assigned_dev_enable_msi(pci_dev); > +if (!assigned_dev->cap.enabled & > ASSIGNED_DEVICE_MSI_ENABLED) + > pci_dev->cap.config[target_position - pos] = 0; +} > +} > +pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH; > +} > +#endif > +return; > +} > + > +void assigned_device_pci_cap_init(PCIDevice *pci_dev) > +{ > +AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev); > + > +#ifdef KVM_CAP_DEVICE_MSI > +/* Expose MSI capability > + * MSI capability is the 1st capability in cap.config */ > +if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { > +pci_dev->cap.config[0] = 0x5; > +pci_de
[ kvm-Bugs-2327497 ] NFS copy makes guest network unstable
Bugs item #2327497, was opened at 2008-11-22 07:53 Message generated for change (Comment added) made by jiajun You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2327497&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None >Status: Open Resolution: None Priority: 5 Private: No Submitted By: Jiajun Xu (jiajun) Assigned to: Nobody/Anonymous (nobody) Summary: NFS copy makes guest network unstable Initial Comment: The NFS network of KVM guest is very unstable. When we copy a >600M file to the guest by NFS mount. The guest's network will down after finishing at about 500M size. Then, guest's network is down. Host also can not use "ping" or "scp". And sometimes, host also complains: ping: sendmsg: No buffer space available. I see memory by 'free', there is only 69MB free (While totally 8GB on the machine!). Using scp to copy file can not reproduce it. This issue is very easy to be reproduced (>50%). Reproduce steps: 1. Create a guest and config NFS sharing folder on it 2. Mount the nfs folder to local folder --- /media 3. cp xxx /media 4. After some time, guest network is down -- >Comment By: Jiajun Xu (jiajun) Date: 2008-11-26 19:43 Message: We tried e1000, virtio and 8139cp driver. No such issue found. Seems the issue only occur with 8139too driver. -- Comment By: Avi Kivity (avik) Date: 2008-11-25 01:47 Message: I tested overnight with e1000, 2.5TB and still going. Please retest with e1000. -- Comment By: Fabio Coatti (cova) Date: 2008-11-24 07:47 Message: I wouldn't be so sure of 8139 culprit. We are seeing this with e1000 and virtio driver... -- Comment By: Avi Kivity (avik) Date: 2008-11-24 07:31 Message: Seems to be a bug in the 8139too driver. Please try with the 8139cp driver (which has much better performance). -- Comment By: Jiajun Xu (jiajun) Date: 2008-11-23 23:01 Message: We did not test such case before. I think the issue also exists before. -- Comment By: Avi Kivity (avik) Date: 2008-11-23 13:13 Message: It's almost certainly a problem with the qemu process, not the bridge. -- Comment By: Fabio Coatti (cova) Date: 2008-11-23 12:15 Message: I can't find out easily wich kvm version worked (nor be sure that is kvm executable itself to have issues), as the subsystems involved are quite a lot an some time passet prior to spot the problem. (kvm itself, network birdge, host kernel may be involved, of course). Now I'm trying to find out the combination that worked, but at the same time I'll be willing to do some tests to discover (on the actual non working setup) some hints, as the bisection can be a very daunting task. (this issue has been noticed after several upgrades). -- Comment By: Avi Kivity (avik) Date: 2008-11-23 10:40 Message: Is this a regression, or a new test? It it is a regression, what was the last version that worked? -- Comment By: Fabio Coatti (cova) Date: 2008-11-23 07:12 Message: I can confirm a similar behaviour: a kvm machines gets large amounts of data via http protocol and saves that files over NFS. (file sizes are in the range of 4-20 MB approx and the machine downloads several of that files.) After some time (I don't have a precise figure, but some hundreds of MB) the guest nework goes down. No answers even to ping coming from outside. the guest uses virtio network drivers (as normal drivers are way too slow) host machine: 64 bit AMD dual quad core 16GB, tried with several kernels ranging from 2.6.27.4 to 2.6.25.19 guest: 32 bit kvm machines (tried 76/77/78 ). both UP and SMP configuration. kernels: same as host machine network setup: bridged network with br0 device on host machine. We are using 2 vlans for guest and we have tried all the configuration (single tap and vlans resolved on guest side,then two tap so two interfaces on guest machine and so on) without any improvement. I can exclude MTU issues, as we have seen that and solved, this issue is completely different. At some point, sniffing traffic on host interfaces we are able to see only ARP requests coming from guest, nothing more. I understand that data is in no way complete, but I'm willing to do any debug if someone gives me any hint on how to do so correctly. Thank
RE: [PATCH 1/2] VT-d: Support multiple device assignment for KVM
Yu, Fenghua wrote: > Avi Kivity wrote: >>> Han, Weidong wrote: In order to support multiple device assignment for KVM, this patch does following main changes: - extend dmar_domain to own multiple devices from different iommus, use a bitmap of iommus to replace iommu pointer in dmar_domain. - add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d usage. Many functions (e.g. intel_map_single() and intel_unmap_single()) won't be used by KVM VT-d. Let them return directly when this flag is set. >>> >>> >>> This seems brittle. An API that has some functions shorted out >>> depending on some flag is hard to understand and use. > >> This flag just helps identify kvm VT-d usage, and let kvm VT-d APIs >> reuse >native VT-d code, it needn't duplicate code for kvm VT-d >> APIs, and it won't >impact existed native VT-d code. > >>> >>> We should either implement the functions, or split the API into a >>> basic version that talks only to one device, and an expanded >>> versions that talks to multiple devices, and is implemented by the >>> using the lower level API. This may require more changes due to >>> the need to share io pagetables. > >> The expanded versions that supports multiple devices will need to >> change >dmar_domain, this will cause lots of changes, almost >> duplicate the main >functions. > > I would agree with Avi. It's wired to merge the KVM api and native > api together while they are logically separate. If we have two sets > of interfaces for KVM and native iommu each, code is clean and robust > and easy to extend in the future. If you well organize the code, we > should have both good interface and shared code. While it makes sense > to share code at beginning of KVM like this, it will get harder to > maintain and evolve both KVM and native later, e.g. changing one part > may break the other. I will remove the flag judgement, and implement independent functions for KVM. > > Some comments on the code in this patch: > > 1. You don't need to check DOMAIN_FLAG_VIRTUAL_MACHINE in the DMA > functions. When the DMA functions are called, the domain should not > have the DOMAIN_FLAG_VIRTUAL_MACHINE flag. Otherwise, the domain > allocation code is wrong. Yes, I will remove these meaningless checks. > > 2. You need to initialize domain->flags=0 for DMA domain. It's random > number after the domain is allocated by kmem_cache_alloc. Right. Thanks. Regards, Weidong > > Thanks. > > -Fenghua -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/5][v2] Userspace for MSI support of KVM
On Monday 24 November 2008 19:50:30 Sheng Yang wrote: > Hi Avi & Anthony > > Here is the userspace for MSI support of KVM. > > Main change from v1: > Make device assignment depends on libpci. > Move capability framework to pci.c (this patch may can be accepted by > QEmu). Avi & Anthony Any comments? Can QEmu upstream accept "[PATCH 4/5] Support for device capability"? Thanks! -- regards Yang, Sheng -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] markers: comment marker_synchronize_unregister() on data dependency
On Thu, Nov 27, 2008 at 03:23:06AM +0200, Lai Jiangshan wrote: > Wu Fengguang wrote: > > [updated patch to include Documentation/markers.txt changes] > > > > Add document and comments on marker_synchronize_unregister(): it > > should be called before freeing resources that the probes depend on. > > > > Based on comments from Lai Jiangshan and Mathieu Desnoyers. > > > > Cc: Lai Jiangshan <[EMAIL PROTECTED]> > > Cc: Mathieu Desnoyers <[EMAIL PROTECTED]> > > Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> > > --- > > diff --git a/Documentation/markers.txt b/Documentation/markers.txt > > index 089f613..8bf6afe 100644 > > --- a/Documentation/markers.txt > > +++ b/Documentation/markers.txt > > @@ -51,11 +51,15 @@ to call) for the specific marker through > > marker_probe_register() and can be > > activated by calling marker_arm(). Marker deactivation can be done by > > calling > > marker_disarm() as many times as marker_arm() has been called. Removing a > > probe > > is done through marker_probe_unregister(); it will disarm the probe. > > -marker_synchronize_unregister() must be called before the end of the > > module exit > > -function to make sure there is no caller left using the probe. This, and > > the > > -fact that preemption is disabled around the probe call, make sure that > > probe > > -removal and module unload are safe. See the "Probe example" section below > > for a > > -sample probe module. > > + > > +marker_synchronize_unregister() must be called before the first occurrence > > of > > +- the end of the module exit function, > > + to make sure there is no caller left using the probe; > > +- the free of any resource used by the probes, > > + to make sure the probes wont be accessing destructed data. > > +This, and the fact that preemption is disabled around the probe call, make > > sure > > +that probe removal and module unload are safe. See the "Probe example" > > section > > +below for a sample probe module. > > > > The marker mechanism supports inserting multiple instances of the same > > marker. > > Markers can be put in inline functions, inlined static functions, and > > diff --git a/include/linux/marker.h b/include/linux/marker.h > > index 889196c..32ce4f2 100644 > > --- a/include/linux/marker.h > > +++ b/include/linux/marker.h > > @@ -162,8 +162,10 @@ extern void *marker_get_private_data(const char *name, > > marker_probe_func *probe, > > > > /* > > * marker_synchronize_unregister must be called between the last marker > > probe > > - * unregistration and the end of module exit to make sure there is no > > caller > > - * executing a probe when it is freed. > > + * unregistration and the first one of > > + * - the end of module exit function > > + * - the free of any resource used by the probes > > Does "destruction" contain the meaning of "free" and other destruction > behavior? Ahh, better to use "invalid data" instead of "destructed data"? > It's every good job, thank you. > > Reviewed-by: Lai Jiangshan <[EMAIL PROTECTED]> Thank you, Fengguang --- markers: comment marker_synchronize_unregister() on data dependency Add document and comments on marker_synchronize_unregister(): it should be called before freeing resources that the probes depend on. Based on comments from Lai Jiangshan and Mathieu Desnoyers. Cc: Mathieu Desnoyers <[EMAIL PROTECTED]> Reviewed-by: Lai Jiangshan <[EMAIL PROTECTED]> Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> --- diff --git a/Documentation/markers.txt b/Documentation/markers.txt index 089f613..8bf6afe 100644 --- a/Documentation/markers.txt +++ b/Documentation/markers.txt @@ -51,11 +51,15 @@ to call) for the specific marker through marker_probe_register() and can be activated by calling marker_arm(). Marker deactivation can be done by calling marker_disarm() as many times as marker_arm() has been called. Removing a probe is done through marker_probe_unregister(); it will disarm the probe. -marker_synchronize_unregister() must be called before the end of the module exit -function to make sure there is no caller left using the probe. This, and the -fact that preemption is disabled around the probe call, make sure that probe -removal and module unload are safe. See the "Probe example" section below for a -sample probe module. + +marker_synchronize_unregister() must be called before the first occurrence of +- the end of the module exit function, + to make sure there is no caller left using the probe; +- the free of any resource used by the probes, + to make sure the probes wont be accessing invalid data. +This, and the fact that preemption is disabled around the probe call, make sure +that probe removal and module unload are safe. See the "Probe example" section +below for a sample probe module. The marker mechanism supports inserting multiple instances of the same marker. Markers can be put in inline functions, inlined static functions, and diff --git a/include/linux/marker.h b/include/linux/marker.h index 889196c..
Re: [PATCH] markers: comment marker_synchronize_unregister() on data dependency
Wu Fengguang wrote: > [updated patch to include Documentation/markers.txt changes] > > Add document and comments on marker_synchronize_unregister(): it > should be called before freeing resources that the probes depend on. > > Based on comments from Lai Jiangshan and Mathieu Desnoyers. > > Cc: Lai Jiangshan <[EMAIL PROTECTED]> > Cc: Mathieu Desnoyers <[EMAIL PROTECTED]> > Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> > --- > diff --git a/Documentation/markers.txt b/Documentation/markers.txt > index 089f613..8bf6afe 100644 > --- a/Documentation/markers.txt > +++ b/Documentation/markers.txt > @@ -51,11 +51,15 @@ to call) for the specific marker through > marker_probe_register() and can be > activated by calling marker_arm(). Marker deactivation can be done by calling > marker_disarm() as many times as marker_arm() has been called. Removing a > probe > is done through marker_probe_unregister(); it will disarm the probe. > -marker_synchronize_unregister() must be called before the end of the module > exit > -function to make sure there is no caller left using the probe. This, and the > -fact that preemption is disabled around the probe call, make sure that probe > -removal and module unload are safe. See the "Probe example" section below > for a > -sample probe module. > + > +marker_synchronize_unregister() must be called before the first occurrence of > +- the end of the module exit function, > + to make sure there is no caller left using the probe; > +- the free of any resource used by the probes, > + to make sure the probes wont be accessing destructed data. > +This, and the fact that preemption is disabled around the probe call, make > sure > +that probe removal and module unload are safe. See the "Probe example" > section > +below for a sample probe module. > > The marker mechanism supports inserting multiple instances of the same > marker. > Markers can be put in inline functions, inlined static functions, and > diff --git a/include/linux/marker.h b/include/linux/marker.h > index 889196c..32ce4f2 100644 > --- a/include/linux/marker.h > +++ b/include/linux/marker.h > @@ -162,8 +162,10 @@ extern void *marker_get_private_data(const char *name, > marker_probe_func *probe, > > /* > * marker_synchronize_unregister must be called between the last marker probe > - * unregistration and the end of module exit to make sure there is no caller > - * executing a probe when it is freed. > + * unregistration and the first one of > + * - the end of module exit function > + * - the free of any resource used by the probes Does "destruction" contain the meaning of "free" and other destruction behavior? It's every good job, thank you. Reviewed-by: Lai Jiangshan <[EMAIL PROTECTED]> > + * to ensure the code and data are valid for any possibly running probes. > */ > #define marker_synchronize_unregister() synchronize_sched() > > > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] markers: comment marker_synchronize_unregister() on data dependency
[updated patch to include Documentation/markers.txt changes] Add document and comments on marker_synchronize_unregister(): it should be called before freeing resources that the probes depend on. Based on comments from Lai Jiangshan and Mathieu Desnoyers. Cc: Lai Jiangshan <[EMAIL PROTECTED]> Cc: Mathieu Desnoyers <[EMAIL PROTECTED]> Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> --- diff --git a/Documentation/markers.txt b/Documentation/markers.txt index 089f613..8bf6afe 100644 --- a/Documentation/markers.txt +++ b/Documentation/markers.txt @@ -51,11 +51,15 @@ to call) for the specific marker through marker_probe_register() and can be activated by calling marker_arm(). Marker deactivation can be done by calling marker_disarm() as many times as marker_arm() has been called. Removing a probe is done through marker_probe_unregister(); it will disarm the probe. -marker_synchronize_unregister() must be called before the end of the module exit -function to make sure there is no caller left using the probe. This, and the -fact that preemption is disabled around the probe call, make sure that probe -removal and module unload are safe. See the "Probe example" section below for a -sample probe module. + +marker_synchronize_unregister() must be called before the first occurrence of +- the end of the module exit function, + to make sure there is no caller left using the probe; +- the free of any resource used by the probes, + to make sure the probes wont be accessing destructed data. +This, and the fact that preemption is disabled around the probe call, make sure +that probe removal and module unload are safe. See the "Probe example" section +below for a sample probe module. The marker mechanism supports inserting multiple instances of the same marker. Markers can be put in inline functions, inlined static functions, and diff --git a/include/linux/marker.h b/include/linux/marker.h index 889196c..32ce4f2 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -162,8 +162,10 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe, /* * marker_synchronize_unregister must be called between the last marker probe - * unregistration and the end of module exit to make sure there is no caller - * executing a probe when it is freed. + * unregistration and the first one of + * - the end of module exit function + * - the free of any resource used by the probes + * to ensure the code and data are valid for any possibly running probes. */ #define marker_synchronize_unregister() synchronize_sched() -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] PCI: allow pci driver to support only dynids
* Jean Delvare ([EMAIL PROTECTED]) wrote: > Ah, OK, then your patch makes full sense and mine doesn't, sorry for > the noise. Feel free to add my Acked-by on your patch. Thanks Jean. -chris -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] PCI: allow pci driver to support only dynids
Hi Chris, On Wed, 26 Nov 2008 07:19:40 -0800, Chris Wright wrote: > * Jean Delvare ([EMAIL PROTECTED]) wrote: > > As a side note, I am curious what PCI driver we do have which has > > driver->probe defined but no driver->id_table. Did you hit an actual > > issue or are you fixing a theoretical one? > > Such a driver was patch 2/2. > > http://lkml.org/lkml/2008/11/26/11 > > It's meant to have only dynamic ids, worked fine on 2.6.27 and oopsed > from NULL id-> deref on current git. Ah, OK, then your patch makes full sense and mine doesn't, sorry for the noise. Feel free to add my Acked-by on your patch. -- Jean Delvare -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [SR-IOV driver example 0/3] introduction
Yu Zhao wrote: SR-IOV drivers of Intel 82576 NIC are available. There are two parts of the drivers: Physical Function driver and Virtual Function driver. The PF driver is based on the IGB driver and is used to control PF to allocate hardware specific resources and interface with the SR-IOV core. The VF driver is a new NIC driver that is same as the traditional PCI device driver. It works in both the host and the guest (Xen and KVM) environment. These two drivers are testing versions and they are *only* intended to show how to use SR-IOV API. Intel 82576 NIC specification can be found at: http://download.intel.com/design/network/datashts/82576_Datasheet_v2p1.pdf [SR-IOV driver example 1/3] PF driver: allocate hardware specific resource [SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core [SR-IOV driver example 3/3] VF driver tar ball Please copy [EMAIL PROTECTED] on all network-related patches. This is where the network developers live, and all patches on this list are automatically archived for review and handling at http://patchwork.ozlabs.org/project/netdev/list/ Jeff -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core
On Wed, Nov 26, 2008 at 11:27:10AM -0800, Nakajima, Jun wrote: > On 11/26/2008 8:58:59 AM, Greg KH wrote: > > On Wed, Nov 26, 2008 at 10:21:56PM +0800, Yu Zhao wrote: > > > This patch integrates the IGB driver with the SR-IOV core. It shows > > > how the SR-IOV API is used to support the capability. Obviously > > > people does not need to put much effort to integrate the PF driver > > > with SR-IOV core. All SR-IOV standard stuff are handled by SR-IOV > > > core and PF driver once it gets the necessary information (i.e. > > > number of Virtual > > > Functions) from the callback function. > > > > > > --- > > > drivers/net/igb/igb_main.c | 30 ++ > > > 1 files changed, 30 insertions(+), 0 deletions(-) > > > > > > diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c > > > index bc063d4..b8c7dc6 100644 > > > --- a/drivers/net/igb/igb_main.c > > > +++ b/drivers/net/igb/igb_main.c > > > @@ -139,6 +139,7 @@ void igb_set_mc_list_pools(struct igb_adapter *, > > > struct e1000_hw *, int, u16); static int igb_vmm_control(struct > > > igb_adapter *, bool); static int igb_set_vf_mac(struct net_device > > > *, int, u8*); static void igb_mbox_handler(struct igb_adapter *); > > > +static int igb_virtual(struct pci_dev *, int); > > > #endif > > > > > > static int igb_suspend(struct pci_dev *, pm_message_t); @@ -184,6 > > > +185,9 @@ static struct pci_driver igb_driver = { #endif > > > .shutdown = igb_shutdown, > > > .err_handler = &igb_err_handler, > > > +#ifdef CONFIG_PCI_IOV > > > + .virtual = igb_virtual > > > +#endif > > > > #ifdef should not be needed, right? > > > > Good point. I think this is because the driver is expected to build on > older kernels also, That should not be an issue for patches that are being submitted, right? And if this is the case, shouldn't it be called out in the changelog entry? > but the problem is that the driver (and probably others) is broken > unless the kernel is built with CONFIG_PCI_IOV because of the > following hunk, for example. > > However, we don't want to use #ifdef for the (*virtual) field in the > header. One option would be to define a constant like the following > along with those changes. > #define PCI_DEV_IOV > > Any better idea? Just always declare it in your driver, which will be added _after_ this field gets added to the kernel tree as well. It's not a big deal, just an ordering of patches issue. Because remember, don't add #ifdefs to drivers, they should not be needed at all. thanks, greg k-h -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core
On 11/26/2008 8:58:59 AM, Greg KH wrote: > On Wed, Nov 26, 2008 at 10:21:56PM +0800, Yu Zhao wrote: > > This patch integrates the IGB driver with the SR-IOV core. It shows > > how the SR-IOV API is used to support the capability. Obviously > > people does not need to put much effort to integrate the PF driver > > with SR-IOV core. All SR-IOV standard stuff are handled by SR-IOV > > core and PF driver once it gets the necessary information (i.e. > > number of Virtual > > Functions) from the callback function. > > > > --- > > drivers/net/igb/igb_main.c | 30 ++ > > 1 files changed, 30 insertions(+), 0 deletions(-) > > > > diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c > > index bc063d4..b8c7dc6 100644 > > --- a/drivers/net/igb/igb_main.c > > +++ b/drivers/net/igb/igb_main.c > > @@ -139,6 +139,7 @@ void igb_set_mc_list_pools(struct igb_adapter *, > > struct e1000_hw *, int, u16); static int igb_vmm_control(struct > > igb_adapter *, bool); static int igb_set_vf_mac(struct net_device > > *, int, u8*); static void igb_mbox_handler(struct igb_adapter *); > > +static int igb_virtual(struct pci_dev *, int); > > #endif > > > > static int igb_suspend(struct pci_dev *, pm_message_t); @@ -184,6 > > +185,9 @@ static struct pci_driver igb_driver = { #endif > > .shutdown = igb_shutdown, > > .err_handler = &igb_err_handler, > > +#ifdef CONFIG_PCI_IOV > > + .virtual = igb_virtual > > +#endif > > #ifdef should not be needed, right? > Good point. I think this is because the driver is expected to build on older kernels also, but the problem is that the driver (and probably others) is broken unless the kernel is built with CONFIG_PCI_IOV because of the following hunk, for example. However, we don't want to use #ifdef for the (*virtual) field in the header. One option would be to define a constant like the following along with those changes. #define PCI_DEV_IOV Any better idea? Thanks, . Jun Nakajima | Intel Open Source Technology Center @@ -259,6 +266,7 @@ struct pci_dev { struct list_head msi_list; #endif struct pci_vpd *vpd; + struct pci_iov *iov; }; extern struct pci_dev *alloc_pci_dev(void); @@ -426,6 +434,7 @@ struct pci_driver { int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ void (*shutdown) (struct pci_dev *dev); + int (*virtual) (struct pci_dev *dev, int nr_virtfn); struct pm_ext_ops *pm; struct pci_error_handlers *err_handler; struct device_driverdriver; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 1/2] VT-d: Support multiple device assignment for KVM
Avi Kivity wrote: >> Han, Weidong wrote: >>> In order to support multiple device assignment for KVM, this patch >>> does following main changes: >>>- extend dmar_domain to own multiple devices from different >>> iommus, use a bitmap of iommus to replace iommu pointer in >>> dmar_domain. >>>- add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d >>> usage. Many functions (e.g. intel_map_single() and >>> intel_unmap_single()) won't be used by KVM VT-d. Let them return >>> directly when this flag is set. >>> >> >> >> This seems brittle. An API that has some functions shorted out >> depending on some flag is hard to understand and use. >This flag just helps identify kvm VT-d usage, and let kvm VT-d APIs reuse >>native VT-d code, it needn't duplicate code for kvm VT-d APIs, and it won't >>impact existed native VT-d code. >> >> We should either implement the functions, or split the API into a >> basic version that talks only to one device, and an expanded versions >> that talks to multiple devices, and is implemented by the using the >> lower level API. This may require more changes due to the need to >> share io pagetables. >The expanded versions that supports multiple devices will need to change >>dmar_domain, this will cause lots of changes, almost duplicate the main >>functions. I would agree with Avi. It's wired to merge the KVM api and native api together while they are logically separate. If we have two sets of interfaces for KVM and native iommu each, code is clean and robust and easy to extend in the future. If you well organize the code, we should have both good interface and shared code. While it makes sense to share code at beginning of KVM like this, it will get harder to maintain and evolve both KVM and native later, e.g. changing one part may break the other. Some comments on the code in this patch: 1. You don't need to check DOMAIN_FLAG_VIRTUAL_MACHINE in the DMA functions. When the DMA functions are called, the domain should not have the DOMAIN_FLAG_VIRTUAL_MACHINE flag. Otherwise, the domain allocation code is wrong. 2. You need to initialize domain->flags=0 for DMA domain. It's random number after the domain is allocated by kmem_cache_alloc. Thanks. -Fenghua -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Remove TARGET_PAGE_SIZE from virtio interface
On Wed, 2008-11-26 at 12:22 -0600, Hollis Blanchard wrote: > > diff --git a/hw/virtio.h b/hw/virtio.h > index 1df8f83..c23f38c 100644 > --- a/hw/virtio.h > +++ b/hw/virtio.h > @@ -47,6 +47,11 @@ > /* This means don't interrupt guest when buffer consumed. */ > #define VRING_AVAIL_F_NO_INTERRUPT1 > > +static inline vring_align(unsigned long addr, unsigned long align) > +{ > +return (addr + align - 1) & ~(align - 1); > +} > + > typedef struct VirtQueue VirtQueue; > typedef struct VirtIODevice VirtIODevice; OK, obviously this doesn't need to be named "vring_align". I was going to just build VIRTIO_PCI_VRING_ALIGN into this function, but in the future we'll need to accommodate KVM_S390_VIRTIO_RING_ALIGN, so we would need to pass in a parameter from virtqueue_init(). Of course, I'm not sure how the S390 code will fit here anyways, since virtio.c is both virtio ring and virtio PCI. I haven't found an existing "align" function in qemu though... -- Hollis Blanchard IBM Linux Technology Center -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH 2/2] Virtio block device support
On Tue, 2008-11-25 at 15:57 -0600, Anthony Liguori wrote: > This has been posted before but I believe it now has addressed all outstanding > concerns. I'd like to apply it if there are no objections. > > This patch adds virtio-blk support to QEMU. virtio-blk is a paravirtual disk > controller that can achieve good performance when using KVM. > > Since virtio is based on a scatter/gather API, we don't have a linear buffer > for each request. This forces us to allocate a linear buffer since the > current > block driver API does not have a scatter/gather operation. This allocation > can never exceed the maximum data limit on the ring queue so it isn't > unbounded. > > posix-aio cannot support a scatter/gather asynchronous operation so we'll need > to introduce our own thread pool to eliminate this limitation. There is work > underway to do this. > > Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> Tested and working (in conjunction with the virtio page size patch I just posted). -- Hollis Blanchard IBM Linux Technology Center -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Remove TARGET_PAGE_SIZE from virtio interface
TARGET_PAGE_SIZE should only be used internal to qemu, not in guest/host interfaces. The virtio frontend code in Linux uses two constants (PFN shift and vring alignment) for the interface, so update qemu to match. I've tested this with PowerPC KVM and confirmed that it fixes virtio problems when using non-TARGET_PAGE_SIZE pages in the guest. Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]> --- hw/virtio.c | 16 +--- hw/virtio.h |5 + 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/hw/virtio.c b/hw/virtio.c index e4224ab..0134b0b 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -51,6 +51,14 @@ /* Virtio ABI version, if we increment this, we break the guest driver. */ #define VIRTIO_PCI_ABI_VERSION 0 +/* How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT12 + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize again. */ +#define VIRTIO_PCI_VRING_ALIGN 4096 + /* QEMU doesn't strictly need write barriers since everything runs in * lock-step. We'll leave the calls to wmb() in though to make it obvious for * KVM or if kqemu gets SMP support. @@ -110,7 +118,9 @@ static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa) { vq->vring.desc = pa; vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc); -vq->vring.used = TARGET_PAGE_ALIGN(vq->vring.avail + offsetof(VRingAvail, ring[vq->vring.num])); +vq->vring.used = vring_align(vq->vring.avail + + offsetof(VRingAvail, ring[vq->vring.num]), + VIRTIO_PCI_VRING_ALIGN); } static inline uint64_t vring_desc_addr(VirtQueue *vq, int i) @@ -386,7 +396,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) vdev->features = val; break; case VIRTIO_PCI_QUEUE_PFN: -pa = (ram_addr_t)val << TARGET_PAGE_BITS; +pa = (ram_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT; vdev->vq[vdev->queue_sel].pfn = val; if (pa == 0) virtio_reset(vdev); @@ -660,7 +670,7 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f) if (vdev->vq[i].pfn) { target_phys_addr_t pa; -pa = (ram_addr_t)vdev->vq[i].pfn << TARGET_PAGE_BITS; +pa = (ram_addr_t)vdev->vq[i].pfn << VIRTIO_PCI_QUEUE_ADDR_SHIFT; virtqueue_init(&vdev->vq[i], pa); } } diff --git a/hw/virtio.h b/hw/virtio.h index 1df8f83..c23f38c 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -47,6 +47,11 @@ /* This means don't interrupt guest when buffer consumed. */ #define VRING_AVAIL_F_NO_INTERRUPT1 +static inline vring_align(unsigned long addr, unsigned long align) +{ +return (addr + align - 1) & ~(align - 1); +} + typedef struct VirtQueue VirtQueue; typedef struct VirtIODevice VirtIODevice; -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH 1/1] Virtio block device support
On Wed, 2008-11-26 at 12:11 -0600, Hollis Blanchard wrote: > From: Anthony Liguori <[EMAIL PROTECTED]> Hmm, this is not my patch. :) Let me try again... -- Hollis Blanchard IBM Linux Technology Center -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/1] Virtio block device support
From: Anthony Liguori <[EMAIL PROTECTED]> This has been posted before but I believe it now has addressed all outstanding concerns. I'd like to apply it if there are no objections. This patch adds virtio-blk support to QEMU. virtio-blk is a paravirtual disk controller that can achieve good performance when using KVM. Since virtio is based on a scatter/gather API, we don't have a linear buffer for each request. This forces us to allocate a linear buffer since the current block driver API does not have a scatter/gather operation. This allocation can never exceed the maximum data limit on the ring queue so it isn't unbounded. posix-aio cannot support a scatter/gather asynchronous operation so we'll need to introduce our own thread pool to eliminate this limitation. There is work underway to do this. Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> --- Makefile.target |2 +- hw/pc.c | 12 ++ hw/pc.h |4 + hw/virtio-blk.c | 303 +++ sysemu.h|2 +- vl.c|4 + 6 files changed, 325 insertions(+), 2 deletions(-) create mode 100644 hw/virtio-blk.c diff --git a/Makefile.target b/Makefile.target index f7fc3a2..773b615 100644 --- a/Makefile.target +++ b/Makefile.target @@ -588,7 +588,7 @@ endif #CONFIG_BSD_USER ifndef CONFIG_USER_ONLY OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o -OBJS+=fw_cfg.o virtio.o +OBJS+=fw_cfg.o virtio.o virtio-blk.o ifdef CONFIG_KVM OBJS+=kvm.o kvm-all.o endif diff --git a/hw/pc.c b/hw/pc.c index 1486b68..8651e49 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -1092,6 +1092,18 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size, } } } + +/* Add virtio block devices */ +if (pci_enabled) { +int index; +int unit_id = 0; + +while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) { +virtio_blk_init(pci_bus, 0x1AF4, 0x1001, +drives_table[index].bdrv); +unit_id++; +} +} } static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size, diff --git a/hw/pc.h b/hw/pc.h index f156b9e..edbc5e8 100644 --- a/hw/pc.h +++ b/hw/pc.h @@ -152,4 +152,8 @@ void pci_piix4_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn, void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd); +/* virtio-blk.c */ +void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, + BlockDriverState *bs); + #endif diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c new file mode 100644 index 000..da87b66 --- /dev/null +++ b/hw/virtio-blk.c @@ -0,0 +1,303 @@ +/* + * Virtio Block Device + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori <[EMAIL PROTECTED]> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "virtio.h" +#include "block.h" +#include "block_int.h" +#include "pc.h" + +/* from Linux's linux/virtio_blk.h */ + +/* The ID for virtio_block */ +#define VIRTIO_ID_BLOCK2 + +/* Feature bits */ +#define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */ +#define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */ +#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 4 /* Indicates support of legacy geometry */ + +struct virtio_blk_config +{ +uint64_t capacity; +uint32_t size_max; +uint32_t seg_max; +uint16_t cylinders; +uint8_t heads; +uint8_t sectors; +} __attribute__((packed)); + +/* These two define direction. */ +#define VIRTIO_BLK_T_IN0 +#define VIRTIO_BLK_T_OUT 1 + +/* This bit says it's a scsi command, not an actual read or write. */ +#define VIRTIO_BLK_T_SCSI_CMD 2 + +/* Barrier before this op. */ +#define VIRTIO_BLK_T_BARRIER 0x8000 + +/* This is the first element of the read scatter-gather list. */ +struct virtio_blk_outhdr +{ +/* VIRTIO_BLK_T* */ +uint32_t type; +/* io priority. */ +uint32_t ioprio; +/* Sector (ie. 512 byte offset) */ +uint64_t sector; +}; + +#define VIRTIO_BLK_S_OK0 +#define VIRTIO_BLK_S_IOERR 1 +#define VIRTIO_BLK_S_UNSUPP2 + +/* This is the first element of the write scatter-gather list */ +struct virtio_blk_inhdr +{ +unsigned char status; +}; + +typedef struct VirtIOBlock +{ +VirtIODevice vdev; +BlockDriverState *bs; +VirtQueue *vq; +} VirtIOBlock; + +static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev) +{ +return (VirtIOBlock *)vdev; +} + +typedef struct VirtIOBlockReq +{ +VirtIOBlock *dev; +VirtQueueElement elem; +struct virtio_blk_inhdr *in; +struct virtio_blk_outhdr *out; +size_t size; +uint8_t *buffer; +} VirtIOBlockReq; + +static void virtio_blk_rw_complete(void *opaque, int ret) +{ +VirtIOBlockReq *req = opaque; +VirtIOBlock *s = req->d
[ kvm-Bugs-2351433 ] unhandled vm exit: 0x80000021 running UnixWare guest
Bugs item #2351433, was opened at 2008-11-26 17:39 Message generated for change (Comment added) made by hughesj You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: intel Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: John Hughes (hughesj) Assigned to: Nobody/Anonymous (nobody) Summary: unhandled vm exit: 0x8021 running UnixWare guest Initial Comment: CPU: Xeon 3050 Vendor: Intel Bits: 64 Host distribution: Debian Lenny, kernel kernel 2.6.26-1-amd64 Guest: SCO UnixWare 7.1.1 (SVR5), 32 bits Running a UnixWare (SVR5) guest with kvm 79, debian build kernel 2.6.26-1-amd64 on an Intel Xeon 3050 it crashes during boot with: $ qemu-system-x86_64 -net nic,model=ne2k_isa -net user -hda UnixWare.img unhandled vm exit: 0x8021 vcpu_id 0 rax b101 rbx rcx rdx rsi rdi rsp 0ff8 rbp r8 r9 r10 r11 r12 r13 r14 r15 rip fe6e rflags 00023002 cs 0100 (/ p 1 dpl 0 db 1 s 1 type b l 0 g 1 avl 0) ds 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) es (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) ss 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) fs (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) gs (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) tr 0148 (c04405c0/2067 p 1 dpl 0 db 0 s 0 type b l 0 g 0 avl 0) ldt (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) gdt 5020/2cf idt 52f0/7ff cr0 8001003b cr2 0 cr3 1005000 cr4 2d4 cr8 0 efer 0 Aborted -- Comment By: John Hughes (hughesj) Date: 2008-11-26 19:09 Message: Using kvm 72 in 32 bit mode on the same hardware I get unhandled vm exit: 0x9 vcpu_id 0 rax 8001003b rbx 0ff8 rcx f000 rdx f000 rsi fffda500 rdi 0001 rsp fffda4b4 rbp c043d818 r8 r9 r10 r11 r12 r13 r14 r15 rip c005f980 rflags 0202 cs 0100 (/ p 1 dpl 0 db 1 s 1 type b l 0 g 1 avl 0) ds 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) es 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) ss 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) fs (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) gs (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) tr 0110 (5af0/0067 p 1 dpl 0 db 0 s 0 type b l 0 g 0 avl 0) ldt 0118 (c0812bf8/0047 p 1 dpl 0 db 0 s 0 type 2 l 0 g 0 avl 0) gdt 5020/2cf idt 52f0/7ff cr0 8001003b cr2 0 cr3 1005000 cr4 2d4 cr8 0 efer 0 Aborted -- Comment By: John Hughes (hughesj) Date: 2008-11-26 17:41 Message: (Works with -no-kvm). -- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2351676 ] Guests hang periodically on Ubuntu-8.10
Bugs item #2351676, was opened at 2008-11-26 12:59 Message generated for change (Tracker Item Submitted) made by Item Submitter You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351676&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: Chris Jones (c_jones) Assigned to: Nobody/Anonymous (nobody) Summary: Guests hang periodically on Ubuntu-8.10 Initial Comment: I'm seeing periodic hangs on my guests. I've been unable so far to find a trigger - they always boot fine, but after anywhere from 10 minutes to 24 hours they eventually hang completely. My setup: * AMD Athlon X2 4850e (2500 MHz dual core) * 4Gig memory * Ubuntu 8.10 server, 64-bit * KVMs tried: : kvm-72 (shipped with ubuntu) : kvm-79 (built myself, --patched-kernel option) * Kernels tried: : 2.6.27.7 (kernel.org, self built) : 2.6.27-7-server from Ubuntu 8.10 distribution In guests * Ubuntu 8.10 server, 64-bit (virtual machine install) * kernel 2.6.27-7-server from Ubuntu 8.10 I'm running the guests like: sudo /usr/local/bin/qemu-system-x86_64\ -daemonize \ -no-kvm-irqchip\ -hda Imgs/ndev_root.img\ -m 1024\ -cdrom ISOs/ubuntu-8.10-server-amd64.iso \ -vnc :4\ -net nic,macaddr=DE:AD:BE:EF:04:04,model=e1000 \ -net tap,ifname=tap4,script=/home/chris/kvm/qemu-ifup.sh The problem does not happen if I use -no-kvm. I've tried some other options that have no effect: -no-kvm-pit -no-acpi The disk images are raw format. When the guests hang, I cannot ping them, and the vnc console us hung. The qemu monitor is still accessible, and the guests recover if I issue a system_reset command from the monitor. However, often, the console will not take keyboard after doing so. When the guest is hung, kvm_stat shows all 0s for the counters: efer_relo exits fpu_reloa halt_exit halt_wake host_stat hypercall +insn_emul insn_emul invlpg io_exits irq_exits irq_windo largepage +mmio_exit mmu_cache mmu_flood mmu_pde_z mmu_pte_u mmu_pte_w mmu_recyc +mmu_shado nmi_windo pf_fixed pf_guest remote_tl request_i signal_ex +tlb_flush > 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 +0 0 0 0 0 0 gdb shows two threads - both waiting: c(gdb) info threads 2 Thread 0x414f1950 (LWP 422) 0x7f36f07a03e1 in sigtimedwait () from /lib/libc.so.6 1 Thread 0x7f36f1f306e0 (LWP 414) 0x7f36f084b482 in select () from /lib/libc.so.6 (gdb) thread 1 [Switching to thread 1 (Thread 0x7f36f1f306e0 (LWP 414))]#0 0x7f36f084b482 +in select () from /lib/libc.so.6 (gdb) bt #0 0x7f36f084b482 in select () from /lib/libc.so.6 #1 0x004094cb in main_loop_wait (timeout=0) at /home/chris/pkgs/kvm/kvm-79/qemu/vl.c:4719 #2 0x0050a7ea in kvm_main_loop () at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:619 #3 0x0040fafc in main (argc=, argv=0x79f41948) at /home/chris/pkgs/kvm/kvm-79/qemu/vl.c:4871 (gdb) thread 2 [Switching to thread 2 (Thread 0x414f1950 (LWP 422))]#0 0x7f36f07a03e1 in +sigtimedwait () from /lib/libc.so.6 (gdb) bt #0 0x7f36f07a03e1 in sigtimedwait () from /lib/libc.so.6 #1 0x0050a560 in kvm_main_loop_wait (env=0xc319e0, timeout=0) at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:284 #2 0x0050aaf7 in ap_main_loop (_env=) at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:425 #3 0x7f36f11ba3ea in start_thread () from /lib/libpthread.so.0 #4 0x7f36f0852c6d in clone () from /lib/libc.so.6 #5 0x in ?? () Any clues to help me resolve this would be much appreciated. -- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351676&group_id=180599 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core
* Greg KH ([EMAIL PROTECTED]) wrote: > > +static int > > +igb_virtual(struct pci_dev *pdev, int nr_virtfn) > > +{ > > + unsigned char my_mac_addr[6] = {0x00, 0xDE, 0xAD, 0xBE, 0xEF, 0xFF}; > > + struct net_device *netdev = pci_get_drvdata(pdev); > > + struct igb_adapter *adapter = netdev_priv(netdev); > > + int i; > > + > > + if (nr_virtfn > 7) > > + return -EINVAL; > > Why the check for 7? Is that the max virtual functions for this card? > Shouldn't that be a define somewhere so it's easier to fix in future > versions of this hardware? :) IIRC it's 8 for the card, 1 reserved for PF. I think both notions should be captured w/ commented constants. thanks, -chris -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [SR-IOV driver example 3/3] VF driver tar ball
On Wed, Nov 26, 2008 at 10:40:43PM +0800, Yu Zhao wrote: > The attachment is the VF driver for Intel 82576 NIC. Please don't attach things as tarballs, we can't review or easily read them at all. Care to resend it? thanks, greg k-h -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [SR-IOV driver example 0/3] introduction
On Wed, Nov 26, 2008 at 10:03:03PM +0800, Yu Zhao wrote: > SR-IOV drivers of Intel 82576 NIC are available. There are two parts > of the drivers: Physical Function driver and Virtual Function driver. > The PF driver is based on the IGB driver and is used to control PF to > allocate hardware specific resources and interface with the SR-IOV core. > The VF driver is a new NIC driver that is same as the traditional PCI > device driver. It works in both the host and the guest (Xen and KVM) > environment. > > These two drivers are testing versions and they are *only* intended to > show how to use SR-IOV API. That's funny, as some distros are already shipping this driver. You might want to tell them that this is an "example only" driver and not to be used "for real"... :( greg k-h -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core
On Wed, Nov 26, 2008 at 10:21:56PM +0800, Yu Zhao wrote: > This patch integrates the IGB driver with the SR-IOV core. It shows how > the SR-IOV API is used to support the capability. Obviously people does > not need to put much effort to integrate the PF driver with SR-IOV core. > All SR-IOV standard stuff are handled by SR-IOV core and PF driver only > concerns the device specific resource allocation and deallocation once it > gets the necessary information (i.e. number of Virtual Functions) from > the callback function. > > --- > drivers/net/igb/igb_main.c | 30 ++ > 1 files changed, 30 insertions(+), 0 deletions(-) > > diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c > index bc063d4..b8c7dc6 100644 > --- a/drivers/net/igb/igb_main.c > +++ b/drivers/net/igb/igb_main.c > @@ -139,6 +139,7 @@ void igb_set_mc_list_pools(struct igb_adapter *, struct > e1000_hw *, int, u16); > static int igb_vmm_control(struct igb_adapter *, bool); > static int igb_set_vf_mac(struct net_device *, int, u8*); > static void igb_mbox_handler(struct igb_adapter *); > +static int igb_virtual(struct pci_dev *, int); > #endif > > static int igb_suspend(struct pci_dev *, pm_message_t); > @@ -184,6 +185,9 @@ static struct pci_driver igb_driver = { > #endif > .shutdown = igb_shutdown, > .err_handler = &igb_err_handler, > +#ifdef CONFIG_PCI_IOV > + .virtual = igb_virtual > +#endif #ifdef should not be needed, right? > }; > > static int global_quad_port_a; /* global quad port a indication */ > @@ -5107,6 +5111,32 @@ void igb_set_mc_list_pools(struct igb_adapter *adapter, > reg_data |= (1 << 25); > wr32(E1000_VMOLR(pool), reg_data); > } > + > +static int > +igb_virtual(struct pci_dev *pdev, int nr_virtfn) > +{ > + unsigned char my_mac_addr[6] = {0x00, 0xDE, 0xAD, 0xBE, 0xEF, 0xFF}; > + struct net_device *netdev = pci_get_drvdata(pdev); > + struct igb_adapter *adapter = netdev_priv(netdev); > + int i; > + > + if (nr_virtfn > 7) > + return -EINVAL; Why the check for 7? Is that the max virtual functions for this card? Shouldn't that be a define somewhere so it's easier to fix in future versions of this hardware? :) > + > + if (nr_virtfn) { > + for (i = 0; i < nr_virtfn; i++) { > + printk(KERN_INFO "SR-IOV: VF %d is enabled\n", i); Use dev_info() please, that shows the exact pci device and driver that emitted the message. > + my_mac_addr[5] = (unsigned char)i; > + igb_set_vf_mac(netdev, i, my_mac_addr); > + igb_set_vf_vmolr(adapter, i); > + } > + } else > + printk(KERN_INFO "SR-IOV is disabled\n"); Is that really true? (oh, use dev_info as well.) What happens if you had called this with "5" and then later with "0", you never destroyed those existing virtual functions, yet the code does: > + adapter->vfs_allocated_count = nr_virtfn; Which makes the driver think they are not present. What happens when the driver later goes to shut down? Are those resources freed up properly? thanks, greg k-h -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: Using virtio drivers under Windows 2003 R2 SP2 guest
> Subject: Re: Using virtio drivers under Windows 2003 R2 SP2 guest > > On Wed, Nov 26, 2008 at 1:47 PM, carlopmart <[EMAIL PROTECTED]> > wrote: > > Hi all, > > > > Recently I have installed a ubuntu server to use as a virtual host. I > need > > to install two Windows 2003 R2 SP2 guests on it, and I would like to > use > > virtio drivers for networking but I see that these drivers only exists > for > > Windows XP and Windows 2000, is it correct?? can I use it for Windows > 2003? > > I have been using the XP driver in a 2003 VM and it works correctly, > and IIRC Dor and Avi have said that XP drivers work in 2003. > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to [EMAIL PROTECTED] > More majordomo info at http://vger.kernel.org/majordomo-info.html I just confirm that the XP driver virtio drivers works also here on win 2003 without any issues. (we run several win2003 with exchange in production). Best Regards, Martin Maurer [EMAIL PROTECTED] http://www.proxmox.com -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2351433 ] unhandled vm exit: 0x80000021 running UnixWare guest
Bugs item #2351433, was opened at 2008-11-26 17:39 Message generated for change (Comment added) made by hughesj You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: intel Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: John Hughes (hughesj) Assigned to: Nobody/Anonymous (nobody) Summary: unhandled vm exit: 0x8021 running UnixWare guest Initial Comment: CPU: Xeon 3050 Vendor: Intel Bits: 64 Host distribution: Debian Lenny, kernel kernel 2.6.26-1-amd64 Guest: SCO UnixWare 7.1.1 (SVR5), 32 bits Running a UnixWare (SVR5) guest with kvm 79, debian build kernel 2.6.26-1-amd64 on an Intel Xeon 3050 it crashes during boot with: $ qemu-system-x86_64 -net nic,model=ne2k_isa -net user -hda UnixWare.img unhandled vm exit: 0x8021 vcpu_id 0 rax b101 rbx rcx rdx rsi rdi rsp 0ff8 rbp r8 r9 r10 r11 r12 r13 r14 r15 rip fe6e rflags 00023002 cs 0100 (/ p 1 dpl 0 db 1 s 1 type b l 0 g 1 avl 0) ds 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) es (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) ss 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) fs (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) gs (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) tr 0148 (c04405c0/2067 p 1 dpl 0 db 0 s 0 type b l 0 g 0 avl 0) ldt (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) gdt 5020/2cf idt 52f0/7ff cr0 8001003b cr2 0 cr3 1005000 cr4 2d4 cr8 0 efer 0 Aborted -- Comment By: John Hughes (hughesj) Date: 2008-11-26 17:41 Message: (Works with -no-kvm). -- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2351433 ] unhandled vm exit: 0x80000021 running UnixWare guest
Bugs item #2351433, was opened at 2008-11-26 17:39 Message generated for change (Tracker Item Submitted) made by Item Submitter You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: intel Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: John Hughes (hughesj) Assigned to: Nobody/Anonymous (nobody) Summary: unhandled vm exit: 0x8021 running UnixWare guest Initial Comment: CPU: Xeon 3050 Vendor: Intel Bits: 64 Host distribution: Debian Lenny, kernel kernel 2.6.26-1-amd64 Guest: SCO UnixWare 7.1.1 (SVR5), 32 bits Running a UnixWare (SVR5) guest with kvm 79, debian build kernel 2.6.26-1-amd64 on an Intel Xeon 3050 it crashes during boot with: $ qemu-system-x86_64 -net nic,model=ne2k_isa -net user -hda UnixWare.img unhandled vm exit: 0x8021 vcpu_id 0 rax b101 rbx rcx rdx rsi rdi rsp 0ff8 rbp r8 r9 r10 r11 r12 r13 r14 r15 rip fe6e rflags 00023002 cs 0100 (/ p 1 dpl 0 db 1 s 1 type b l 0 g 1 avl 0) ds 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) es (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) ss 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) fs (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) gs (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) tr 0148 (c04405c0/2067 p 1 dpl 0 db 0 s 0 type b l 0 g 0 avl 0) ldt (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) gdt 5020/2cf idt 52f0/7ff cr0 8001003b cr2 0 cr3 1005000 cr4 2d4 cr8 0 efer 0 Aborted -- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351433&group_id=180599 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: header-sync: fix to work with 2.6.28 kernel
Mark McLoughlin wrote: If you run header-sync against a kernel which has asm/kvm.h but not e.g. asm/vmx.h then $(wildcard $(headers-new)) returns a string with expanded kvm.h path and the vmx.h glob removed. We then pass the original globs to rsync causing that to fail when vmx.h can't be found: rsync: link_stat ".../asm/vmx*.h" failed: No such file or directory (2) Fix by passing the expanded paths returned by the wildcard function to rsync. Applied, thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm-userspace: Cleanup user space NMI injection
Jan Kiszka wrote: Hm. For interrupts we need to check, since the interrupt might be deasserted or masked while the window is closed. Is there no way that this can happen for NMIs? In our emulation, but I also think in real life, there is no way to deassert an NMI. Therefore, our user space API just sets the nmi_pending flag. Masking of NMIs (due to mov ss etc.) is handled by the NMI injection code, and this has to happen in kernel anyway as the other NMI sources (APIC and IOAPIC) do not bother about the NMI windows as well. I think you're right, and applied the patch. Thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm-userspace: fix module build with --kerneldir
Maik Hentsche wrote: If the user specified $kerneldir, it should be in the form /lib/modules/$version/build, no? Ah sorry, I did not know of this requirement. My kerneldir is /tmp/linux (I build inside chroot). I took it from the depmod manpage, but if you're building from /tmp/linux, clearly it isn't a strict requirement. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm-userspace: fix module build with --kerneldir
Jan Kiszka wrote: Avi Kivity wrote: Maik Hentsche wrote: Avi Kivity <[EMAIL PROTECTED]> wrote: Can't the version be determined directly from kernedir itself? e.g. kerneldir is /lib/modules/$version/build? How do you get $version if you are crosscompiling? In this case the usual way of asking uname doesn't work. Instead you somehow need to get the version of the kernel you're building for from the kernel source provided in --kerneldir. I don't know any better way than evaluating UTSRELEASE. If the user specified $kerneldir, it should be in the form /lib/modules/$version/build, no? This is not sufficiently generic. KVM should to accept arbitrary paths to kernel directories, not just those special links (which could cause problems anyway if the link is set up for the target and makes no sense on the build host). Fair enough. This seems to work: awk ' / = / { a[$1] = $3 } END { printf("%s.%s.%s%s\n", a["VERSION"], a["PATCHLEVEL"], a["SUBLEVEL"], a["EXTRAVERSION"]) }' "$kerneldir/Makefile" Or alternatively awk '/Linux kernel version/ { print $NF }' "$kerneldir/.config" -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Using virtio drivers under Windows 2003 R2 SP2 guest
On Wed, Nov 26, 2008 at 1:47 PM, carlopmart <[EMAIL PROTECTED]> wrote: > Hi all, > > Recently I have installed a ubuntu server to use as a virtual host. I need > to install two Windows 2003 R2 SP2 guests on it, and I would like to use > virtio drivers for networking but I see that these drivers only exists for > Windows XP and Windows 2000, is it correct?? can I use it for Windows 2003? I have been using the XP driver in a 2003 VM and it works correctly, and IIRC Dor and Avi have said that XP drivers work in 2003. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Using virtio drivers under Windows 2003 R2 SP2 guest
Hi all, Recently I have installed a ubuntu server to use as a virtual host. I need to install two Windows 2003 R2 SP2 guests on it, and I would like to use virtio drivers for networking but I see that these drivers only exists for Windows XP and Windows 2000, is it correct?? can I use it for Windows 2003? Many thanks. -- CL Martinez carlopmart {at} gmail {d0t} com -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] PCI: allow pci driver to support only dynids
* Jean Delvare ([EMAIL PROTECTED]) wrote: > As a side note, I am curious what PCI driver we do have which has > driver->probe defined but no driver->id_table. Did you hit an actual > issue or are you fixing a theoretical one? Such a driver was patch 2/2. http://lkml.org/lkml/2008/11/26/11 It's meant to have only dynamic ids, worked fine on 2.6.27 and oopsed from NULL id-> deref on current git. thanks, -chris -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core
This patch integrates the IGB driver with the SR-IOV core. It shows how the SR-IOV API is used to support the capability. Obviously people does not need to put much effort to integrate the PF driver with SR-IOV core. All SR-IOV standard stuff are handled by SR-IOV core and PF driver only concerns the device specific resource allocation and deallocation once it gets the necessary information (i.e. number of Virtual Functions) from the callback function. --- drivers/net/igb/igb_main.c | 30 ++ 1 files changed, 30 insertions(+), 0 deletions(-) diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index bc063d4..b8c7dc6 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -139,6 +139,7 @@ void igb_set_mc_list_pools(struct igb_adapter *, struct e1000_hw *, int, u16); static int igb_vmm_control(struct igb_adapter *, bool); static int igb_set_vf_mac(struct net_device *, int, u8*); static void igb_mbox_handler(struct igb_adapter *); +static int igb_virtual(struct pci_dev *, int); #endif static int igb_suspend(struct pci_dev *, pm_message_t); @@ -184,6 +185,9 @@ static struct pci_driver igb_driver = { #endif .shutdown = igb_shutdown, .err_handler = &igb_err_handler, +#ifdef CONFIG_PCI_IOV + .virtual = igb_virtual +#endif }; static int global_quad_port_a; /* global quad port a indication */ @@ -5107,6 +5111,32 @@ void igb_set_mc_list_pools(struct igb_adapter *adapter, reg_data |= (1 << 25); wr32(E1000_VMOLR(pool), reg_data); } + +static int +igb_virtual(struct pci_dev *pdev, int nr_virtfn) +{ + unsigned char my_mac_addr[6] = {0x00, 0xDE, 0xAD, 0xBE, 0xEF, 0xFF}; + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + int i; + + if (nr_virtfn > 7) + return -EINVAL; + + if (nr_virtfn) { + for (i = 0; i < nr_virtfn; i++) { + printk(KERN_INFO "SR-IOV: VF %d is enabled\n", i); + my_mac_addr[5] = (unsigned char)i; + igb_set_vf_mac(netdev, i, my_mac_addr); + igb_set_vf_vmolr(adapter, i); + } + } else + printk(KERN_INFO "SR-IOV is disabled\n"); + + adapter->vfs_allocated_count = nr_virtfn; + + return 0; +} #endif /* igb_main.c */ -- 1.5.4.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[SR-IOV driver example 1/3] PF driver: allocate hardware specific resource
This patch makes the IGB driver allocate hardware resource (rx/tx queues) for Virtual Functions. All operations in this patch are hardware specific. --- drivers/net/igb/Makefile|2 +- drivers/net/igb/e1000_82575.c |1 + drivers/net/igb/e1000_82575.h | 61 drivers/net/igb/e1000_defines.h |7 + drivers/net/igb/e1000_hw.h |2 + drivers/net/igb/e1000_regs.h| 13 + drivers/net/igb/e1000_vf.c | 223 ++ drivers/net/igb/igb.h | 10 + drivers/net/igb/igb_main.c | 604 ++- 9 files changed, 910 insertions(+), 13 deletions(-) create mode 100644 drivers/net/igb/e1000_vf.c diff --git a/drivers/net/igb/Makefile b/drivers/net/igb/Makefile index 1927b3f..ab3944c 100644 --- a/drivers/net/igb/Makefile +++ b/drivers/net/igb/Makefile @@ -33,5 +33,5 @@ obj-$(CONFIG_IGB) += igb.o igb-objs := igb_main.o igb_ethtool.o e1000_82575.o \ - e1000_mac.o e1000_nvm.o e1000_phy.o + e1000_mac.o e1000_nvm.o e1000_phy.o e1000_vf.o diff --git a/drivers/net/igb/e1000_82575.c b/drivers/net/igb/e1000_82575.c index f5e2e72..bb823ac 100644 --- a/drivers/net/igb/e1000_82575.c +++ b/drivers/net/igb/e1000_82575.c @@ -87,6 +87,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) case E1000_DEV_ID_82576: case E1000_DEV_ID_82576_FIBER: case E1000_DEV_ID_82576_SERDES: + case E1000_DEV_ID_82576_QUAD_COPPER: mac->type = e1000_82576; break; default: diff --git a/drivers/net/igb/e1000_82575.h b/drivers/net/igb/e1000_82575.h index c1928b5..8c488ab 100644 --- a/drivers/net/igb/e1000_82575.h +++ b/drivers/net/igb/e1000_82575.h @@ -170,4 +170,65 @@ struct e1000_adv_tx_context_desc { #define E1000_DCA_TXCTRL_CPUID_SHIFT 24 /* Tx CPUID now in the last byte */ #define E1000_DCA_RXCTRL_CPUID_SHIFT 24 /* Rx CPUID now in the last byte */ +#define MAX_NUM_VFS 8 + +#define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31) /* global VF LB enable */ + +/* Easy defines for setting default pool, would normally be left a zero */ +#define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7 +#define E1000_VT_CTL_DEFAULT_POOL_MASK (0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT) + +/* Other useful VMD_CTL register defines */ +#define E1000_VT_CTL_DISABLE_DEF_POOL (1 << 29) +#define E1000_VT_CTL_VM_REPL_EN (1 << 30) + +/* Per VM Offload register setup */ +#define E1000_VMOLR_LPE0x0001 /* Accept Long packet */ +#define E1000_VMOLR_AUPE 0x0100 /* Accept untagged packets */ +#define E1000_VMOLR_BAM0x0800 /* Accept Broadcast packets */ +#define E1000_VMOLR_MPME 0x1000 /* Multicast promiscuous mode */ +#define E1000_VMOLR_STRVLAN0x4000 /* Vlan stripping enable */ + +#define E1000_P2VMAILBOX_STS 0x0001 /* Initiate message send to VF */ +#define E1000_P2VMAILBOX_ACK 0x0002 /* Ack message recv'd from VF */ +#define E1000_P2VMAILBOX_VFU 0x0004 /* VF owns the mailbox buffer */ +#define E1000_P2VMAILBOX_PFU 0x0008 /* PF owns the mailbox buffer */ + +#define E1000_VLVF_ARRAY_SIZE 32 +#define E1000_VLVF_VLANID_MASK0x0FFF +#define E1000_VLVF_POOLSEL_SHIFT 12 +#define E1000_VLVF_POOLSEL_MASK (0xFF << E1000_VLVF_POOLSEL_SHIFT) +#define E1000_VLVF_VLANID_ENABLE 0x8000 + +#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ + +/* If it's a E1000_VF_* msg then it originates in the VF and is sent to the + * PF. The reverse is true if it is E1000_PF_*. + * Message ACK's are the value or'd with 0xF000 + */ +#define E1000_VT_MSGTYPE_ACK 0xF000 /* Messages below or'd with + * this are the ACK */ +#define E1000_VT_MSGTYPE_NACK 0xFF00 /* Messages below or'd with + * this are the NACK */ +#define E1000_VT_MSGINFO_SHIFT16 +/* bits 23:16 are used for exra info for certain messages */ +#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT) + +#define E1000_VF_MSGTYPE_REQ_MAC 1 /* VF needs to know its MAC */ +#define E1000_VF_MSGTYPE_VFLR 2 /* VF notifies VFLR to PF */ +#define E1000_VF_SET_MULTICAST3 /* VF requests PF to set MC addr */ +#define E1000_VF_SET_VLAN 4 /* VF requests PF to set VLAN */ +#define E1000_VF_SET_LPE 5 /* VF requests PF to set VMOLR.LPE */ + +s32 e1000_send_mail_to_vf(struct e1000_hw *hw, u32 *msg, + u32 vf_number, s16 size); +s32 e1000_receive_mail_from_vf(struct e1000_hw *hw, u32 *msg, +u32 vf_number, s16 size); +void e1000_vmdq_loopback_enable_vf(struct e1000_hw *hw); +void e1000_vmdq_loopback_disable_vf(struct e1000_hw *hw); +void e1000_vmdq_replication_enable_vf(struct e1000_hw *hw, u32 enables); +void e1000_vmdq_replication_disable_vf(struct e1000_hw *hw); +bool e1000_check_for_pf_ack_vf(struct e1000_hw *hw); +bool e1000_check_for_pf_mail
[SR-IOV driver example 0/3] introduction
SR-IOV drivers of Intel 82576 NIC are available. There are two parts of the drivers: Physical Function driver and Virtual Function driver. The PF driver is based on the IGB driver and is used to control PF to allocate hardware specific resources and interface with the SR-IOV core. The VF driver is a new NIC driver that is same as the traditional PCI device driver. It works in both the host and the guest (Xen and KVM) environment. These two drivers are testing versions and they are *only* intended to show how to use SR-IOV API. Intel 82576 NIC specification can be found at: http://download.intel.com/design/network/datashts/82576_Datasheet_v2p1.pdf [SR-IOV driver example 1/3] PF driver: allocate hardware specific resource [SR-IOV driver example 2/3] PF driver: integrate with SR-IOV core [SR-IOV driver example 3/3] VF driver tar ball -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/5] kvm: qemu: virtio: introduce virtqueue_fill() and virtqueue_flush()
Split virtqueue_push() into two logical steps - adding an element to the used ring and notifying the other side of added elements. This is needed because with the mergeable receive buffers scheme we will add buffers to the used ring as we copy the packet data into them but we only want to notify the guest of the new buffers once all the packet buffers are available. Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]> --- qemu/hw/virtio.c | 23 ++- qemu/hw/virtio.h |3 +++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c index fe0a120..18ad3ed 100644 --- a/qemu/hw/virtio.c +++ b/qemu/hw/virtio.c @@ -107,19 +107,32 @@ static void virtqueue_init(VirtQueue *vq, void *p) vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]); } -void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len) +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx) { VRingUsedElem *used; +idx += vq->vring.used->idx; + /* Get a pointer to the next entry in the used ring. */ -used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; +used = &vq->vring.used->ring[idx % vq->vring.num]; used->id = elem->index; used->len = len; +} + +void virtqueue_flush(VirtQueue *vq, unsigned int count) +{ /* Make sure buffer is written before we update index. */ wmb(); -vq->vring.used->idx++; -vq->inuse--; +vq->vring.used->idx += count; +vq->inuse -= count; +} + +void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) +{ +virtqueue_fill(vq, elem, len, 0); +virtqueue_flush(vq, 1); } static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h index 0dcedbf..87a15cc 100644 --- a/qemu/hw/virtio.h +++ b/qemu/hw/virtio.h @@ -139,6 +139,9 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len); +void virtqueue_flush(VirtQueue *vq, unsigned int count); +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx); int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem); -- 1.5.4.3 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/5] kvm: qemu: virtio-net: add a new virtio-net receive buffer scheme
Currently, in order to receive large packets, the guest must allocate max-sized packet buffers and pass them to the host. Each of these max-sized packets occupy 20 ring entries, which means we can only transfer a maximum of 12 packets in a single batch with a 256 entry ring. When receiving packets from external networks, we only receive MTU sized packets and so the throughput observed is throttled by the number of packets the ring can hold. Implement the VIRTIO_NET_F_MRG_RXBUF feature to let guests know that we can merge smaller buffers together in order to handle large packets. This scheme allows us to be efficient in our use of ring entries while still supporting large packets. Benchmarking using netperf from an external machine to a guest over a 10Gb/s network shows a 100% improvement from ~1Gb/s to ~2Gb/s. With a local host->guest benchmark with GSO disabled on the host side, throughput was seen to increase from 700Mb/s to 1.7Gb/s. Based on a patch from Herbert Xu. Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]> --- qemu/hw/virtio-net.c | 133 +++-- qemu/hw/virtio.c | 32 qemu/hw/virtio.h |1 + 3 files changed, 139 insertions(+), 27 deletions(-) diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c index 2a52536..b5d5f9e 100644 --- a/qemu/hw/virtio-net.c +++ b/qemu/hw/virtio-net.c @@ -34,9 +34,13 @@ #define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ #define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ #define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ #define TX_TIMER_INTERVAL 15 /* 150 us */ +/* Maximum packet size we can receive from tap device: header + 64k */ +#define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10)) + /* The config defining mac address (6 bytes) */ struct virtio_net_config { @@ -61,6 +65,14 @@ struct virtio_net_hdr uint16_t csum_offset; }; +/* This is the version of the header to use when the MRG_RXBUF + * feature has been negotiated. */ +struct virtio_net_hdr_mrg_rxbuf +{ +struct virtio_net_hdr hdr; +uint16_t num_buffers; /* Number of merged rx buffers */ +}; + typedef struct VirtIONet { VirtIODevice vdev; @@ -70,6 +82,7 @@ typedef struct VirtIONet VLANClientState *vc; QEMUTimer *tx_timer; int tx_timer_active; +int mergeable_rx_bufs; } VirtIONet; /* TODO @@ -106,6 +119,7 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev) features |= (1 << VIRTIO_NET_F_HOST_TSO4); features |= (1 << VIRTIO_NET_F_HOST_TSO6); features |= (1 << VIRTIO_NET_F_HOST_ECN); + features |= (1 << VIRTIO_NET_F_MRG_RXBUF); /* Kernel can't actually handle UFO in software currently. */ } @@ -117,6 +131,8 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features) VirtIONet *n = to_virtio_net(vdev); VLANClientState *host = n->vc->vlan->first_client; +n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)); + if (!tap_has_vnet_hdr(host) || !host->set_offload) return; @@ -145,7 +161,9 @@ static int virtio_net_can_receive(void *opaque) !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) return 0; -if (n->rx_vq->vring.avail->idx == n->rx_vq->last_avail_idx) { +if (n->rx_vq->vring.avail->idx == n->rx_vq->last_avail_idx || + (n->mergeable_rx_bufs && +!virtqueue_avail_bytes(n->rx_vq, VIRTIO_NET_MAX_BUFSIZE, 0))) { n->rx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; return 0; } @@ -197,41 +215,90 @@ static int iov_fill(struct iovec *iov, int iovcnt, const void *buf, int count) return offset; } -static void virtio_net_receive(void *opaque, const uint8_t *buf, int size) +static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt, + const void *buf, int size, int hdr_len) { -VirtIONet *n = opaque; -VirtQueueElement elem; -struct virtio_net_hdr *hdr; +struct virtio_net_hdr *hdr = iov[0].iov_base; int offset; -int total; - -if (virtqueue_pop(n->rx_vq, &elem) == 0) - return; - -if (elem.in_num < 1 || elem.in_sg[0].iov_len != sizeof(*hdr)) { - fprintf(stderr, "virtio-net header not in first element\n"); - exit(1); -} -hdr = (void *)elem.in_sg[0].iov_base; hdr->flags = 0; hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; -offset = 0; -total = sizeof(*hdr); - if (tap_has_vnet_hdr(n->vc->vlan->first_client)) { memcpy(hdr, buf, sizeof(*hdr)); - offset += total; -work_around_broken_dhclient(hdr, buf + offset, size - offset); + offset = sizeof(*hdr); + work_around_broken_dhclient(hdr, buf + offset, size - offset); } -/* copy in packet. ugh */ -total += iov_fill(&elem.in_sg[1], elem.in_num
[PATCH 1/5] kvm: qemu: virtio: move virtqueue_next_desc() around
virtio_next_desc() is only used in virtqueue_pop(), so move them alongside one another. Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]> --- qemu/hw/virtio.c | 30 +++--- 1 files changed, 15 insertions(+), 15 deletions(-) diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c index 8fac354..fe0a120 100644 --- a/qemu/hw/virtio.c +++ b/qemu/hw/virtio.c @@ -107,6 +107,21 @@ static void virtqueue_init(VirtQueue *vq, void *p) vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]); } +void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) +{ +VRingUsedElem *used; + +/* Get a pointer to the next entry in the used ring. */ +used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; +used->id = elem->index; +used->len = len; +/* Make sure buffer is written before we update index. */ +wmb(); +vq->vring.used->idx++; +vq->inuse--; +} + static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) { unsigned int next; @@ -126,21 +141,6 @@ static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) return next; } -void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len) -{ -VRingUsedElem *used; - -/* Get a pointer to the next entry in the used ring. */ -used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; -used->id = elem->index; -used->len = len; -/* Make sure buffer is written before we update index. */ -wmb(); -vq->vring.used->idx++; -vq->inuse--; -} - int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) { unsigned int i, head; -- 1.5.4.3 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/5] kvm: qemu: virtio: split some helpers out of virtqueue_pop()
The mergeable receive buffer scheme will introduce a new function which peeks at how much buffer space is available in the queue. Split out some helper functions from virtqueue_pop() for that purpose. Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]> --- qemu/hw/virtio.c | 45 + 1 files changed, 29 insertions(+), 16 deletions(-) diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c index 18ad3ed..42022d4 100644 --- a/qemu/hw/virtio.c +++ b/qemu/hw/virtio.c @@ -135,6 +135,33 @@ void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, virtqueue_flush(vq, 1); } +static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) +{ +uint16_t num_heads = vq->vring.avail->idx - idx; + +/* Check it isn't doing very strange things with descriptor numbers. */ +if (num_heads > vq->vring.num) + errx(1, "Guest moved used index from %u to %u", +idx, vq->vring.avail->idx); + +return num_heads; +} + +static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) +{ +unsigned int head; + +/* Grab the next descriptor number they're advertising, and increment + * the index we've seen. */ +head = vq->vring.avail->ring[idx % vq->vring.num]; + +/* If their number is silly, that's a fatal mistake. */ +if (head >= vq->vring.num) + errx(1, "Guest says index %u is available", head); + +return head; +} + static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) { unsigned int next; @@ -158,27 +185,13 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) { unsigned int i, head; -/* Check it isn't doing very strange things with descriptor numbers. */ -if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num) - errx(1, "Guest moved used index from %u to %u", -vq->last_avail_idx, vq->vring.avail->idx); - -/* If there's nothing new since last we looked, return invalid. */ -if (vq->vring.avail->idx == vq->last_avail_idx) +if (!virtqueue_num_heads(vq, vq->last_avail_idx)) return 0; -/* Grab the next descriptor number they're advertising, and increment - * the index we've seen. */ -head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num]; - -/* If their number is silly, that's a fatal mistake. */ -if (head >= vq->vring.num) - errx(1, "Guest says index %u is available", head); - /* When we start there are none of either input nor output. */ elem->out_num = elem->in_num = 0; -i = head; +i = head = virtqueue_get_head(vq, vq->last_avail_idx++); do { struct iovec *sg; -- 1.5.4.3 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/5] kvm: qemu: virtio_net: add support for mergeable rx buffers
Hi, The mergeable receive buffers scheme has been in the Linux virtio_net driver in net-next-2.6 for a while now, so it's probably safe to assume what's there now is the final iteration of the ABI. The following patches implement support for the scheme in KVM. Only the 5/5 patch contains functional changes. Changes since last time: + We now peak at how much buffer space is actually available in the ring and no longer assume that buffers are at least 4k + The new header is now 'struct virtio_net_hdr_mrg_rxbuf' rather than 'struct virtio_net_hdr2' + The new header no longer is padded to 32 bytes Cheers, Mark. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/5] kvm: qemu: virtio-net: split iov_fill() out from virtio_net_receive()
Simplifies the current code, but more especially, simplifies the implementation of the mergeable receive buffers scheme. Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]> --- qemu/hw/virtio-net.c | 27 ++- 1 files changed, 18 insertions(+), 9 deletions(-) diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c index bc2ede6..2a52536 100644 --- a/qemu/hw/virtio-net.c +++ b/qemu/hw/virtio-net.c @@ -182,12 +182,27 @@ static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, } } +static int iov_fill(struct iovec *iov, int iovcnt, const void *buf, int count) +{ +int offset, i; + +offset = i = 0; +while (offset < count && i < iovcnt) { + int len = MIN(iov[i].iov_len, count - offset); + memcpy(iov[i].iov_base, buf + offset, len); + offset += len; + i++; +} + +return offset; +} + static void virtio_net_receive(void *opaque, const uint8_t *buf, int size) { VirtIONet *n = opaque; VirtQueueElement elem; struct virtio_net_hdr *hdr; -int offset, i; +int offset; int total; if (virtqueue_pop(n->rx_vq, &elem) == 0) @@ -212,14 +227,8 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size) } /* copy in packet. ugh */ -i = 1; -while (offset < size && i < elem.in_num) { - int len = MIN(elem.in_sg[i].iov_len, size - offset); - memcpy(elem.in_sg[i].iov_base, buf + offset, len); - offset += len; - total += len; - i++; -} +total += iov_fill(&elem.in_sg[1], elem.in_num - 1, + buf + offset, size - offset); /* signal other side */ virtqueue_push(n->rx_vq, &elem, total); -- 1.5.4.3 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2215532 ] SMP IA32e RHEL5.1 guest can not boot up
Bugs item #2215532, was opened at 2008-11-01 22:58 Message generated for change (Comment added) made by jiajun You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2215532&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None >Status: Closed >Resolution: Fixed Priority: 5 Private: No Submitted By: Jiajun Xu (jiajun) Assigned to: Nobody/Anonymous (nobody) Summary: SMP IA32e RHEL5.1 guest can not boot up Initial Comment: Against kernel.git:20d09323d7b91819e35194773ea000e9d79badd2 user-space.git: ccabb53f1a84f11896c840391f7ade3756214086, SMP IA32e RHEL5/RHEL4 Guest can not boot up. UP IA32e Guest can boot up well. The issue disappeare if nmi_watchdog=0 in guest kernel parameter. -- >Comment By: Jiajun Xu (jiajun) Date: 2008-11-26 06:49 Message: Verified on kernel.git: 0f7d3ee64e9324b265d5b988c2e81ac1784fc59d userspace.git: 6e63ba19476753595e508713eb9daf559dc50bf6, the issue is fixed. -- Comment By: Avi Kivity (avik) Date: 2008-11-23 11:12 Message: Should be fixed now. Please retest. -- Comment By: Jiajun Xu (jiajun) Date: 2008-11-04 01:21 Message: File Added: config-2.6.18-53.el5 -- Comment By: Jiajun Xu (jiajun) Date: 2008-11-04 01:20 Message: Please see attachement, I uploaded all the call trace messages. I also attched my RHEL5.1 config. The kernel version is 2.6.18-53. File Added: call_trace -- Comment By: Jan Kiszka (kiszka) Date: 2008-11-04 00:08 Message: Please use a (virtual) serial console or a netconsole to catch the guest output. The screenshot only tells the tail of the report. Is there a chance to get my hands on some image to reproduce? I don't have that RHEL version at hand. Which .config does RHEL use? Which kernel version? Any special patches that may make a difference? -- Comment By: Jiajun Xu (jiajun) Date: 2008-11-03 23:34 Message: You can find snapshot of guest call trace in attachement. Guest call trace after loading kernel and start udev. There are a lot of messages printed on guest console, most are related to NMI. There is no any error message on host console. -- Comment By: Jan Kiszka (kiszka) Date: 2008-11-02 01:31 Message: Could you be more specific: Where does it hang? What does the guest kernel console say? Also, what is the nmi_watchdog default otherwise? And is there any message in the host kernel's log? -- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2215532&group_id=180599 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 2/2] KVM: Change to use new APIs for KVM VT-d
Avi Kivity wrote: > Han, Weidong wrote: >> This patch changes to use new APIs for KVM VT-d, and add device >> deassignment for hotplug. >> >> index 44fd7fa..558bc32 100644 >> --- a/include/linux/kvm.h >> +++ b/include/linux/kvm.h >> @@ -425,6 +425,8 @@ struct kvm_trace_rec { >> struct kvm_assigned_pci_dev) >> #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ >> struct kvm_assigned_irq) >> +#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x71, \ >> + struct kvm_assigned_pci_dev) >> > > Need to advertise using KVM_CAP_DEVICE_DEASSIGNMENT. Okay, will add KVM_CAP_DEVICE_DEASSIGNMENT. Thanks. Regards, Weidong -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 1/2] VT-d: Support multiple device assignment for KVM
Avi Kivity wrote: > Han, Weidong wrote: >> In order to support multiple device assignment for KVM, this patch >> does following main changes: >>- extend dmar_domain to own multiple devices from different >> iommus, use a bitmap of iommus to replace iommu pointer in >> dmar_domain. >>- add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d >> usage. Many functions (e.g. intel_map_single() and >> intel_unmap_single()) won't be used by KVM VT-d. Let them return >> directly when this flag is set. >> > > > This seems brittle. An API that has some functions shorted out > depending on some flag is hard to understand and use. This flag just helps identify kvm VT-d usage, and let kvm VT-d APIs reuse native VT-d code, it needn't duplicate code for kvm VT-d APIs, and it won't impact existed native VT-d code. > > We should either implement the functions, or split the API into a > basic version that talks only to one device, and an expanded versions > that talks to multiple devices, and is implemented by the using the > lower level API. This may require more changes due to the need to > share io pagetables. The expanded versions that supports multiple devices will need to change dmar_domain, this will cause lots of changes, almost duplicate the main functions. > >>- "SAGAW" capability may be different across iommus, that's to >> say the VT-d page table levels may be different among iommus. This >> patch uses a defaut agaw, and skip top levels of page tables for >> iommus which have smaller agaw than default. >> > > Neat trick. > >> void free_dmar_iommu(struct intel_iommu *iommu) >> { >> struct dmar_domain *domain; >> @@ -960,7 +1054,14 @@ void free_dmar_iommu(struct intel_iommu *iommu) >> for (; i < cap_ndoms(iommu->cap); ) { >> domain = iommu->domains[i]; >> clear_bit(i, iommu->domain_ids); >> - domain_exit(domain); >> + >> + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { >> + /* domain may be referenced by other iommus >> */ + if (domain_in_other_iommus(domain, iommu) >> == 0) + domain_exit(domain); + >> } + else >> + domain_exit(domain); >> > > Things like this are best expressed using reference counts, which > removes the need for the test as well. will add a reference count for it. > >> + >> + /* Skip top levels of page tables for >> +* iommu which has less agaw than default. + >> */ + for (agaw = domain->agaw; agaw != iommu->agaw; >> agaw--) { + pgd = >> phys_to_virt(dma_pte_addr(*pgd)); + if >> (!dma_pte_present(*pgd)) { + >> spin_unlock_irqrestore(&iommu->lock, flags); + >> return -ENOMEM; + } >> + } >> + } >> > > Need to check that the agaw is sufficient for mapped memory (and when > adding a device or mapping more memory, need a similar check). I think we can check the smallest agaw across iommus for mapped memory when mapping memory. Regards, Weidong -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] kvm-s390: Fix memory leak of vcpu->run
From: Christian Borntraeger <[EMAIL PROTECTED]> The s390 backend of kvm never calls kvm_vcpu_uninit. This causes a memory leak of vcpu->run pages. Lets call kvm_vcpu_uninit in kvm_arch_vcpu_destroy to free the vcpu->run. Signed-off-by: Christian Borntraeger <[EMAIL PROTECTED]> Acked-by: Carsten Otte <[EMAIL PROTECTED]> --- arch/s390/kvm/kvm-s390.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) Index: kvm/arch/s390/kvm/kvm-s390.c === --- kvm.orig/arch/s390/kvm/kvm-s390.c +++ kvm/arch/s390/kvm/kvm-s390.c @@ -198,6 +198,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vc { VCPU_EVENT(vcpu, 3, "%s", "free cpu"); free_page((unsigned long)(vcpu->arch.sie_block)); + kvm_vcpu_uninit(vcpu); kfree(vcpu); } @@ -230,8 +231,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu * void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { - /* kvm common code refers to this, but does'nt call it */ - BUG(); + /* Nothing todo */ } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] kvm-s390: Fix refcounting and allow module unload
From: Christian Borntraeger <[EMAIL PROTECTED]> Currently it is impossible to unload the kvm module on s390. This patch fixes kvm_arch_destroy_vm to release all cpus. This make it possible to unload the module. In addition we stop messing with the module refcount in arch code. Signed-off-by: Christian Borntraeger <[EMAIL PROTECTED]> Acked-by: Carsten Otte <[EMAIL PROTECTED]> --- arch/s390/kvm/kvm-s390.c | 35 +-- 1 file changed, 21 insertions(+), 14 deletions(-) Index: kvm/arch/s390/kvm/kvm-s390.c === --- kvm.orig/arch/s390/kvm/kvm-s390.c +++ kvm/arch/s390/kvm/kvm-s390.c @@ -185,8 +185,6 @@ struct kvm *kvm_arch_create_vm(void) debug_register_view(kvm->arch.dbf, &debug_sprintf_view); VM_EVENT(kvm, 3, "%s", "vm created"); - try_module_get(THIS_MODULE); - return kvm; out_nodbf: free_page((unsigned long)(kvm->arch.sca)); @@ -196,13 +194,32 @@ out_nokvm: return ERR_PTR(rc); } +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 3, "%s", "free cpu"); + free_page((unsigned long)(vcpu->arch.sie_block)); + kfree(vcpu); +} + +static void kvm_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_vcpu_destroy(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } +} + void kvm_arch_destroy_vm(struct kvm *kvm) { - debug_unregister(kvm->arch.dbf); + kvm_free_vcpus(kvm); kvm_free_physmem(kvm); free_page((unsigned long)(kvm->arch.sca)); + debug_unregister(kvm->arch.dbf); kfree(kvm); - module_put(THIS_MODULE); } /* Section: vcpu related */ @@ -308,8 +325,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(st VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, vcpu->arch.sie_block); - try_module_get(THIS_MODULE); - return vcpu; out_free_cpu: kfree(vcpu); @@ -317,14 +332,6 @@ out_nomem: return ERR_PTR(rc); } -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - VCPU_EVENT(vcpu, 3, "%s", "destroy cpu"); - free_page((unsigned long)(vcpu->arch.sie_block)); - kfree(vcpu); - module_put(THIS_MODULE); -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { /* kvm common code refers to this, but never calls it */ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/2] two refcount and leak fixes for kvm on s390
Hello Avi, here are two fixes for kvm on s390. [PATCH 1/2] kvm-s390: Fix refcounting and allow module unload [PATCH 2/2] kvm-s390: Fix memory leak of vcpu->run None of the fixed problems is severe. So I guess this should be scheduled for the next merge window. Thanks Christian -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] markers: comment usage of marker_synchronize_unregister()
* Wu Fengguang ([EMAIL PROTECTED]) wrote: > Add more comments to marker_synchronize_unregister() in order to > reduce the chance of misusing. > > Based on comments from Lai Jiangshan <[EMAIL PROTECTED]>. > > Cc: Lai Jiangshan <[EMAIL PROTECTED]> > Cc: Mathieu Desnoyers <[EMAIL PROTECTED]> > Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> > --- > > I'm still not sure about the last sentence. Can anyone clarify on > this? Thanks! > > diff --git a/include/linux/marker.h b/include/linux/marker.h > index 889196c..89ce1b8 100644 > --- a/include/linux/marker.h > +++ b/include/linux/marker.h > @@ -164,6 +164,12 @@ extern void *marker_get_private_data(const char *name, > marker_probe_func *probe, > * marker_synchronize_unregister must be called between the last marker probe > * unregistration and the end of module exit to make sure there is no caller > * executing a probe when it is freed. > + * > + * It must be called _also_ between unregistration and destruction the data > + * that unregistration-ed probes need to make sure there is no caller > executing > + * a probe when it's data is destroyed. it's -> its And the way it's written, this last sentence is a bit misleading. One might think that the synchronize_unregister has to be called two, when in fact it just has to be called once, but it must be called at a moment in time between unregister and free of any resource used by the probes, including the code which is removed by module unload. > + * > + * It works reliably only when all probe routines do not sleep and > reschedule. Per definition, preemption is disabled around marker probe execution, so I don't see why we should add this last sentence ? Mathieu > */ > #define marker_synchronize_unregister() synchronize_sched() > -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Host<->guest channel interface advice needed
On Wed, Nov 26, 2008 at 04:07:01PM +0300, Evgeniy Polyakov wrote: > On Wed, Nov 26, 2008 at 02:39:19PM +0200, Gleb Natapov ([EMAIL PROTECTED]) > wrote: > > The interfaces that are being considered are netlink socket (only datagram > > semantics, linux specific), new socket family or character device with > > different minor number for each channel. Which one better suits for > > the purpose? Is there other kind of interface to consider? New socket > > family looks like a good choice, but it would be nice to hear other > > opinions before starting to work on it. > > What about X (or whatever else) protocol running over host-guest network > device, which are in the kernel already? > I should have mentioned that in my original mail. We don't want to use IP stack for communication between host and guest for variety of reasons. User of the VM may interfere with our communication by mis configuring firewall for instance (and he/she may even not be aware that an OS running inside a VM). We also want be able to communicate with agent inside a guest even when guest's network is not yet configured. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Host<->guest channel interface advice needed
On Wednesday 26 November 2008, Gleb Natapov wrote: > The interfaces that are being considered are netlink socket (only datagram > semantics, linux specific), new socket family or character device with > different minor number for each channel. Which one better suits for > the purpose? Is there other kind of interface to consider? New socket > family looks like a good choice, but it would be nice to hear other > opinions before starting to work on it. I think a socket and a pty both look reasonable here, but one important aspect IMHO is that you only need a new kernel driver for the guest, if you just use the regular pty support or Unix domain sockets in the host. Obviously, there needs to be some control over permissions, as a guest most not be able to just open any socket or pty of the host, so a reasonable approach might be that the guest can only create a socket or pty that can be opened by the host, but not vice versa. Alternatively, you create the socket/pty in host userspace and then allow passing that down into the guest, which creates a virtio device from it. Arnd <>< -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] markers: comment usage of marker_synchronize_unregister()
On Wed, Nov 26, 2008 at 02:46:08PM +0200, Mathieu Desnoyers wrote: > * Wu Fengguang ([EMAIL PROTECTED]) wrote: > > Add more comments to marker_synchronize_unregister() in order to > > reduce the chance of misusing. > > > > Based on comments from Lai Jiangshan <[EMAIL PROTECTED]>. > > > > Cc: Lai Jiangshan <[EMAIL PROTECTED]> > > Cc: Mathieu Desnoyers <[EMAIL PROTECTED]> > > Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> > > --- > > > > I'm still not sure about the last sentence. Can anyone clarify on > > this? Thanks! > > > > diff --git a/include/linux/marker.h b/include/linux/marker.h > > index 889196c..89ce1b8 100644 > > --- a/include/linux/marker.h > > +++ b/include/linux/marker.h > > @@ -164,6 +164,12 @@ extern void *marker_get_private_data(const char *name, > > marker_probe_func *probe, > > * marker_synchronize_unregister must be called between the last marker > > probe > > * unregistration and the end of module exit to make sure there is no > > caller > > * executing a probe when it is freed. > > + * > > + * It must be called _also_ between unregistration and destruction the data > > + * that unregistration-ed probes need to make sure there is no caller > > executing > > + * a probe when it's data is destroyed. > > it's -> its > > And the way it's written, this last sentence is a bit misleading. One > might think that the synchronize_unregister has to be called two, when > in fact it just has to be called once, but it must be called at a moment > in time between unregister and free of any resource used by the probes, > including the code which is removed by module unload. > > > + * > > + * It works reliably only when all probe routines do not sleep and > > reschedule. > > Per definition, preemption is disabled around marker probe execution, so > I don't see why we should add this last sentence ? Thanks, your reminder dismissed my confusion on this last sentence :-) Updated patch according to your helpful comments. Thank you, Fengguang --- markers: comment usage of marker_synchronize_unregister() Add more comments to marker_synchronize_unregister() in order to reduce the chance of misusing. Based on comments from Lai Jiangshan and Mathieu Desnoyers. Cc: Lai Jiangshan <[EMAIL PROTECTED]> Cc: Mathieu Desnoyers <[EMAIL PROTECTED]> Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> --- diff --git a/include/linux/marker.h b/include/linux/marker.h index 889196c..32ce4f2 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -162,8 +162,10 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe, /* * marker_synchronize_unregister must be called between the last marker probe - * unregistration and the end of module exit to make sure there is no caller - * executing a probe when it is freed. + * unregistration and the first one of + * - the end of module exit + * - the free of any resource used by the probes + * to ensure the code and data are all valid for any possibly running probes. */ #define marker_synchronize_unregister() synchronize_sched() -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Host<->guest channel interface advice needed
Hi Gleb. On Wed, Nov 26, 2008 at 02:39:19PM +0200, Gleb Natapov ([EMAIL PROTECTED]) wrote: > The interfaces that are being considered are netlink socket (only datagram > semantics, linux specific), new socket family or character device with > different minor number for each channel. Which one better suits for > the purpose? Is there other kind of interface to consider? New socket > family looks like a good choice, but it would be nice to hear other > opinions before starting to work on it. What about X (or whatever else) protocol running over host-guest network device, which are in the kernel already? -- Evgeniy Polyakov -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Patch 4/5] x86_emulator: add the assembler code for three operands
Guillaume Thouvenin wrote: It's the only code with shld instruction. I don't see how you can have three different shld instructions here. I'm sure that I'm missing something here because for me, when we emulate the shld instruction, the code produced is the same. I mean that src.val and dst.val are always unsigned long and I don't see why register size are important here. In fact I don't understand why we need to use the switch ((_dst).bytes) for the other emulations. The flags vary according to operand size. For example, shld $1, %bx, %ax where %ax == 0x8000 will set eflags.cf, but shld $1, %ebx, %eax , with the same arguments, will not. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Weekly KVM Test report, kernel 30d95f ... userspace fc94d1 ...
Xu, Jiajun wrote: The call trace messages in guest: ### Kernel BUG at block/elevator.c:560 invalid opcode: [1] SMP last sysfs file: /block/hda/removable This suggests something happened to /dev/hda. Could be a timeout or something. Are there any messages in the log before the BUG? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Host<->guest channel interface advice needed
Hello, I'd like to ask what would be the best user space interface for generic guest<->host communication channel. The channel will be used to pass mouse events to/from a guest or by managements software to communicate with agents running in a guests or for something similar. The interfaces that are being considered are netlink socket (only datagram semantics, linux specific), new socket family or character device with different minor number for each channel. Which one better suits for the purpose? Is there other kind of interface to consider? New socket family looks like a good choice, but it would be nice to hear other opinions before starting to work on it. Thanks, -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] markers: comment usage of marker_synchronize_unregister()
Add more comments to marker_synchronize_unregister() in order to reduce the chance of misusing. Based on comments from Lai Jiangshan <[EMAIL PROTECTED]>. Cc: Lai Jiangshan <[EMAIL PROTECTED]> Cc: Mathieu Desnoyers <[EMAIL PROTECTED]> Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> --- I'm still not sure about the last sentence. Can anyone clarify on this? Thanks! diff --git a/include/linux/marker.h b/include/linux/marker.h index 889196c..89ce1b8 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -164,6 +164,12 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe, * marker_synchronize_unregister must be called between the last marker probe * unregistration and the end of module exit to make sure there is no caller * executing a probe when it is freed. + * + * It must be called _also_ between unregistration and destruction the data + * that unregistration-ed probes need to make sure there is no caller executing + * a probe when it's data is destroyed. + * + * It works reliably only when all probe routines do not sleep and reschedule. */ #define marker_synchronize_unregister() synchronize_sched() -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm-userspace: fix module build with --kerneldir
Avi Kivity wrote: > Maik Hentsche wrote: >> Avi Kivity <[EMAIL PROTECTED]> wrote: >> >> >>> Can't the version be determined directly from kernedir itself? e.g. >>> kerneldir is /lib/modules/$version/build? >>> >> >> How do you get $version if you are crosscompiling? In this case the >> usual way of asking uname doesn't work. Instead you somehow need to get >> the version of the kernel you're building for from the kernel source >> provided in --kerneldir. I don't know any better way than evaluating >> UTSRELEASE. >> > > If the user specified $kerneldir, it should be in the form > /lib/modules/$version/build, no? This is not sufficiently generic. KVM should to accept arbitrary paths to kernel directories, not just those special links (which could cause problems anyway if the link is set up for the target and makes no sense on the build host). Jan -- Siemens AG, Corporate Technology, CT SE 2 ES-OS Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm-userspace: fix module build with --kerneldir
(Sorry, I accidentally sent this mail without CC before. Resend with correct CC list.) Avi Kivity <[EMAIL PROTECTED]> wrote: > If the user specified $kerneldir, it should be in the form > /lib/modules/$version/build, no? Ah sorry, I did not know of this requirement. My kerneldir is /tmp/linux (I build inside chroot). so long Maik -- \ AMD Saxony Limited Liability Company & Co. KG Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany System | Register Court Dresden: HRA 4896 Research | General Partner authorized to represent: Center | AMD Saxony LLC (Wilmington, Delaware, US) / General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy signature.asc Description: PGP signature
Re: [PATCH] Prevent trace call into unloaded module text
On Wed, Nov 26, 2008 at 07:46:19PM +0800, Wu Fengguang wrote: > On Wed, Nov 26, 2008 at 01:17:55PM +0200, Avi Kivity wrote: > > Wu Fengguang wrote: > > > Add marker_synchronize_unregister() before module unloading. > > > This prevents possible trace calls into unloaded module text. > > > > > > Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> > > > --- > > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > > > index a87f45e..64f38b3 100644 > > > --- a/virt/kvm/kvm_main.c > > > +++ b/virt/kvm/kvm_main.c > > > @@ -2102,5 +2102,6 @@ void kvm_exit(void) > > > kvm_arch_exit(); > > > kvm_exit_debug(); > > > __free_page(bad_page); > > > + marker_synchronize_unregister(); > > > } > > > EXPORT_SYMBOL_GPL(kvm_exit); > > > > > > > What about kvm-intel.ko and kvm-amd.ko? They also contain markers. > > vmx_exit and svm_exit() all calls into kvm_exit(), so they have been > handled in a common way. > > > (and, why doesn't module unload do this automatically?) > > Maybe most modules don't need it for now? OK I got a better answer: because marker_synchronize_unregister() must be called after marker_probe_unregister() calls and the tear down of any private data the trace functions rely on. So there are no automatic way. Below is the updated patch. Thanks, Fengguang --- Prevent trace call into torn down text or data Add marker_synchronize_unregister() immediately after probe unregisters. This prevents possible trace calls into unloaded module text, or the trace functions accessing invalidated data. Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> --- diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c index 41dcc84..f598744 100644 --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c @@ -252,6 +252,7 @@ void kvm_trace_cleanup(void) struct kvm_trace_probe *p = &kvm_trace_probes[i]; marker_probe_unregister(p->name, p->probe_func, p); } + marker_synchronize_unregister(); relay_close(kt->rchan); debugfs_remove(kt->lost_file); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Patch 4/5] x86_emulator: add the assembler code for three operands
On Tue, 25 Nov 2008 16:59:00 +0200 Avi Kivity <[EMAIL PROTECTED]> wrote: > > > > +#define __emulate_2op_cl(_op,_src,_src2,_dst,_eflags,_wx,_wy) \ > > + do { > > \ > > + unsigned long _tmp; > > \ > > + > > \ > > + __asm__ __volatile__ ( \ > > + _PRE_EFLAGS("0", "5", "2") > > \ > > + "mov %4, %%rcx \n\t" > > \ > > + _op" %%cl,%3,%1; \n\t" > >\ > > + _POST_EFLAGS("0", "5", "2") > > \ > > + : "=m" (_eflags), "=m" ((_dst).val), > > \ > > + "=&r" (_tmp) > > \ > > + : _wy ((_src).val) , _wy ((_src2).val), "i" > > (EFLAGS_MASK) \ > > + : "%rcx" ); > > \ > > + } while (0) > > > > I tested the code and it seems to work. > > > > That's actually better and could be used for the other emulations. > Please disassemble x86_emulate.o and verify that there are three > different shld instructions, one for each register size. > I tried with the following code (it's nearly the same as above): + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "5", "2") \ + "mov %4, %%rcx \n\t" \ + _op" %3,%1; \n\t" \ + _POST_EFLAGS("0", "5", "2") \ + : "=m" (_eflags), "+r" ((_dst).val), \ + "=&r" (_tmp) \ + : _x ((_src).val) , _y ((_src2).val), "i" (EFLAGS_MASK) \ + : "%rcx" ); \ When I disassemble x86_emulate.o I can see the following produced code: 4787: ... 4788: 41 8f 44 24 08 popq 0x8(%r12) 478d: 48 89 d1mov%rdx,%rcx 4790: 48 0f a5 f0 shld %cl,%rsi,%rax 4794: 9c pushfq 4795: ... It's the only code with shld instruction. I don't see how you can have three different shld instructions here. I'm sure that I'm missing something here because for me, when we emulate the shld instruction, the code produced is the same. I mean that src.val and dst.val are always unsigned long and I don't see why register size are important here. In fact I don't understand why we need to use the switch ((_dst).bytes) for the other emulations. Guillaume -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Prevent trace call into unloaded module text
On Wed, Nov 26, 2008 at 01:17:55PM +0200, Avi Kivity wrote: > Wu Fengguang wrote: > > Add marker_synchronize_unregister() before module unloading. > > This prevents possible trace calls into unloaded module text. > > > > Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> > > --- > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > > index a87f45e..64f38b3 100644 > > --- a/virt/kvm/kvm_main.c > > +++ b/virt/kvm/kvm_main.c > > @@ -2102,5 +2102,6 @@ void kvm_exit(void) > > kvm_arch_exit(); > > kvm_exit_debug(); > > __free_page(bad_page); > > + marker_synchronize_unregister(); > > } > > EXPORT_SYMBOL_GPL(kvm_exit); > > > > What about kvm-intel.ko and kvm-amd.ko? They also contain markers. vmx_exit and svm_exit() all calls into kvm_exit(), so they have been handled in a common way. > (and, why doesn't module unload do this automatically?) Maybe most modules don't need it for now? Thanks, Fengguang -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm-userspace: Cleanup user space NMI injection
Avi Kivity wrote: > Jan Kiszka wrote: >> Cleanup redundant check for an open NMI window before injecting. This >> will no longer be supported by the kernel, and it was broken by design >> anyway. >> >> This change still allows to run the user space against older kernel >> modules. >> >> > > Hm. For interrupts we need to check, since the interrupt might be > deasserted or masked while the window is closed. > > Is there no way that this can happen for NMIs? In our emulation, but I also think in real life, there is no way to deassert an NMI. Therefore, our user space API just sets the nmi_pending flag. Masking of NMIs (due to mov ss etc.) is handled by the NMI injection code, and this has to happen in kernel anyway as the other NMI sources (APIC and IOAPIC) do not bother about the NMI windows as well. Jan -- Siemens AG, Corporate Technology, CT SE 2 ES-OS Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Add KVM version to monitor "info version"
Glauber Costa wrote: On Wed, Nov 26, 2008 at 8:55 AM, Daniel P. Berrange <[EMAIL PROTECTED]> wrote: On Wed, Nov 26, 2008 at 11:22:03AM +0100, Bj?rn Mork wrote: Trying to track the current KVM head as closely as possible, I often find myself wondering which KVM version a particular guest instance is running. The attached patch adds this information to the monitor command "info version": (qemu) info version 0.9.1 (kvm-79) I'd really rather we didn't touch the 'info version' command since it means we have different syntax from upstream QEMU. If we want KVM version info exposed, then I'd suggest 'info kvmversion' or some other new command. I believe it fits nicely into info kvm. Instead of enabled/disabled, we could answer disabled or kvm-whatever (qemu) info kvm enabled (kvm-123) or (qemu) info kvm enabled version kvm-123 May be easier to parse, and more backwards compatible. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm-userspace: Cleanup user space NMI injection
Jan Kiszka wrote: Cleanup redundant check for an open NMI window before injecting. This will no longer be supported by the kernel, and it was broken by design anyway. This change still allows to run the user space against older kernel modules. Hm. For interrupts we need to check, since the interrupt might be deasserted or masked while the window is closed. Is there no way that this can happen for NMIs? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Add KVM version to monitor "info version"
On Wed, Nov 26, 2008 at 8:55 AM, Daniel P. Berrange <[EMAIL PROTECTED]> wrote: > On Wed, Nov 26, 2008 at 11:22:03AM +0100, Bj?rn Mork wrote: >> Trying to track the current KVM head as closely as possible, I often >> find myself wondering which KVM version a particular guest instance is >> running. The attached patch adds this information to the monitor >> command "info version": >> >> (qemu) info version >> 0.9.1 (kvm-79) > > I'd really rather we didn't touch the 'info version' command since it > means we have different syntax from upstream QEMU. If we want KVM > version info exposed, then I'd suggest 'info kvmversion' or some other > new command. I believe it fits nicely into info kvm. Instead of enabled/disabled, we could answer disabled or kvm-whatever -- Glauber Costa. "Free as in Freedom" http://glommer.net "The less confident you are, the more serious you have to act." -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Prevent trace call into unloaded module text
Wu Fengguang wrote: Add marker_synchronize_unregister() before module unloading. This prevents possible trace calls into unloaded module text. Signed-off-by: Wu Fengguang <[EMAIL PROTECTED]> --- diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a87f45e..64f38b3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2102,5 +2102,6 @@ void kvm_exit(void) kvm_arch_exit(); kvm_exit_debug(); __free_page(bad_page); + marker_synchronize_unregister(); } EXPORT_SYMBOL_GPL(kvm_exit); What about kvm-intel.ko and kvm-amd.ko? They also contain markers. (and, why doesn't module unload do this automatically?) -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm-userspace: fix module build with --kerneldir
Maik Hentsche wrote: Avi Kivity <[EMAIL PROTECTED]> wrote: Can't the version be determined directly from kernedir itself? e.g. kerneldir is /lib/modules/$version/build? How do you get $version if you are crosscompiling? In this case the usual way of asking uname doesn't work. Instead you somehow need to get the version of the kernel you're building for from the kernel source provided in --kerneldir. I don't know any better way than evaluating UTSRELEASE. If the user specified $kerneldir, it should be in the form /lib/modules/$version/build, no? If so, then you can extract $version by removing the prefix and suffix from the string. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: MMU: optimize set_spte for page sync
Marcelo Tosatti wrote: Here it goes. KVM: MMU: optimize set_spte for page sync The write protect verification in set_spte is unnecessary for page sync. Its guaranteed that, if the unsync spte was writable, the target page does not have a write protected shadow (if it had, the spte would have been write protected under mmu_lock by rmap_write_protect before). Same reasoning applies to mark_page_dirty: the gfn has been marked as dirty via the pagefault path. The cost of hash table and memslot lookups are quite significant if the workload is pagetable write intensive resulting in increased mmu_lock contention. Applied, thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm-userspace: fix module build with --kerneldir
Avi Kivity <[EMAIL PROTECTED]> wrote: > Can't the version be determined directly from kernedir itself? e.g. > kerneldir is /lib/modules/$version/build? How do you get $version if you are crosscompiling? In this case the usual way of asking uname doesn't work. Instead you somehow need to get the version of the kernel you're building for from the kernel source provided in --kerneldir. I don't know any better way than evaluating UTSRELEASE. so long Maik -- \ AMD Saxony Limited Liability Company & Co. KG Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany System | Register Court Dresden: HRA 4896 Research | General Partner authorized to represent: Center | AMD Saxony LLC (Wilmington, Delaware, US) / General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy signature.asc Description: PGP signature
Re: [PATCH 2/2] KVM: Change to use new APIs for KVM VT-d
Han, Weidong wrote: This patch changes to use new APIs for KVM VT-d, and add device deassignment for hotplug. index 44fd7fa..558bc32 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -425,6 +425,8 @@ struct kvm_trace_rec { struct kvm_assigned_pci_dev) #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) +#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x71, \ +struct kvm_assigned_pci_dev) Need to advertise using KVM_CAP_DEVICE_DEASSIGNMENT. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Add KVM version to monitor "info version"
On Wed, Nov 26, 2008 at 11:22:03AM +0100, Bj?rn Mork wrote: > Trying to track the current KVM head as closely as possible, I often > find myself wondering which KVM version a particular guest instance is > running. The attached patch adds this information to the monitor > command "info version": > > (qemu) info version > 0.9.1 (kvm-79) I'd really rather we didn't touch the 'info version' command since it means we have different syntax from upstream QEMU. If we want KVM version info exposed, then I'd suggest 'info kvmversion' or some other new command. Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] VT-d: Support multiple device assignment for KVM
Han, Weidong wrote: In order to support multiple device assignment for KVM, this patch does following main changes: - extend dmar_domain to own multiple devices from different iommus, use a bitmap of iommus to replace iommu pointer in dmar_domain. - add a flag DOMAIN_FLAG_VIRTUAL_MACHINE to represent KVM VT-d usage. Many functions (e.g. intel_map_single() and intel_unmap_single()) won't be used by KVM VT-d. Let them return directly when this flag is set. This seems brittle. An API that has some functions shorted out depending on some flag is hard to understand and use. We should either implement the functions, or split the API into a basic version that talks only to one device, and an expanded versions that talks to multiple devices, and is implemented by the using the lower level API. This may require more changes due to the need to share io pagetables. - "SAGAW" capability may be different across iommus, that's to say the VT-d page table levels may be different among iommus. This patch uses a defaut agaw, and skip top levels of page tables for iommus which have smaller agaw than default. Neat trick. void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; @@ -960,7 +1054,14 @@ void free_dmar_iommu(struct intel_iommu *iommu) for (; i < cap_ndoms(iommu->cap); ) { domain = iommu->domains[i]; clear_bit(i, iommu->domain_ids); - domain_exit(domain); + + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { + /* domain may be referenced by other iommus */ + if (domain_in_other_iommus(domain, iommu) == 0) + domain_exit(domain); + } + else + domain_exit(domain); Things like this are best expressed using reference counts, which removes the need for the test as well. + + /* Skip top levels of page tables for +* iommu which has less agaw than default. +*/ + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(*pgd)); + if (!dma_pte_present(*pgd)) { + spin_unlock_irqrestore(&iommu->lock, flags); + return -ENOMEM; + } + } + } Need to check that the agaw is sufficient for mapped memory (and when adding a device or mapping more memory, need a similar check). -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm-userspace: fix module build with --kerneldir
Joerg Roedel wrote: From: Maik Hentsche <[EMAIL PROTECTED]> When kvm-userspace is build with a different kernel version than the running kernel the depmod at the end will fail. This patch fixed the problem. Signed-off-by: Maik Hentsche <[EMAIL PROTECTED]> @@ -0,0 +1,9 @@ +#include +#include + +int main() +{ +printf("%s",UTS_RELEASE); +return(0); +} + Can't the version be determined directly from kernedir itself? e.g. kerneldir is /lib/modules/$version/build? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Add KVM version to monitor "info version"
Trying to track the current KVM head as closely as possible, I often find myself wondering which KVM version a particular guest instance is running. The attached patch adds this information to the monitor command "info version": (qemu) info version 0.9.1 (kvm-79) Signed-off-by: Bjørn Mork <[EMAIL PROTECTED]> --- qemu/monitor.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/qemu/monitor.c b/qemu/monitor.c index a58a18e..f61fe56 100644 --- a/qemu/monitor.c +++ b/qemu/monitor.c @@ -246,7 +246,7 @@ static void do_info(const char *item) static void do_info_version(void) { - term_printf("%s\n", QEMU_VERSION); + term_printf("%s\n", QEMU_VERSION " (" KVM_VERSION ")"); } static void do_info_name(void) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/9][v6] Enable MSI for KVM
Sheng Yang wrote: Hi MSI patchset v6 is coming... v5->v6 Addressed all comments from Avi on v5. I also set msi2intx=0 in non-x86 architecture machines. Applied all, thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFT] Rebased gdb/debug register patches
Hollis Blanchard wrote: > On Mon, 2008-11-24 at 16:27 +0100, Jan Kiszka wrote: >> Hi, >> >> this is not yet the official submission, but a request for testing: >> >> I'm happy to announce the availability of a rebased patch series to >> enhance KVM's guest debugging support as well as to add debug register >> emulation. It was rebased because QEMU mainline recently accepted the >> core of my corresponding bits and KVM has merged them over. A few >> patches are still awaiting QEMU merge, and two of them are mandatory to >> provide a clean foundation for the KVM changes - therefore this >> intermediate step. >> >> To test the series, checkout the kernel bits from >> >> git://git.kiszka.org/linux-kvm.git gdb-queue >> >> and the user space part from >> >> git://git.kiszka.org/kvm-userspace.git gdb-queue >> >> Early feedback welcome, also before the final submission. And if someone >> could look into AMD/SVM implementation, this would also be great >> (unfortunately, there is no customer need for it ATM, thus no resources). > > Would you mind posting the patches here, to make feedback easier for the > reviewers? I was waiting for the first two of them getting merged into qemu, what has happened yesterday, plus there was one open oddity that is understood and fixed now as well. Will post them later today. Jan -- Siemens AG, Corporate Technology, CT SE 2 ES-OS Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] PCI: allow pci driver to support only dynids
Hi Chris, On Tue, 25 Nov 2008 19:36:10 -0800, Chris Wright wrote: > commit b41d6cf38e27 (PCI: Check dynids driver_data value for validity) > requires all drivers to include an id table to try and match > driver_data. Before validating driver_data check driver has an id > table. Sorry for missing this case. > Cc: Jean Delvare <[EMAIL PROTECTED]> > Cc: Milton Miller <[EMAIL PROTECTED]> > Signed-off-by: Chris Wright <[EMAIL PROTECTED]> > --- > drivers/pci/pci-driver.c | 20 +++- > 1 files changed, 11 insertions(+), 9 deletions(-) > > diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c > index b4cdd69..0a5edbe 100644 > --- a/drivers/pci/pci-driver.c > +++ b/drivers/pci/pci-driver.c > @@ -48,7 +48,7 @@ store_new_id(struct device_driver *driver, const char *buf, > size_t count) > subdevice=PCI_ANY_ID, class=0, class_mask=0; > unsigned long driver_data=0; > int fields=0; > - int retval; > + int retval=0; > > fields = sscanf(buf, "%x %x %x %x %x %x %lx", > &vendor, &device, &subvendor, &subdevice, > @@ -58,16 +58,18 @@ store_new_id(struct device_driver *driver, const char > *buf, size_t count) > > /* Only accept driver_data values that match an existing id_table > entry */ > - retval = -EINVAL; > - while (ids->vendor || ids->subvendor || ids->class_mask) { > - if (driver_data == ids->driver_data) { > - retval = 0; > - break; > + if (ids) { > + retval = -EINVAL; > + while (ids->vendor || ids->subvendor || ids->class_mask) { > + if (driver_data == ids->driver_data) { > + retval = 0; > + break; > + } > + ids++; > } > - ids++; > + if (retval) /* No match */ > + return retval; > } > - if (retval) /* No match */ > - return retval; > > dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); > if (!dynid) Do we really want to let the user add a new id to a driver which didn't have any? Wouldn't it be safer to simply not create the new_id sysfs file if driver->id_table is NULL? That's a one-line change: Signed-off-by: Jean Delvare <[EMAIL PROTECTED]> --- drivers/pci/pci-driver.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- linux-2.6.28-rc6.orig/drivers/pci/pci-driver.c 2008-10-24 09:28:14.0 +0200 +++ linux-2.6.28-rc6/drivers/pci/pci-driver.c 2008-11-26 09:23:46.0 +0100 @@ -113,7 +113,7 @@ static int pci_create_newid_file(struct pci_driver *drv) { int error = 0; - if (drv->probe != NULL) + if (drv->probe != NULL && drv->id_table != NULL) error = driver_create_file(&drv->driver, &driver_attr_new_id); return error; } As a side note, I am curious what PCI driver we do have which has driver->probe defined but no driver->id_table. Did you hit an actual issue or are you fixing a theoretical one? Thanks, -- Jean Delvare -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html