Re: [kvm-devel] [PATCH][EXTBOOT] Fix read drive parameters to solve Grub Error 18

2008-03-04 Thread Avi Kivity
Anthony Liguori wrote:
> In certain circumstances, the calculated CHS can result in a total number of
> sectors that is less than the actual number of sectors.  I'm not entirely
> sure why this upsets grub, but it seems to be the source of the Grub Error 18
> that sometimes occurs when using extboot.
>
> The solution is to implement the read drive parameters function and return the
> actual numbers of sectors.  This requires changing the QEMU <=> extboot
> interface as this was not previously passed to extboot.
>
>   

Applied, thanks.  Please separate qemu and extboot patches in the future.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 1/6] KVM: In kernel pit model

2008-03-04 Thread Yang, Sheng
On Wednesday 05 March 2008 14:54:06 Avi Kivity wrote:
> Yang, Sheng wrote:
> > +
> > +static int pit_get_out(struct kvm *kvm, int channel)
> > +{
> > +   struct kvm_kpit_channel_state *c =
> > +   &kvm->arch.vpit->pit_state.channels[channel];
> > +   s64 d, t;
> > +   int out;
> > +
> > +   ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
> > +
> > +   t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
> > +   d = muldiv64(t, PIT_FREQ, 1e9);
>
> NSECS_PER_SEC to avoid people jumping on you saying you can't use
> floating point in the kernel (yes, the compiler converts it at
> compile-time, but they'll still say it).

Sorry... I remembered I've modified it, seems something got wrong...

Here is the updated patch:

---
 arch/x86/kvm/Makefile  |3 +-
 arch/x86/kvm/i8254.c   |  583 

 arch/x86/kvm/i8254.h   |   59 +
 arch/x86/kvm/irq.c |3 +
 arch/x86/kvm/x86.c |9 +
 include/asm-x86/kvm_host.h |1 +
 include/linux/kvm.h|2 +
 7 files changed, 659 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/i8254.c
 create mode 100644 arch/x86/kvm/i8254.h

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b3..4d0c22e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o 
ioapic.o)

 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm

-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+   i8254.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 0000000..84fb3d9
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,583 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a 
copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the 
rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ *   Sheng Yang <[EMAIL PROTECTED]>
+ *   Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#if 1
+#define pit_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
+#else
+#define pit_debug(fmt, arg...)
+#endif
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+   union {
+   u64 ll;
+   struct {
+   u32 low, high;
+   } l;
+   } u, res;
+   u64 rl, rh;
+
+   u.ll = a;
+   rl = (u64)u.l.low * (u64)b;
+   rh = (u64)u.l.high * (u64)b;
+   rh += (rl >> 32);
+   res.l.high = div64_64(rh, c);
+   res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+   return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+   struct kvm_kpit_channel_state *c =
+   &kvm->arch.vpit->pit_state.channels[channel];
+
+   ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+   switch (c->mode) {
+   default:
+   case 0:
+   case 4:
+   /* XXX: just disable/enable counting */
+   break;
+   case 1:
+   case 2:
+   case 3:
+   case 5:
+   /* Restart counting on rising edge. */
+   if (c->gate < val)
+   c->count_load_time = ktime_get();
+   break;
+   }
+
+   c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+   ASSERT(mutex_is_locked(&kvm->arch.vpi

Re: [kvm-devel] KVM architecture docs

2008-03-04 Thread Avi Kivity
Zhao Forrest wrote:
>>> Normally swapping mechanism choose the Least Recently Used(LRU) pages
>>> of a process to be swapped out. When KVM uses MMU notifier in linux
>>> kernel to implement swapping for VM, could KVM choose LRU pages of a
>>> VM to swap out? If so, could you give a brief description about how
>>> this is implemented?
>>>
>>>   
>> The Linux memory manager approximates LRU by scanning pages for the
>> accessed bit, which is set in the pte by the processor when a page is
>> accessed through that pte. mmu notifiers provide a callback for the
>> check, so that kvm can check the accessed bit on the shadow ptes.
>> 
>
> Linux kernel maintains a reverse mapping from a page frame to all page tables
> pointing to this page frame. Does KVM need to maintain a similar reverse 
> mapping
> from a page frame to all shadow page tables pointing to this page frame?
>   

Yes, look for 'rmap' in mmu.c.  The purpose was initially to be able to 
write-protect shadowed guest page tables without horrible worst-case 
performance, and was later extended to swapping.

With mmu notifiers, when the kernel swaps a page, it first scans its own 
rmap, then calls kvm which scans the kvm rmap.  So one way to look at 
mmu notifiers is as rmap extenders (that's not the whole story -- kvm 
ptes are in a different format than Linux ptes, so the code has to be 
different).

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 1/6] KVM: In kernel pit model

2008-03-04 Thread Avi Kivity
Yang, Sheng wrote:
> +
> +static int pit_get_out(struct kvm *kvm, int channel)
> +{
> + struct kvm_kpit_channel_state *c =
> + &kvm->arch.vpit->pit_state.channels[channel];
> + s64 d, t;
> + int out;
> +
> + ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
> +
> + t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
> + d = muldiv64(t, PIT_FREQ, 1e9);
>   

NSECS_PER_SEC to avoid people jumping on you saying you can't use 
floating point in the kernel (yes, the compiler converts it at 
compile-time, but they'll still say it).

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] loop in copy_user_generic_string

2008-03-04 Thread Avi Kivity
Andi Kleen wrote:
> Avi Kivity <[EMAIL PROTECTED]> writes:
>   
>> Most likely movs emulation is broken for long counts.  Please post a 
>> disassembly of copy_user_generic_string to make sure we're looking at 
>> the same code.
>> 
>
> Be careful -- this code is patched at runtime and what you 
> see in the vmlinux is not necessarily the same that is executed
>
>   

If the disassembled instruction isn't marked as an alternative in the 
source, then it can't be patched, right?


> Incidentally that might cause problems.

Specific to kvm?  how?

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] KVM architecture docs

2008-03-04 Thread Zhao Forrest
> >
> > Normally swapping mechanism choose the Least Recently Used(LRU) pages
> > of a process to be swapped out. When KVM uses MMU notifier in linux
> > kernel to implement swapping for VM, could KVM choose LRU pages of a
> > VM to swap out? If so, could you give a brief description about how
> > this is implemented?
> >
>
> The Linux memory manager approximates LRU by scanning pages for the
> accessed bit, which is set in the pte by the processor when a page is
> accessed through that pte. mmu notifiers provide a callback for the
> check, so that kvm can check the accessed bit on the shadow ptes.

Linux kernel maintains a reverse mapping from a page frame to all page tables
pointing to this page frame. Does KVM need to maintain a similar reverse mapping
from a page frame to all shadow page tables pointing to this page frame?
I should have read the code to find the answer. But it's appreciated
if you could give
a quick answer :)

Thanks,
Forrest

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 23/23] QEMU/KVM: device hot-remove

2008-03-04 Thread Avi Kivity
Marcelo Tosatti wrote:
> Add monitor command to hot-remove devices.
>
> Remove device data on _EJ0 notification.
>
> Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
>
> Index: kvm-userspace.hotplug/qemu/monitor.c
> ===
> --- kvm-userspace.hotplug.orig/qemu/monitor.c
> +++ kvm-userspace.hotplug/qemu/monitor.c
> @@ -1355,6 +1355,7 @@ static term_cmd_t term_cmds[] = {
>"value", "set maximum speed (in bytes) for migrations" },
>  { "cpu_set", "is", do_cpu_set_nr, "cpu [online|offline]", "change cpu 
> state" },
>  { "pci_add", "ss", device_hot_add, "nic|drive 
> [vlan=n][,macaddr=addr][,model=type] 
> [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]", "hotadd PCI 
> device" },
> +{ "pci_remove", "i", device_hot_remove, "slot number", "hot remove PCI 
> device" },
>  { NULL, NULL, },
>   

Should be pci_del for consistency with usb_del.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Avi Kivity
Anthony Liguori wrote:
> Glauber Costa wrote:
>   
>> Anthony Liguori wrote:
>>
>> No, it can't. Because at the time qemu starts, no vcpu -> thread id 
>> relationship exists at all. And we don't know when it will.
>> 
>
> Sure we do.  The vcpu -> thread id relationship is valid after 
> kvm_init_ap() is called which is after machine init but before the 
> select loop is entered for the first time.  Therefore, if you start qemu 
> with -S, then connect on the monitor, and do an info cpus, you could be 
> guaranteed to be told the mapping.
>
> The threads are *idle* at this point so there's no harm if they were 
> started on the "wrong" CPU.  You can now taskset to your hearts content 
> and then when you're happy with placement, you can issue a 'cont' so 
> that the VM actually starts running.  I say "wrong" because you can 
> still taskset the initial creation guaranteeing that the threads are 
> created on the right group of physical CPUs, you just can't specify the 
> exact mapping until you start interacting with the monitor.
>
>   

Good points.  Initially I thought we ought to abstract the 
implementation and not expose the vcpu thread id, but I'm beginning to 
think that due to the wide variety of options (affinity, page migration, 
priority, cpu control groups) and the relative obscurity of the feature 
(which as you point out, isn't needed in the common case), we can export 
the thread id and let the management tools deal with it directly.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] KVM architecture docs

2008-03-04 Thread Avi Kivity
Zhao Forrest wrote:
> when NPT is used by KVM in the future, this mmu
>   

btw, NPT support is already integrated.


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 23/23] QEMU/KVM: device hot-remove

2008-03-04 Thread Avi Kivity
Daniel P. Berrange wrote:
>> (qemu) info block
>> ide0-hd0: type=hd removable=0 file=/root/images/marcelo5.img ro=0 drv=raw
>> ide1-cd0: type=cdrom removable=1 locked=0 [not inserted]
>> floppy0: type=floppy removable=1 locked=0 [not inserted]
>> sd0: type=floppy removable=1 locked=0 [not inserted]
>> scsi0-hd0: type=hd removable=0 file=/tmp/bigfile ro=0 drv=raw
>> scsi0-hd1: type=hd removable=0 file=/tmp/bigfile.2 ro=0 drv=raw
>>
>> (qemu) info network
>> VLAN 0 devices:
>>   tap: ifname=tap0 setup_script=qemu-ifup-tap0
>>   rtl8139 pci macaddr=52:54:00:12:34:56
>> 
>
> This is utterly horrible for a human to parse & use if they're using the
> QEMU monitor, let alone something that libvirt could parse. In fact this
> doesn't let you map between the network device & pci device if there is
> more than one device added because 'info pci' doesn't show the MAC address
> info, and 'info network' does not show any PCI device number info - the
> same for disks.
>
>   

We need a machine friendly protocol for libvirt and other management 
tools.  Versioned commands (with some backward compatibility), command 
discovery, and command/response tagging so you can associate an async 
reply to the command that triggered it, and quoting so that strings with 
spaces and other special chars are properly supported.  But how the 
information is presented is orthogonal to what information is presented.

btw, the qemu command line parses something fairly similar, I don't see 
why libvirt should have problems with it.  It wouldn't be fun to code, 
but is doable.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 00/23] [RFC] QEMU/KVM ACPI PCI hotplug

2008-03-04 Thread Avi Kivity
Marcelo Tosatti wrote:
> The following patchset adds ACPI PCI hotplug support for QEMU.
>
> It extends the number of slots with IRQ routing information from 6 to 32.
>
> The only PCI driver which the unregister method has been added is LSI SCSI, 
> would
> like more comments to implement that for the remaining drivers.
>
>   

Very nice patchset, looks minimally intrusive for such complex 
functionality.

Please post the next iteration on qemu-devel to see if they have any 
objections.  Since this is a large patchset, I don't want to keep it 
churning for too long, so if you prefer, you can rip out drive hotplug 
and add it back later (see my comments to patch 14).

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] KVM architecture docs

2008-03-04 Thread Zhao Forrest
> > Normally swapping mechanism choose the Least Recently Used(LRU) pages
> > of a process to be swapped out. When KVM uses MMU notifier in linux
> > kernel to implement swapping for VM, could KVM choose LRU pages of a
> > VM to swap out? If so, could you give a brief description about how
> > this is implemented?
> >
>
> The Linux memory manager approximates LRU by scanning pages for the
> accessed bit, which is set in the pte by the processor when a page is
> accessed through that pte. mmu notifiers provide a callback for the
> check, so that kvm can check the accessed bit on the shadow ptes.

If I understand correctly, when NPT is used by KVM in the future, this mmu
notifier can't help much for swapping out pages used by VM, right?
That is, when NPT is used, a balloon para-virt driver running on guest
OS might be more efficient for swapping, am I right?

Thanks,
Forrest

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] KVM architecture docs

2008-03-04 Thread Avi Kivity
Zhao Forrest wrote:
>>> Normally swapping mechanism choose the Least Recently Used(LRU) pages
>>> of a process to be swapped out. When KVM uses MMU notifier in linux
>>> kernel to implement swapping for VM, could KVM choose LRU pages of a
>>> VM to swap out? If so, could you give a brief description about how
>>> this is implemented?
>>>
>>>   
>> The Linux memory manager approximates LRU by scanning pages for the
>> accessed bit, which is set in the pte by the processor when a page is
>> accessed through that pte. mmu notifiers provide a callback for the
>> check, so that kvm can check the accessed bit on the shadow ptes.
>> 
>
> If I understand correctly, when NPT is used by KVM in the future, this mmu
> notifier can't help much for swapping out pages used by VM, right?
>   

No, NPT does not change things materially.  Shadow page tables are still 
used, though instead of mapping guest virtual addresses to host physical 
addresses, they now translate guest physical addresses to host physical 
addresses.  Swapping and all the other goodies still work.

> That is, when NPT is used, a balloon para-virt driver running on guest
> OS might be more efficient for swapping, am I right?
>   

Ballooning is more efficient than swapping both with and without NPT.  
The problem with ballooning is that it requires guest cooperation.  The 
guest may not be able to balloon, or it may take a long time to balloon, 
while the host may need the memory immediately.  A rebooting guest also 
implicitly deflates its balloon, creating a large and unpredictable 
memory demand on the host.

A good solution needs to use ballooning with swapping as a fallback for 
guaranteeing that the system does not run out of memory.

A nice feature in 2.6.25 is the ability to select which guests will 
swap, via the memory controller feature (mlock() also works, but is 
relatively crude).

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 14/23] QEMU/KVM: device hot-add

2008-03-04 Thread Avi Kivity
Marcelo Tosatti wrote:
> Add monitor command to hot-add PCI devices (nic and drive).
>
>   

A drive is not a pci device.  One would hot-plug a scsi controller, and 
then hot-plug a device to that controller.


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 23/23] QEMU/KVM: device hot-remove

2008-03-04 Thread Avi Kivity
Anthony Liguori wrote:
> Daniel P. Berrange wrote:
>> Removing based on pci device number is very un-pleasant, since its 
>> not something
>> the user of the monitor cares about. Nor do they even know what the 
>> PCI device number
>> assigned by 'pci_add' is.
>>
>> As with addition, I'd like separate commands for NIC vs Drive, and 
>> for the removal
>> key to be based upon the same data used for addition. eg so one can 
>> remove the
>> NIC based on its MAC address, or remove the drive based on the 
>> (if,bus,unit,filename)
>> data items.
>>
>>nic_remove [vlan=n][,macaddr=addr][,model=type]
>>drive_remove 
>> [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]
>>
>> Though, perhaps still allow removal based on the PCI device ID as an 
>> alternative
>> for those who happen to have that data available.
>>   
>
> pci_remove is consistent with usb_del and things like stopcapture.  
> The thing to add would be an "info pci" that let a user associate the 
> slot number with higher level information about the device.
>

pci_add should return the slot information, which can later be used as 
an identifier for pci_remove. It would also be nice to be able to 
specify the slot in pci_add, though I hardly have a compelling use case 
for that.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 01/23] QEMU/KVM: add PCI IRQ routing information up to slot 32

2008-03-04 Thread Avi Kivity
Marcelo Tosatti wrote:
> Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
>
> Index: kvm-userspace.hotplug/bios/acpi-dsdt.dsl
> ===
> --- kvm-userspace.hotplug.orig/bios/acpi-dsdt.dsl
> +++ kvm-userspace.hotplug/bios/acpi-dsdt.dsl
> @@ -249,6 +249,162 @@ DefinitionBlock (
>  Package() {0x0005, 1, LNKB, 0},
>  Package() {0x0005, 2, LNKC, 0},
>  Package() {0x0005, 3, LNKD, 0},
> +
> +// PCI Slot 6
> +Package() {0x0006, 0, LNKB, 0},
> +Package() {0x0006, 1, LNKC, 0},
> +Package() {0x0006, 2, LNKD, 0},
> +Package() {0x0006, 3, LNKA, 0},
>   

This is already in kvm-userspace.git.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Avi Kivity
Anthony Liguori wrote:
> Glauber Costa wrote:
>> My main interest is in management tools being able to specify pinning
>> set ups at VM creation time.
>>
>> As I said, it can be done through tools like taskset, but then you'd 
>> have to know:
>>  * when are the threads created
>>  * which thread ids corresponds to each cpu
>>
>> And of course, for an amount of time, the threads will be running in 
>> a "wrong" cpu, which may affect workloads running there. (which is a 
>> case cpu pinning usually tries to address)
>
> A management tool can start QEMU with -S to prevent any CPUs from 
> running, query the VCPU=>thread id relationship (modifying info cpus 
> would be a good thing to do for this), taskset, and then run 'cont' in 
> the monitor if they desperately need this functionality.  However, I 
> don't think the vast majority of people need this particular 
> functionality.
>


Affinity control is probably useful mostly for numa configurations, 
where you want to restrict virtual cpus to run on the cores closest to 
memory.  However it may well be that the scheduler is already good 
enough to do this on its own.


> My feeling is that adding an interface to do this in QEMU encourages 
> people to not use the existing Linux tools for this or worse yet, to 
> think they can do a better job than Linux.  The whole reason this 
> exists in Xen is that Xen's schedulers were incapable of doing CPU 
> migration historically (which is no longer true since the credit 
> scheduler).  It was necessary to specify pinning upon creation or you 
> were stuck with round-robin placement.  So libvirt has APIs for this 
> because they were part of the Xen API because it was needed to get 
> reasonable performance at some point in time on Xen.  I don't think 
> this behavior is useful for KVM though.  Just because Xen does it 
> doesn't imply that we should do it.
>

In the brutal world of hypervisors, if your competitor has a feature, 
you must have it too.  I often get asked about cpu pinning in kvm.

[I'd like to see how Xen implements swapping, though]

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Avi Kivity
Glauber Costa wrote:
> Hi guys,
>
> Here's a first series of patch aiming at vcpu pinning support in qemu.
> Ideally, as vcpus are just normal threads, the usual userspace tools can be used
> to set cpu affinities mask.
>
> However, It makes it very difficult to _start_ a vm with vcpus pinned, since
> we don't know the thread ids from qemu in advance, nor do we know when are the
> vcpus created.
>
> The patches introduce a -cpu-map option, that, if specified, starts the 
> virtual cpus
> with the specified affinities.
>
> Comments? Welcome. Random rants? Not welcome, but... how can I stop you? So 
> go ahead!
>
>   

A monitor interface would be more useful than a command line option, as 
it allows you to migrate the vcpus at runtime, and also control 
hotplugged cpus.  For unmanaged use, taskset is probably sufficient to 
control affinity from the command line.

Normally I encourage splitting patches, but this is a bit extreme.  1 
and 3 are pointless without each other, 4 and 5, 7 and 8.  Hope that 
doesn't interfere with any pay-per-patch contract.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] KVM architecture docs

2008-03-04 Thread Avi Kivity
Zhao Forrest wrote:
>> - swapping allows you to overcommit memory
>> 
>
> Normally swapping mechanism choose the Least Recently Used(LRU) pages
> of a process to be swapped out. When KVM uses MMU notifier in linux
> kernel to implement swapping for VM, could KVM choose LRU pages of a
> VM to swap out? If so, could you give a brief description about how
> this is implemented?
>   

The Linux memory manager approximates LRU by scanning pages for the 
accessed bit, which is set in the pte by the processor when a page is 
accessed through that pte.  mmu notifiers provide a callback for the 
check, so that kvm can check the accessed bit on the shadow ptes.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/6] In kernel PIT patch

2008-03-04 Thread Yang, Sheng
On Wednesday 05 March 2008 12:25:07 Anthony Liguori wrote:
> Yang, Sheng wrote:
> > On Wednesday 05 March 2008 08:50:24 Anthony Liguori wrote:
> >> So how do we measure the benefits of an in-kernel PIT?
> >
> > On the time accuracy side, one typical example is in RHEL5 32E guest,
> > time flows very slow compared to the host
> > (https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1826080&gro
> > >up_id=180599). You can simply use "sleep" to test it. And many people
> > > complained about it before, e.g.,
> > http://www.mail-archive.com/kvm-devel@lists.sourceforge.net/msg10928.html
> > And I have to say the timer problem in current KVM is very serious, and
> > this patch can solve this.
>
> Okay, then my question is, how much does this patch set improve the
> situation?
>
> For instance, the bug report shows some circumstances where:
>
> On IA32e RHEL4 guest with
> Realtime 3min
> Guest3min15s

Um... I see the problem. I haven't tested IA32e RHEL4 before (I tested Windows XP, 
RHEL5 PAE/IA32e, and RHEL5.1 PAE with default kernel parameters), and it seems it has 
the same problem as PAE RHEL4 (I almost forgot that problem, thanks for the 
reminder :) ). I have tested it with "clock=pit", and it gets exactly 3min 
for 3min in real time. But without it, the timer runs much faster...

You see, this patch can only guarantee that PIT interrupts are injected 
correctly... I think the problem on RHEL4 exposes another timer bug, like the 
PAE SMP RHEL5 one before. I will do some investigation. 

> So what is the guest time with an in-kernel PIT?  How is this affected
> by the various possible -clock options?  What I'm looking for is an
> example of how much we're improving the situation and some assurance
> that this is the only way to solve the problem.
>
> I'm not fundamentally opposed to an in-kernel PIT, I just am trying to
> understand the justification.

Since the irq chip is in the kernel, and a userspace PIT can't touch it, I think an 
in-kernel PIT is the proper way to solve the problem - clear, and lightweight for 
this kind of very frequent call. 

>
> Regards,
>
> Anthony Liguori
>
> > I think you are most worrying about the regressions. That's why I spent a
> > lot of time to solve TSC problem (PAE SMP RHEL5.1 can't boot up). For in
> > kernel PIT accelerate the process, the same bug was exposed on PAE SMP
> > RHEL5 with the patch. Though I don't think it's a real regression, I have
> > got it done to prevent this patch bring any bad effect.
> >
> > I would do more test to ensure this patch won't break something.



-- 
Thanks
Yang, Sheng

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] Notifier for Externally Mapped Memory (EMM)

2008-03-04 Thread Avi Kivity
Peter Zijlstra wrote:
> On Tue, 2008-03-04 at 14:35 -0800, Christoph Lameter wrote:
>
>   
>> RCU means that the callbacks occur in an atomic context.
>> 
>
> Not really, if it requires moving the VM locks to sleepable locks under
> a .config option, I think its also fair to require PREEMPT_RCU.
>
> OTOH, if you want to unconditionally move the VM locks to sleepable
> locks you have a point.
>   

Isn't that out of the question for .25?

I really wish we can get the atomic variant in now, and add on 
sleepability in .26, updating users if necessary.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/6] In kernel PIT patch

2008-03-04 Thread Anthony Liguori
Yang, Sheng wrote:
> On Wednesday 05 March 2008 08:50:24 Anthony Liguori wrote:
>   
>> So how do we measure the benefits of an in-kernel PIT?
>> 
>
> On the time accuracy side, one typical example is in RHEL5 32E guest, time 
> flows very slow compared to the host 
> (https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1826080&group_id=180599).
>  
> You can simple using "sleep" to test it. And many people complained it 
> before, e,g, 
> http://www.mail-archive.com/kvm-devel@lists.sourceforge.net/msg10928.html
> And I have to say the timer problem in current KVM is very serious, and this 
> patch can solve this.
>   

Okay, then my question is, how much does this patch set improve the 
situation?

For instance, the bug report shows some circumstances where:

On IA32e RHEL4 guest with 
Realtime 3min
Guest3min15s


So what is the guest time with an in-kernel PIT?  How is this affected 
by the various possible -clock options?  What I'm looking for is an 
example of how much we're improving the situation and some assurance 
that this is the only way to solve the problem.

I'm not fundamentally opposed to an in-kernel PIT, I just am trying to 
understand the justification.

Regards,

Anthony Liguori

> I think you are most worrying about the regressions. That's why I spent a lot 
> of time to solve TSC problem (PAE SMP RHEL5.1 can't boot up). For in kernel 
> PIT accelerate the process, the same bug was exposed on PAE SMP RHEL5 with 
> the patch. Though I don't think it's a real regression, I have got it done to 
> prevent this patch bring any bad effect. 
>
> I would do more test to ensure this patch won't break something. 
>   


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] KVM architecture docs

2008-03-04 Thread Zhao Forrest
> >
> > could you (or anybody) elaborate on that? the mmu-related threads show
> > lots of progress, but it's way (way) out of my league.
> >
> > AFAICT, it's about the infrastructure to later write drivers (virtio?)
> > to DMA-heavy hardware (IB, RDMA, etc). am i wrong? or is it
> > something more complete (like a ready to use driver)?
> >
> >
>
> mmu notifiers provide a way for the core Linux memory management code to
> propagate changes in how Linux views a process' memory map to external
> memory management units that are also interested in that memory map.
> These changes include things like swapping, page migration, changes to
> memory protection, defragmentation, and copy-on-write. In this context,
> kvm appears as a dma capable memory controller, like RDMA NICs or GPUs.
>
> For kvm, this is important as it allows all those features to be used
> transparently with guests.
>
> - swapping allows you to overcommit memory

Normally the swapping mechanism chooses the Least Recently Used (LRU) pages
of a process to be swapped out. When KVM uses the MMU notifier in the Linux
kernel to implement swapping for a VM, could KVM choose the LRU pages of a
VM to swap out? If so, could you give a brief description of how
this is implemented?

> - page migration allows optimization of memory placement within the host
> in response to changing workloads
> - defragmentation will allow (if/when it is merged into Linux) more
> widespread use of large pages, which improve performance
> - copy-on-write allows sharing identical pages of memory among guests,
> increasing guest density

Thanks,
Forrest

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Anthony Liguori
Glauber Costa wrote:
> Anthony Liguori wrote:
>
> No, it can't. Because at the time qemu starts, no vcpu -> thread id 
> relationship exists at all. And we don't know when it will.

Sure we do.  The vcpu -> thread id relationship is valid after 
kvm_init_ap() is called which is after machine init but before the 
select loop is entered for the first time.  Therefore, if you start qemu 
with -S, then connect on the monitor, and do an info cpus, you could be 
guaranteed to be told the mapping.

The threads are *idle* at this point so there's no harm if they were 
started on the "wrong" CPU.  You can now taskset to your heart's content 
and then when you're happy with placement, you can issue a 'cont' so 
that the VM actually starts running.  I say "wrong" because you can 
still taskset the initial creation guaranteeing that the threads are 
created on the right group of physical CPUs, you just can't specify the 
exact mapping until you start interacting with the monitor.

Regards,

Anthony Liguori

>> Regards,
>>
>> Anthony Liguori
>>
>>>
>>>
>>
>


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/6] In kernel PIT patch

2008-03-04 Thread Yang, Sheng
On Wednesday 05 March 2008 08:50:24 Anthony Liguori wrote:
> Dor Laor wrote:
> > On Tue, 2008-03-04 at 09:52 -0600, Anthony Liguori wrote:
> >> Yang, Sheng wrote:
> >>> Hi
> >>>
> >>> Here is the last in-kernel PIT patch for KVM. The mainly change from
> >>> last version is the supporting to save/restore. I also tested live
> >>> migration.
> >>>
> >>> The other modifies including some date structure changed to be better
> >>> for supporting the save/restore. I moved the PIT timer to outside of
> >>> channel structure, which explicitly means only one channel (channel 0)
> >>> would trigger it.
> >>>
> >>> After fix TSC problem on SMP PAE RHEL5/5.1 guest, now the patch works
> >>> well without any modify of kernel parameter.
> >>
> >> How are you measuring the improvements from an in-kernel PIT?  From your
> >> mails, you're claiming it increases the timer accuracy.  How are you
> >> measuring it and how much does it improve it?
> >
> > It's also a functionality addition: userspace pit & pic combination
> > needed to use -tdf option (time drift fix). The tdf took care of pending
> > pit irqs and tried to make the guest ack the right number of irqs the
> > pit was configured.
>
> I thought there was some discussion about whether -tdf was every useful
> in practice?
>
> > Once we switched to the default in-kernel pic, the userspace pit
> > couldn't get the acks from the pit.
> > One can see the effect when running multiple guests (windows, standard
> > HAL) playing video, the time slows down.
>
> Okay, that makes sense.  So have you done any tests to confirm this?  We
> suffered through a fair number of regressions when we moved to an
> in-kernel APIC.  Before moving another big chunk of code in the kernel
> and going through possible regressions, I want to make sure we have a
> measurable argument that it's the right thing to do.
>
> So how do we measure the benefits of an in-kernel PIT?

On the time accuracy side, one typical example is that in a RHEL5 32E guest, time 
flows very slowly compared to the host 
(https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1826080&group_id=180599).
 
You can simply use "sleep" to test it. Many people have complained about it 
before, e.g., 
http://www.mail-archive.com/kvm-devel@lists.sourceforge.net/msg10928.html
And I have to say the timer problem in current KVM is very serious, and this 
patch can solve it.

I think you are mostly worried about regressions. That's why I spent a lot 
of time solving the TSC problem (PAE SMP RHEL5.1 can't boot up). Because the 
in-kernel PIT accelerates the process, the same bug was exposed on PAE SMP RHEL5 
with the patch. Though I don't think it's a real regression, I have fixed it to 
prevent this patch from having any bad effect. 

I will do more testing to ensure this patch won't break anything. 

>
> Regards,
>
> Anthony Liguori
>
> > This patch set has a pending counter and takes care for it too.
> >
> >> Do you expect an overall performance improvement from this or is it
> >> simply about improving timer accuracy?
> >
> > It will probably help older kernels with slow HZ run faster HZ guests.
> > Without CONFIG_DYNTICK the guests behaved jumpy because of that.
> >
> >> Regards,
> >>
> >> Anthony Liguori
> >>
> >>
> >>
> >> 
> >>- This SF.net email is sponsored by: Microsoft
> >> Defy all challenges. Microsoft(R) Visual Studio 2008.
> >> http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
> >> ___
> >> kvm-devel mailing list
> >> kvm-devel@lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/kvm-devel



-- 
Thanks
Yang, Sheng

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/6] kvm: qemu: Add option for enable/disable in kernel PIT

2008-03-04 Thread Yang, Sheng
On Tuesday 04 March 2008 21:23:46 Uri Lublin wrote:
> > @@ -492,9 +494,13 @@ PITState *pit_init(int base, qemu_irq irq)
> >  PITChannelState *s;
> >
> >  s = &pit->channels[0];
> > -/* the timer 0 is connected to an IRQ */
> > -s->irq_timer = qemu_new_timer(vm_clock, pit_irq_timer, s);
> > -s->irq = irq;
> > +#ifdef KVM_CAP_PIT
>
> I  think you should remove the ifdef such that the if-statement is
> executed even if KVM_CAP_PIT
> is not defined (in which case qemu pit emulation should be used).

Oh, yes. Thanks for the comment! 

Here is the updated patch:

---
 libkvm/libkvm-x86.c |9 +
 qemu/hw/i8254.c |   12 
 qemu/qemu-kvm.c |4 
 qemu/qemu-kvm.h |2 ++
 qemu/vl.c   |8 
 5 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/libkvm/libkvm-x86.c b/libkvm/libkvm-x86.c
index b3a241e..d19d17f 100644
--- a/libkvm/libkvm-x86.c
+++ b/libkvm/libkvm-x86.c
@@ -630,3 +630,12 @@ int kvm_disable_tpr_access_reporting(kvm_context_t kvm, 
int vcpu)
 }

 #endif
+
+int kvm_pit_in_kernel(kvm_context_t kvm)
+{
+#ifdef KVM_CAP_PIT
+   return kvm->pit_in_kernel;
+#else
+   return 0;
+#endif
+}
diff --git a/qemu/hw/i8254.c b/qemu/hw/i8254.c
index c281680..9e18ebc 100644
--- a/qemu/hw/i8254.c
+++ b/qemu/hw/i8254.c
@@ -26,6 +26,8 @@
 #include "isa.h"
 #include "qemu-timer.h"

+#include "qemu-kvm.h"
+
 //#define DEBUG_PIT

 #define RW_STATE_LSB 1
@@ -491,10 +493,12 @@ PITState *pit_init(int base, qemu_irq irq)
 PITState *pit = &pit_state;
 PITChannelState *s;

-s = &pit->channels[0];
-/* the timer 0 is connected to an IRQ */
-s->irq_timer = qemu_new_timer(vm_clock, pit_irq_timer, s);
-s->irq = irq;
+if (!kvm_enabled() || !qemu_kvm_pit_in_kernel()) {
+   s = &pit->channels[0];
+   /* the timer 0 is connected to an IRQ */
+   s->irq_timer = qemu_new_timer(vm_clock, pit_irq_timer, s);
+   s->irq = irq;
+}

 register_savevm("i8254", base, 1, pit_save, pit_load, pit);

diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 051946e..196e38e 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -10,6 +10,7 @@

 int kvm_allowed = 1;
 int kvm_irqchip = 1;
+int kvm_pit = 1;

 #include 
 #include "hw/hw.h"
@@ -536,6 +537,9 @@ int kvm_qemu_create_context(void)
 if (!kvm_irqchip) {
 kvm_disable_irqchip_creation(kvm_context);
 }
+if (!kvm_pit) {
+kvm_disable_pit_creation(kvm_context);
+}
 if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
kvm_qemu_destroy();
return -1;
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index 8e45f30..ff9c86e 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -84,9 +84,11 @@ extern kvm_context_t kvm_context;

 #define kvm_enabled() (kvm_allowed)
 #define qemu_kvm_irqchip_in_kernel() kvm_irqchip_in_kernel(kvm_context)
+#define qemu_kvm_pit_in_kernel() kvm_pit_in_kernel(kvm_context)
 #else
 #define kvm_enabled() (0)
 #define qemu_kvm_irqchip_in_kernel() (0)
+#define qemu_kvm_pit_in_kernel() (0)
 #endif

 #endif
diff --git a/qemu/vl.c b/qemu/vl.c
index f10fbd8..4c69ff4 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -8059,6 +8059,7 @@ static void help(int exitcode)
   "-no-kvm disable KVM hardware virtualization\n"
 #endif
   "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n"
+  "-no-kvm-pit disable KVM kernel mode PIT\n"
 #endif
 #ifdef TARGET_I386
"-std-vgasimulate a standard VGA card with VESA Bochs 
Extensions\n"
@@ -8177,6 +8178,7 @@ enum {
 QEMU_OPTION_no_acpi,
 QEMU_OPTION_no_kvm,
 QEMU_OPTION_no_kvm_irqchip,
+QEMU_OPTION_no_kvm_pit,
 QEMU_OPTION_no_reboot,
 QEMU_OPTION_show_cursor,
 QEMU_OPTION_daemonize,
@@ -8263,6 +8265,7 @@ const QEMUOption qemu_options[] = {
 { "no-kvm", 0, QEMU_OPTION_no_kvm },
 #endif
 { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip },
+{ "no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit },
 #endif
 #if defined(TARGET_PPC) || defined(TARGET_SPARC)
 { "g", 1, QEMU_OPTION_g },
@@ -9191,6 +9194,11 @@ int main(int argc, char **argv)
kvm_irqchip = 0;
break;
}
+   case QEMU_OPTION_no_kvm_pit: {
+   extern int kvm_pit;
+   kvm_pit = 0;
+   break;
+   }
 #endif
 case QEMU_OPTION_usb:
 usb_enabled = 1;
--
debian.1.5.3.7.1-dirty




From 986676752cc553dc3fc10c0083e432ef62dbfcbb Mon Sep 17 00:00:00 2001
From: Sheng Yang <[EMAIL PROTECTED]>
Date: Wed, 5 Mar 2008 02:07:47 +0800
Subject: [PATCH] kvm: qemu: Add option for enable/disable in kernel PIT


Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
---
 libkvm/libkvm-x86.c |9 +
 qemu/hw/i8254.c |   12 
 qemu/qemu-kvm.c |4 
 qemu/qemu-kvm.h |2 ++
 qemu/vl.c   |8 
 5 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/l

Re: [kvm-devel] [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges

2008-03-04 Thread Nick Piggin
On Wednesday 05 March 2008 05:58, Christoph Lameter wrote:
> On Tue, 4 Mar 2008, Nick Piggin wrote:
> > > Then put it into the arch code for TLB invalidation. Paravirt ops gives
> > > good examples on how to do that.
> >
> > Put what into arch code?
>
> The mmu notifier code.

It isn't arch specific.


> > > > What about a completely different approach... XPmem runs over
> > > > NUMAlink, right? Why not provide some non-sleeping way to basically
> > > > IPI remote nodes over the NUMAlink where they can process the
> > > > invalidation? If you intra-node cache coherency has to run over this
> > > > link anyway, then presumably it is capable.
> > >
> > > There is another Linux instance at the remote end that first has to
> > > remove its own ptes.
> >
> > Yeah, what's the problem?
>
> The remote end has to invalidate the page which involves locking etc.

I don't see what the problem is.


> > > Also would not work for Inifiniband and other
> > > solutions.
> >
> > infiniband doesn't want it. Other solutions is just handwaving,
> > because if we don't know what the other soloutions are, then we can't
> > make any sort of informed choices.
>
> We need a solution in general to avoid the pinning problems. Infiniband
> has those too.
>
> > > All the approaches that require evictions in an atomic context
> > > are limiting the approach and do not allow the generic functionality
> > > that we want in order to not add alternate APIs for this.
> >
> > The only generic way to do this that I have seen (and the only proposed
> > way that doesn't add alternate APIs for that matter) is turning VM locks
> > into sleeping locks. In which case, Andrea's notifiers will work just
> > fine (except for relatively minor details like rcu list scanning).
>
> No they wont. As you pointed out the callback need RCU locking.

That can be fixed easily.


> > > The good enough solution right now is to pin pages by elevating
> > > refcounts.
> >
> > Which kind of leads to the question of why do you need any further
> > kernel patches if that is good enough?
>
> Well its good enough with severe problems during reclaim, livelocks etc.
> One could improve on that scheme through Rik's work trying to add a new
> page flag that mark pinned pages and then keep them off the LRUs and
> limiting their number. Having pinned page would limit the ability to
> reclaim by the VM and make page migration, memory unplug etc impossible.

Well not impossible. You could have a callback to invalidate the remote
TLB and drop the pin on a given page.


> It is better to have notifier scheme that allows to tell a device driver
> to free up the memory it has mapped.

Yeah, it would be nice for those people with clusters of Altixes. Doesn't
mean it has to go upstream, though.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/6] In kernel PIT patch

2008-03-04 Thread Anthony Liguori
Dor Laor wrote:
> On Tue, 2008-03-04 at 09:52 -0600, Anthony Liguori wrote:
>   
>> Yang, Sheng wrote:
>> 
>>> Hi
>>>
>>> Here is the last in-kernel PIT patch for KVM. The mainly change from last 
>>> version is the supporting to save/restore. I also tested live migration.
>>>
>>> The other modifies including some date structure changed to be better for 
>>> supporting the save/restore. I moved the PIT timer to outside of channel 
>>> structure, which explicitly means only one channel (channel 0) would 
>>> trigger 
>>> it.
>>>
>>> After fix TSC problem on SMP PAE RHEL5/5.1 guest, now the patch works well 
>>> without any modify of kernel parameter.
>>>   
>>>   
>> How are you measuring the improvements from an in-kernel PIT?  From your 
>> mails, you're claiming it increases the timer accuracy.  How are you 
>> measuring it and how much does it improve it?
>>
>> 
>
> It's also a functionality addition: userspace pit & pic combination
> needed to use -tdf option (time drift fix). The tdf took care of pending
> pit irqs and tried to make the guest ack the right number of irqs the
> pit was configured.
>   

I thought there was some discussion about whether -tdf was ever useful 
in practice?

> Once we switched to the default in-kernel pic, the userspace pit
> couldn't get the acks from the pit.
> One can see the effect when running multiple guests (windows, standard
> HAL) playing video, the time slows down.
>   

Okay, that makes sense.  So have you done any tests to confirm this?  We 
suffered through a fair number of regressions when we moved to an 
in-kernel APIC.  Before moving another big chunk of code in the kernel 
and going through possible regressions, I want to make sure we have a 
measurable argument that it's the right thing to do.

So how do we measure the benefits of an in-kernel PIT?

Regards,

Anthony Liguori

> This patch set has a pending counter and takes care for it too.
>
>   
>> Do you expect an overall performance improvement from this or is it 
>> simply about improving timer accuracy?
>>
>> 
>
> It will probably help older kernels with slow HZ run faster HZ guests.
> Without CONFIG_DYNTICK the guests behaved jumpy because of that.
>
>   
>> Regards,
>>
>> Anthony Liguori
>>
>>
>>
>> -
>> This SF.net email is sponsored by: Microsoft
>> Defy all challenges. Microsoft(R) Visual Studio 2008.
>> http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
>> ___
>> kvm-devel mailing list
>> kvm-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/kvm-devel
>> 
>
>   


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH] mmu notifiers #v8

2008-03-04 Thread Nick Piggin
On Mon, Mar 03, 2008 at 11:01:22AM -0800, Christoph Lameter wrote:
> On Mon, 3 Mar 2008, Nick Piggin wrote:
> 
> > I'm still not completely happy with this. I had a very quick look
> > at the GRU driver, but I don't see why it can't be implemented
> > more like the regular TLB model, and have TLB insertions depend on
> > the linux pte, and do invalidates _after_ restricting permissions
> > to the pte.
> > 
> > Ie. I'd still like to get rid of invalidate_range_begin, and get
> > rid of invalidate calls from places where permissions are relaxed.
> 
> Isnt this more a job for paravirt ops if it is so tightly bound to page 
> tables? Are we not adding another similar API?

Um, it's bound to the *Linux page tables*, yes. And I have no idea why
you would use the paravirt ops for this.

 

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] Notifier for Externally Mapped Memory (EMM)

2008-03-04 Thread Peter Zijlstra

FWIW, I'll cut the kvm and openfabrics lists from any future posts.
I'm getting tired of the bounces.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/6] In kernel PIT patch

2008-03-04 Thread Dor Laor

On Tue, 2008-03-04 at 09:52 -0600, Anthony Liguori wrote:
> Yang, Sheng wrote:
> > Hi
> >
> > Here is the last in-kernel PIT patch for KVM. The mainly change from last 
> > version is the supporting to save/restore. I also tested live migration.
> >
> > The other modifies including some date structure changed to be better for 
> > supporting the save/restore. I moved the PIT timer to outside of channel 
> > structure, which explicitly means only one channel (channel 0) would 
> > trigger 
> > it.
> >
> > After fix TSC problem on SMP PAE RHEL5/5.1 guest, now the patch works well 
> > without any modify of kernel parameter.
> >   
> 
> How are you measuring the improvements from an in-kernel PIT?  From your 
> mails, you're claiming it increases the timer accuracy.  How are you 
> measuring it and how much does it improve it?
> 

It's also a functionality addition: userspace pit & pic combination
needed to use -tdf option (time drift fix). The tdf took care of pending
pit irqs and tried to make the guest ack the right number of irqs the
pit was configured.

Once we switched to the default in-kernel pic, the userspace pit
couldn't get the acks from the pit.
One can see the effect when running multiple guests (windows, standard
HAL) playing video, the time slows down.

This patch set has a pending counter and takes care for it too.

> Do you expect an overall performance improvement from this or is it 
> simply about improving timer accuracy?
> 

It will probably help older kernels with slow HZ run faster HZ guests.
Without CONFIG_DYNTICK the guests behaved jumpy because of that.

> Regards,
> 
> Anthony Liguori
> 
> 
> 
> -
> This SF.net email is sponsored by: Microsoft
> Defy all challenges. Microsoft(R) Visual Studio 2008.
> http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
> ___
> kvm-devel mailing list
> kvm-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/kvm-devel


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] Notifier for Externally Mapped Memory (EMM)

2008-03-04 Thread Christoph Lameter
On Tue, 4 Mar 2008, Peter Zijlstra wrote:

> 
> On Tue, 2008-03-04 at 14:35 -0800, Christoph Lameter wrote:
> 
> > RCU means that the callbacks occur in an atomic context.
> 
> Not really, if it requires moving the VM locks to sleepable locks under
> a .config option, I think its also fair to require PREEMPT_RCU.

Which would make the patchset pretty complex. RCU is not needed with a 
single linked list. Linked list operations can exploit atomic pointer 
updates and we only tear down the list when a single execution thread 
remains.


Having said that: Here are a couple of updates to address Andrea's complaint 
that we do not check the referenced bit from the external mapper when the 
referenced bit is set on an OS pte.

Plus two barriers to ensure that a new emm notifier object becomes
visible before the base pointer is updated.

Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>

---
 mm/rmap.c |   10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

Index: linux-2.6/mm/rmap.c
===
--- linux-2.6.orig/mm/rmap.c2008-03-04 14:36:36.321922321 -0800
+++ linux-2.6/mm/rmap.c 2008-03-04 15:10:46.159429369 -0800
@@ -298,10 +298,10 @@ static int page_referenced_one(struct pa
 
(*mapcount)--;
pte_unmap_unlock(pte, ptl);
-   if (!referenced)
-   /* rmap lock held */
-   referenced = emm_notify(mm, emm_referenced,
-   address, address + PAGE_SIZE);
+
+   /* rmap lock held */
+   if (emm_notify(mm, emm_referenced, address, address + PAGE_SIZE))
+   referenced = 1;
 out:
return referenced;
 }
@@ -1057,6 +1057,7 @@ EXPORT_SYMBOL_GPL(emm_notifier_release);
 void emm_notifier_register(struct emm_notifier *e, struct mm_struct *mm)
 {
e->next = mm->emm_notifier;
+   smp_wmb();
mm->emm_notifier = e;
 }
 EXPORT_SYMBOL_GPL(emm_notifier_register);
@@ -1069,6 +1070,7 @@ int __emm_notify(struct mm_struct *mm, e
int x;
 
while (e) {
+   smp_rmb();
if (e->func) {
x = e->func(e, mm, op, start, end);
if (x)

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] KVM-61/62 build fails on SLES 10

2008-03-04 Thread Dor Laor

> Hello
> 
> Sorry for my intervention into your communication, but are you sure, 
> when you want to use virtio, you must have 2.6.25 kernel on both - guest 
> and host system?
> I test this case right now, and I have running 2.6.25-rc3 only on guest 
> system with kvm-62.
> 

It's enough to run the new guest kernel. Virtio on the host is
implemented completely in qemu. Just use tap for better performance.
Actually, there is a backport for the guest kernel, written by Anthony Liguori,
that needs minor changes; I'll post it tomorrow.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH] Check device status in tx timer

2008-03-04 Thread Dor Laor

On Tue, 2008-03-04 at 09:42 -0600, Anthony Liguori wrote:
> > diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
> > index 612cf6b..a05eafb 100644
> > --- a/qemu/hw/virtio-net.c
> > +++ b/qemu/hw/virtio-net.c
> > @@ -280,6 +280,10 @@ static void virtio_net_tx_timer(void *opaque)
> >  {
> >  VirtIONet *n = opaque;
> >
> > +/* Just in case the driver is not ready on more */
> > +if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> > +return;
> > +
> >   
> 
> You probably want to set tx_timer_active = 0 here too.
> 

yap, thanks.

> Regards,
> 
> Anthony Liguori
> 
> >  n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
> >  n->tx_timer_active = 0;
> >  virtio_net_flush_tx(n, n->tx_vq);
> >
> >   
> 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] Notifier for Externally Mapped Memory (EMM)

2008-03-04 Thread Christoph Lameter
On Tue, 4 Mar 2008, Andrea Arcangeli wrote:

> I once ripped invalidate_page while working on #v8 but then I
> reintroduced it because I thought reducing the total number of hooks
> was beneficial to the core linux VM (even if only a
> microoptimization, I sure agree about that, but it's trivial to add
> one hook instead of two hooks there, so a microoptimization was worth
> it IMHO).

Well the problem is if one does not have the begin/end hooks then 
reliable clearing of the mapping may not be possible. begin/end allow
holding off new references and that avoids the issue that would come
with an single callback that could race with something else.
 
> Your API is also too restrictive, if we'll happen to need one more
> method that doesn't take just (start,end) we'll have to cause all
> drivers to have significant changes instead of one-liners to use
> whatever new feature.

What would that be? I think the API need to stay as simple as possible. 
And this set is pretty minimal and easy to understand. Not having the 
invalidate_page() removes a troublespot from the API.
 
> IMHO the design is actually same and I don't understand why you
> rewrote it once more time in a less flexibile way (on a style side
> you're not even using hlist), dropping RCU (not sure how you replace
> it with), etc

All of that is needed in order to allow sleeping in the future. Your 
version locks us into atomic callbacks. It also makes the API needlessly 
complex.

RCU means that the callbacks occur in an atomic context.

> Converging in a single design is great, but it'd be nice if we could
> converge into a single implementation, and my last patch doesn't have
> any bug and I think it's quite nicer too (also including Nick cleanup
> work) but then I may be biased ;).

It is the atomic dead end that we want to avoid. And your patch is exactly 
that. Both the invalidate_page and the RCU lock us into this.

> But as usual I'm entirely satisfied by your brand new EMM Notifier to
> be merged and all perfecting work done on my MMU notifier patch over
> the weeks by multiple developers (including you) to be dropped for
> good, as long as we can enable the new advanced KVM features in
> 2.6.25.

Well, I really want us to have one API that is suitable for multiple 
purposes and that allows generic use by device drivers for multiple 
purposes. The discussions in the last month have made that possible. I am 
glad that you do not see any major issues with the patch. I sure wish I 
would not have to post a competing patchset because I want things to be 
merged ASAP and get this over with. But we need to have at minimum a clear 
way to support sleeping with the existing API in the future.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] Notifier for Externally Mapped Memory (EMM)

2008-03-04 Thread Andrea Arcangeli
On Tue, Mar 04, 2008 at 11:00:31AM -0800, Christoph Lameter wrote:
> But as you pointed out before that path is a slow path anyways. Its rarely 

It's a slow path but I don't see why you think two hooks are better
than one, when only one is necessary.

I once ripped invalidate_page while working on #v8 but then I
reintroduced it because I thought reducing the total number of hooks
was beneficial to the core linux VM (even if only a
microoptimization, I sure agree about that, but it's trivial to add
one hook instead of two hooks there, so a microoptimization was worth
it IMHO).

Your API is also too restrictive, if we'll happen to need one more
method that doesn't take just (start,end) we'll have to cause all
drivers to have significant changes instead of one-liners to use
whatever new feature.

> taken. Having a single eviction callback simplifies design.

IMHO the design is actually the same and I don't understand why you
rewrote it one more time in a less flexible way (on the style side
you're not even using hlist), dropping RCU (not sure what you replace
it with), etc.

Your implementation has the same bug you had in your first V1, see how
you're not clearing the spte young bits if the pte young bit is
set. Once you fix that, your change in the ptep_clear_flush_young path
will look remarkably similar to the patch I posted incremental with
#v8 to make ->clear_flush_young sleep capable...

Converging in a single design is great, but it'd be nice if we could
converge into a single implementation, and my last patch doesn't have
any bug and I think it's quite nicer too (also including Nick cleanup
work) but then I may be biased ;).

But as usual I'm entirely satisfied by your brand new EMM Notifier to
be merged and all perfecting work done on my MMU notifier patch over
the weeks by multiple developers (including you) to be dropped for
good, as long as we can enable the new advanced KVM features in
2.6.25.

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 14/23] QEMU/KVM: device hot-add

2008-03-04 Thread Itamar Heim
Actually, the following (old) patch by Yuval Kashtan was sent to
qemu-devel back in October (but not merged AFAIK) to facilitate usb_del
using the same string used during usb_add.

"
Hello,

This proposed patch add support for deleting usb devices by providing
the (exact) same string they were added with, thus enabling to remove a
usb device with the host string.
The old capability of deleting usb device by their internal port is not
harmed. 

I had to move USBHostDevice to vl.h so that I will be able to reference
it from vl.c

and last, I've changed the output of info usb so that it will include
the saved string (for reference).

Please comment. 

BR, 
Yuval Kashtan.
"

-Original Message-
From: [EMAIL PROTECTED]
[mailto:[EMAIL PROTECTED] On Behalf Of Daniel P.
Berrange
Sent: Tuesday, March 04, 2008 9:54 PM
To: Anthony Liguori
Cc: kvm-devel@lists.sourceforge.net; Marcelo Tosatti; Avi Kivity;
Glauber Costa
Subject: Re: [kvm-devel] [patch 14/23] QEMU/KVM: device hot-add

On Tue, Mar 04, 2008 at 01:30:42PM -0600, Anthony Liguori wrote:
> Daniel P. Berrange wrote:
> >On Tue, Mar 04, 2008 at 03:34:33PM -0300, Marcelo Tosatti wrote:
> >>Add monitor command to hot-add PCI devices (nic and drive).
> >[snip]
> >
> >  
> >>Index: kvm-userspace.hotplug/qemu/monitor.c
> >>===
> >>--- kvm-userspace.hotplug.orig/qemu/monitor.c
> >>+++ kvm-userspace.hotplug/qemu/monitor.c
> >>@@ -1354,6 +1354,7 @@ static term_cmd_t term_cmds[] = {
> >> { "migrate_set_speed", "s", do_migrate_set_speed,
> >>   "value", "set maximum speed (in bytes) for migrations" },
> >> { "cpu_set", "is", do_cpu_set_nr, "cpu [online|offline]",
"change 
> >> cpu state" },
> >>+{ "pci_add", "ss", device_hot_add, "nic|drive 
> >>[vlan=n][,macaddr=addr][,model=type] 
> >>[[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]",
"hotadd PCI 
> >>device" },
> >>
> >
> >This syntax is not very nice IMHO. We should have explicit commands
> >for the different types of device,
> >
> >ie
> >
> >   nic_add [vlan=n][,macaddr=addr][,model=type]
> >   drive_add
[[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]
> >
> >ie, follow naming of the command line args   -nic, and -drive.
> >
> >This also keeps 'pci_add' as a command name available for future use
to
> >do generic host->guest  pci device pass-through.
> >  
> 
> There is symmetry with pci_add and usb_add since usb_add takes either
a 
> USB device identifier or the name of an emulated device along with 
> parameters.  So I prefer the pci_add syntax just to maintain
consistency 
> with the rest of QEMU.

This is true, but the usb_add/remove syntax is horrible to work with.
One
of the reasons I've not added  USB hotplug/remove to libvirt yet is the
difficulty (perhaps even impossibility) of reliably finding out the USB
device number associated with the device that was added. I guess ideally
the add command would have to print out / return the number associated
with the device, which libvirt could record for use when it later comes
time to remove the device.

Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392
2496 -=|
|=-   Perl modules: http://search.cpan.org/~danberr/
-=|
|=-   Projects: http://freshmeat.net/~danielpb/
-=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505
-=| 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


usb_del.patch
Description: usb_del.patch
-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 23/23] QEMU/KVM: device hot-remove

2008-03-04 Thread Daniel P. Berrange
On Tue, Mar 04, 2008 at 05:00:09PM -0300, Marcelo Tosatti wrote:
> On Tue, Mar 04, 2008 at 01:32:22PM -0600, Anthony Liguori wrote:
> > Daniel P. Berrange wrote:
> > >
> > >   nic_remove [vlan=n][,macaddr=addr][,model=type]
> > >   drive_remove [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]
> > >
> > >Though, perhaps still allow removal based on the PCI device ID as an 
> > >alternative
> > >for those who happen to have that data available.
> > >  
> > 
> > pci_remove is consistent with usb_del and things like stopcapture.  The 
> > thing to add would be an "info pci" that let a user associate the slot 
> > number with higher level information about the device.
> 
> Its there already:
> 
> (qemu) info pci
>   Bus  0, device   0, function 0:
> Host bridge: PCI device 8086:1237
>   Bus  0, device   1, function 0:
> ISA bridge: PCI device 8086:7000
>   Bus  0, device   1, function 1:
> IDE controller: PCI device 8086:7010
>   BAR4: I/O at 0xc000 [0xc00f].
>   Bus  0, device   1, function 3:
> Bridge: PCI device 8086:7113
>   IRQ 9.
>   Bus  0, device   2, function 0:
> VGA controller: PCI device 1013:00b8
>   BAR0: 32 bit memory at 0xf000 [0xf1ff].
>   BAR1: 32 bit memory at 0xf200 [0xf2000fff].
>   Bus  0, device   3, function 0:
> Ethernet controller: PCI device 10ec:8139
>   IRQ 11.
>   BAR0: I/O at 0xc100 [0xc1ff].
>   BAR1: 32 bit memory at 0xf2001000 [0xf20010ff].
> 
> And block,network:
> 
> (qemu) info block
> ide0-hd0: type=hd removable=0 file=/root/images/marcelo5.img ro=0 drv=raw
> ide1-cd0: type=cdrom removable=1 locked=0 [not inserted]
> floppy0: type=floppy removable=1 locked=0 [not inserted]
> sd0: type=floppy removable=1 locked=0 [not inserted]
> scsi0-hd0: type=hd removable=0 file=/tmp/bigfile ro=0 drv=raw
> scsi0-hd1: type=hd removable=0 file=/tmp/bigfile.2 ro=0 drv=raw
> 
> (qemu) info network
> VLAN 0 devices:
>   tap: ifname=tap0 setup_script=qemu-ifup-tap0
>   rtl8139 pci macaddr=52:54:00:12:34:56

This is utterly horrible for a human to parse & use if they're using the
QEMU monitor, let alone something that libvirt could parse. In fact this
doesn't let you map between the network device & pci device if there is
more than one device added because 'info pci' doesn't show the MAC address
info, and 'info network' does not show any PCI device number info - the
same for disks.

> Perhaps reporting the  pair for PCI devices on block and
> network info provides the necessary information that you need Dan? 
> Oh, and unit,bus,media for block too.

Yes, getting PCI  (bus,slot,func) triple reported against each line in the
'info block' and 'info network' data would at least let you reliably map
from NIC -> PCI devs. 

Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-   Perl modules: http://search.cpan.org/~danberr/  -=|
|=-   Projects: http://freshmeat.net/~danielpb/   -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Glauber Costa
Daniel P. Berrange wrote:
> On Tue, Mar 04, 2008 at 01:28:24PM -0600, Anthony Liguori wrote:
>> Glauber Costa wrote:
>>> My main interest is in management tools being able to specify pinning
>>> set ups at VM creation time.
>>>
>>> As I said, it can be done through tools like taskset, but then you'd 
>>> have to know:
>>>  * when are the threads created
>>>  * which thread ids corresponds to each cpu
>>>
>>> And of course, for an amount of time, the threads will be running in a 
>>> "wrong" cpu, which may affect workloads running there. (which is a 
>>> case cpu pinning usually tries to address)
>> A management tool can start QEMU with -S to prevent any CPUs from 
>> running, query the VCPU=>thread id relationship (modifying info cpus 
>> would be a good thing to do for this), taskset, and then run 'cont' in 
>> the monitor if they desperately need this functionality.  However, I 
>> don't think the vast majority of people need this particular functionality.
> 
> I fully expected to have to run QEMU with -S and then use cont if I were
> todo CPU pinning from libvirt.
> 
> The only info I'd need to get is the  PID <-> vCPU mapping data. Then
> I can use regular Linux taskset capabilities from libvirt to assign the
> initial pCPU <-> vCPU mapping and finally run 'cont'.
> 
>> My feeling is that adding an interface to do this in QEMU encourages 
>> people to not use the existing Linux tools for this or worse yet, to 
>> think they can do a better job than Linux.  The whole reason this exists 
>> in Xen is that Xen's schedulers were incapable of doing CPU migration 
>> historically (which is no longer true since the credit scheduler).  It 
>> was necessary to specify pinning upon creation or you were stuck with 
>> round-robin placement.  So libvirt has APIs for this because they were 
>> part of the Xen API because it was needed to get reasonable performance 
>> at some point in time on Xen.  I don't think this behavior is useful for 
>> KVM though.  Just because Xen does it doesn't imply that we should do it.
> 
> I agree that adding QEMU commands for stuff which Linux already has APIs
> and tools is a bad idea. QEMU/KVM is much nicer to manage than Xen, 
> precisely because I can already use Linux APIs & process management tools.

I totally agree this is ideal, and I did not start this after thinking a 
little bit about
this situation. The main point is that we don't know when the cpus are 
created, and it does not seem to me that we will without a considerable 
amount of work.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Glauber Costa
Anthony Liguori wrote:
> Glauber Costa wrote:
>> My main interest is in management tools being able to specify pinning
>> set ups at VM creation time.
>>
>> As I said, it can be done through tools like taskset, but then you'd 
>> have to know:
>>  * when are the threads created
>>  * which thread ids corresponds to each cpu
>>
>> And of course, for an amount of time, the threads will be running in a 
>> "wrong" cpu, which may affect workloads running there. (which is a 
>> case cpu pinning usually tries to address)
> 
> A management tool can start QEMU with -S to prevent any CPUs from 
> running, query the VCPU=>thread id relationship (modifying info cpus 
> would be a good thing to do for this), taskset, and then run 'cont' in 
> the monitor if they desperately need this functionality.  However, I 
> don't think the vast majority of people need this particular functionality.

No, it can't. Because at the time qemu starts, no vcpu -> thread id 
relationship exists at all. And we don't know when it will.

It would be a different story if there were some kind of api that could
warn qemu
 > My feeling is that adding an interface to do this in QEMU encourages
 > people to not use the existing Linux tools for this or worse yet, to
 > think they can do a better job than Linux.

I agree with you that we should stick with linux tools, and that's why I 
didn't provide any kind of runtime setting via qemu monitor to do this 
(with the infrastructure, it would be trivial). taskset will do.

> The whole reason this exists 
> in Xen is that Xen's schedulers were incapable of doing CPU migration 
> historically (which is no longer true since the credit scheduler).  It 
> was necessary to specify pinning upon creation or you were stuck with 
> round-robin placement.  So libvirt has APIs for this because they were 
> part of the Xen API because it was needed to get reasonable performance 
> at some point in time on Xen.  I don't think this behavior is useful for 
> KVM though.  Just because Xen does it doesn't imply that we should do it.
No, not just because xen does.

I do however feel it is useful, since starting a VM and then letting it run 
unchanged is definitely a useful use case. And as I tried to show you,
I can't see a good way to do that for pinning.

> Regards,
> 
> Anthony Liguori
> 
>>
>>
> 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 23/23] QEMU/KVM: device hot-remove

2008-03-04 Thread Marcelo Tosatti
On Tue, Mar 04, 2008 at 01:32:22PM -0600, Anthony Liguori wrote:
> Daniel P. Berrange wrote:
> >Removing based on pci device number is very un-pleasant, since its not 
> >something
> >the user of the monitor cares about. Nor do they even know what the PCI 
> >device number
> >assigned by 'pci_add' is.
> >
> >As with addition, I'd like separate commands for NIC vs Drive, and for the 
> >removal
> >key to be based upon the same data used for addition. eg so one can remove 
> >the
> >NIC based on its MAC address, or remove the drive based on the 
> >(if,bus,unit,filename)
> >data items.
> >
> >   nic_remove [vlan=n][,macaddr=addr][,model=type]
> >   drive_remove [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]
> >
> >Though, perhaps still allow removal based on the PCI device ID as an 
> >alternative
> >for those who happen to have that data available.
> >  
> 
> pci_remove is consistent with usb_del and things like stopcapture.  The 
> thing to add would be an "info pci" that let a user associate the slot 
> number with higher level information about the device.

It's there already:

(qemu) info pci
  Bus  0, device   0, function 0:
Host bridge: PCI device 8086:1237
  Bus  0, device   1, function 0:
ISA bridge: PCI device 8086:7000
  Bus  0, device   1, function 1:
IDE controller: PCI device 8086:7010
  BAR4: I/O at 0xc000 [0xc00f].
  Bus  0, device   1, function 3:
Bridge: PCI device 8086:7113
  IRQ 9.
  Bus  0, device   2, function 0:
VGA controller: PCI device 1013:00b8
  BAR0: 32 bit memory at 0xf000 [0xf1ff].
  BAR1: 32 bit memory at 0xf200 [0xf2000fff].
  Bus  0, device   3, function 0:
Ethernet controller: PCI device 10ec:8139
  IRQ 11.
  BAR0: I/O at 0xc100 [0xc1ff].
  BAR1: 32 bit memory at 0xf2001000 [0xf20010ff].

And block,network:

(qemu) info block
ide0-hd0: type=hd removable=0 file=/root/images/marcelo5.img ro=0 drv=raw
ide1-cd0: type=cdrom removable=1 locked=0 [not inserted]
floppy0: type=floppy removable=1 locked=0 [not inserted]
sd0: type=floppy removable=1 locked=0 [not inserted]
scsi0-hd0: type=hd removable=0 file=/tmp/bigfile ro=0 drv=raw
scsi0-hd1: type=hd removable=0 file=/tmp/bigfile.2 ro=0 drv=raw

(qemu) info network
VLAN 0 devices:
  tap: ifname=tap0 setup_script=qemu-ifup-tap0
  rtl8139 pci macaddr=52:54:00:12:34:56

Perhaps reporting the  pair for PCI devices on block and
network info provides the necessary information that you need Dan? 
Oh, and unit,bus,media for block too.

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 14/23] QEMU/KVM: device hot-add

2008-03-04 Thread Daniel P. Berrange
On Tue, Mar 04, 2008 at 01:30:42PM -0600, Anthony Liguori wrote:
> Daniel P. Berrange wrote:
> >On Tue, Mar 04, 2008 at 03:34:33PM -0300, Marcelo Tosatti wrote:
> >>Add monitor command to hot-add PCI devices (nic and drive).
> >[snip]
> >
> >  
> >>Index: kvm-userspace.hotplug/qemu/monitor.c
> >>===
> >>--- kvm-userspace.hotplug.orig/qemu/monitor.c
> >>+++ kvm-userspace.hotplug/qemu/monitor.c
> >>@@ -1354,6 +1354,7 @@ static term_cmd_t term_cmds[] = {
> >> { "migrate_set_speed", "s", do_migrate_set_speed,
> >>   "value", "set maximum speed (in bytes) for migrations" },
> >> { "cpu_set", "is", do_cpu_set_nr, "cpu [online|offline]", "change 
> >> cpu state" },
> >>+{ "pci_add", "ss", device_hot_add, "nic|drive 
> >>[vlan=n][,macaddr=addr][,model=type] 
> >>[[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]", "hotadd PCI 
> >>device" },
> >>
> >
> >This syntax is not very nice IMHO. We should have explicit commands
> >for the different types of device,
> >
> >ie
> >
> >   nic_add [vlan=n][,macaddr=addr][,model=type]
> >   drive_add [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]
> >
> >ie, follow naming of the command line args   -nic, and -drive.
> >
> >This also keeps 'pci_add' as a command name available for future use to
> >do generic host->guest  pci device pass-through.
> >  
> 
> There is symmetry with pci_add and usb_add since usb_add takes either a 
> USB device identifier or the name of an emulated device along with 
> parameters.  So I prefer the pci_add syntax just to maintain consistency 
> with the rest of QEMU.

This is true, but the usb_add/remove syntax is horrible to work with. One
of the reasons I've not added  USB hotplug/remove to libvirt yet is the
difficulty (perhaps even impossibility) of reliably finding out the USB
device number associated with the device that was added. I guess ideally
the add command would have to print out / return the number associated
with the device, which libvirt could record for use when it later comes
time to remove the device.

Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-   Perl modules: http://search.cpan.org/~danberr/  -=|
|=-   Projects: http://freshmeat.net/~danielpb/   -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Daniel P. Berrange
On Tue, Mar 04, 2008 at 01:28:24PM -0600, Anthony Liguori wrote:
> Glauber Costa wrote:
> > My main interest is in management tools being able to specify pinning
> > set ups at VM creation time.
> >
> > As I said, it can be done through tools like taskset, but then you'd 
> > have to know:
> >  * when are the threads created
> >  * which thread ids corresponds to each cpu
> >
> > And of course, for an amount of time, the threads will be running in a 
> > "wrong" cpu, which may affect workloads running there. (which is a 
> > case cpu pinning usually tries to address)
> 
> A management tool can start QEMU with -S to prevent any CPUs from 
> running, query the VCPU=>thread id relationship (modifying info cpus 
> would be a good thing to do for this), taskset, and then run 'cont' in 
> the monitor if they desperately need this functionality.  However, I 
> don't think the vast majority of people need this particular functionality.

I fully expected to have to run QEMU with -S and then use cont if I were
to do CPU pinning from libvirt.

The only info I'd need to get is the  PID <-> vCPU mapping data. Then
I can use regular Linux taskset capabilities from libvirt to assign the
initial pCPU <-> vCPU mapping and finally run 'cont'.

> My feeling is that adding an interface to do this in QEMU encourages 
> people to not use the existing Linux tools for this or worse yet, to 
> think they can do a better job than Linux.  The whole reason this exists 
> in Xen is that Xen's schedulers were incapable of doing CPU migration 
> historically (which is no longer true since the credit scheduler).  It 
> was necessary to specify pinning upon creation or you were stuck with 
> round-robin placement.  So libvirt has APIs for this because they were 
> part of the Xen API because it was needed to get reasonable performance 
> at some point in time on Xen.  I don't think this behavior is useful for 
> KVM though.  Just because Xen does it doesn't imply that we should do it.

I agree that adding QEMU commands for stuff which Linux already has APIs
and tools is a bad idea. QEMU/KVM is much nicer to manage than Xen, 
precisely because I can already use Linux APIs & process management tools.

Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-   Perl modules: http://search.cpan.org/~danberr/  -=|
|=-   Projects: http://freshmeat.net/~danielpb/   -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 23/23] QEMU/KVM: device hot-remove

2008-03-04 Thread Anthony Liguori
Daniel P. Berrange wrote:
> Removing based on pci device number is very un-pleasant, since its not 
> something
> the user of the monitor cares about. Nor do they even know what the PCI 
> device number
> assigned by 'pci_add' is.
>
> As with addition, I'd like separate commands for NIC vs Drive, and for the 
> removal
> key to be based upon the same data used for addition. eg so one can remove the
> NIC based on its MAC address, or remove the drive based on the 
> (if,bus,unit,filename)
> data items.
>
>nic_remove [vlan=n][,macaddr=addr][,model=type]
>drive_remove [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]
>
> Though, perhaps still allow removal based on the PCI device ID as an 
> alternative
> for those who happen to have that data available.
>   

pci_remove is consistent with usb_del and things like stopcapture.  The 
thing to add would be an "info pci" that let a user associate the slot 
number with higher level information about the device.

Regards,

Anthony Liguori

> Regards,
> Dan.
>   


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 14/23] QEMU/KVM: device hot-add

2008-03-04 Thread Anthony Liguori
Daniel P. Berrange wrote:
> On Tue, Mar 04, 2008 at 03:34:33PM -0300, Marcelo Tosatti wrote:
>   
>> Add monitor command to hot-add PCI devices (nic and drive).
>> 
>
> [snip]
>
>   
>> Index: kvm-userspace.hotplug/qemu/monitor.c
>> ===
>> --- kvm-userspace.hotplug.orig/qemu/monitor.c
>> +++ kvm-userspace.hotplug/qemu/monitor.c
>> @@ -1354,6 +1354,7 @@ static term_cmd_t term_cmds[] = {
>>  { "migrate_set_speed", "s", do_migrate_set_speed,
>>"value", "set maximum speed (in bytes) for migrations" },
>>  { "cpu_set", "is", do_cpu_set_nr, "cpu [online|offline]", "change cpu 
>> state" },
>> +{ "pci_add", "ss", device_hot_add, "nic|drive 
>> [vlan=n][,macaddr=addr][,model=type] 
>> [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]", "hotadd PCI 
>> device" },
>> 
>
> This syntax is not very nice IMHO. We should have explicit commands
> for the different types of device,
>
> ie
>
>nic_add [vlan=n][,macaddr=addr][,model=type]
>drive_add [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]
>
> ie, follow naming of the command line args   -nic, and -drive.
>
> This also keeps 'pci_add' as a command name available for future use to
> do generic host->guest  pci device pass-through.
>   

There is symmetry with pci_add and usb_add since usb_add takes either a 
USB device identifier or the name of an emulated device along with 
parameters.  So I prefer the pci_add syntax just to maintain consistency 
with the rest of QEMU.

Regards,

Anthony Liguori

> Regards,
> Dan.
>   


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Anthony Liguori
Glauber Costa wrote:
> My main interest is in management tools being able to specify pinning
> set ups at VM creation time.
>
> As I said, it can be done through tools like taskset, but then you'd 
> have to know:
>  * when are the threads created
>  * which thread ids corresponds to each cpu
>
> And of course, for an amount of time, the threads will be running in a 
> "wrong" cpu, which may affect workloads running there. (which is a 
> case cpu pinning usually tries to address)

A management tool can start QEMU with -S to prevent any CPUs from 
running, query the VCPU=>thread id relationship (modifying info cpus 
would be a good thing to do for this), taskset, and then run 'cont' in 
the monitor if they desperately need this functionality.  However, I 
don't think the vast majority of people need this particular functionality.

My feeling is that adding an interface to do this in QEMU encourages 
people to not use the existing Linux tools for this or worse yet, to 
think they can do a better job than Linux.  The whole reason this exists 
in Xen is that Xen's schedulers were incapable of doing CPU migration 
historically (which is no longer true since the credit scheduler).  It 
was necessary to specify pinning upon creation or you were stuck with 
round-robin placement.  So libvirt has APIs for this because they were 
part of the Xen API because it was needed to get reasonable performance 
at some point in time on Xen.  I don't think this behavior is useful for 
KVM though.  Just because Xen does it doesn't imply that we should do it.

Regards,

Anthony Liguori

>
>


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 19/23] QEMU/KVM: add qemu_free_irqs

2008-03-04 Thread Marcelo Tosatti
Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/hw/irq.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/irq.c
+++ kvm-userspace.hotplug/qemu/hw/irq.c
@@ -56,6 +56,12 @@ qemu_irq *qemu_allocate_irqs(qemu_irq_ha
 return s;
 }
 
+void qemu_free_irqs(qemu_irq *s)
+{
+qemu_free(s[0]);
+qemu_free(s);
+}
+
 static void qemu_notirq(void *opaque, int line, int level)
 {
 struct IRQState *irq = opaque;
Index: kvm-userspace.hotplug/qemu/hw/irq.h
===
--- kvm-userspace.hotplug.orig/qemu/hw/irq.h
+++ kvm-userspace.hotplug/qemu/hw/irq.h
@@ -28,6 +28,8 @@ static inline void qemu_irq_pulse(qemu_i
 /* Returns an array of N IRQs.  */
 qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void *opaque, int n);
 
+void qemu_free_irqs(qemu_irq *s);
+
 /* Returns a new IRQ with opposite polarity.  */
 qemu_irq qemu_irq_invert(qemu_irq irq);
 

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 23/23] QEMU/KVM: device hot-remove

2008-03-04 Thread Daniel P. Berrange
On Tue, Mar 04, 2008 at 03:34:42PM -0300, Marcelo Tosatti wrote:
> Add monitor command to hot-remove devices.
> 
> Remove device data on _EJ0 notification.
> 
> Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
> 
> Index: kvm-userspace.hotplug/qemu/monitor.c
> ===
> --- kvm-userspace.hotplug.orig/qemu/monitor.c
> +++ kvm-userspace.hotplug/qemu/monitor.c
> @@ -1355,6 +1355,7 @@ static term_cmd_t term_cmds[] = {
>"value", "set maximum speed (in bytes) for migrations" },
>  { "cpu_set", "is", do_cpu_set_nr, "cpu [online|offline]", "change cpu 
> state" },
>  { "pci_add", "ss", device_hot_add, "nic|drive 
> [vlan=n][,macaddr=addr][,model=type] 
> [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]", "hotadd PCI 
> device" },
> +{ "pci_remove", "i", device_hot_remove, "slot number", "hot remove PCI 
> device" },
>  { NULL, NULL, },

Removing based on PCI device number is very unpleasant, since it's not something
the user of the monitor cares about. Nor do they even know what the PCI device
number assigned by 'pci_add' is.

As with addition, I'd like separate commands for NIC vs. drive, and for the
removal key to be based upon the same data used for addition, e.g. so one can
remove the NIC based on its MAC address, or remove the drive based on the
(if,bus,unit,filename) data items.

   nic_remove [vlan=n][,macaddr=addr][,model=type]
   drive_remove [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]

Though, perhaps still allow removal based on the PCI device ID as an alternative
for those who happen to have that data available.

Regards,
Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-   Perl modules: http://search.cpan.org/~danberr/  -=|
|=-   Projects: http://freshmeat.net/~danielpb/   -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 14/23] QEMU/KVM: device hot-add

2008-03-04 Thread Daniel P. Berrange
On Tue, Mar 04, 2008 at 03:34:33PM -0300, Marcelo Tosatti wrote:
> Add monitor command to hot-add PCI devices (nic and drive).

[snip]

> Index: kvm-userspace.hotplug/qemu/monitor.c
> ===
> --- kvm-userspace.hotplug.orig/qemu/monitor.c
> +++ kvm-userspace.hotplug/qemu/monitor.c
> @@ -1354,6 +1354,7 @@ static term_cmd_t term_cmds[] = {
>  { "migrate_set_speed", "s", do_migrate_set_speed,
>"value", "set maximum speed (in bytes) for migrations" },
>  { "cpu_set", "is", do_cpu_set_nr, "cpu [online|offline]", "change cpu 
> state" },
> +{ "pci_add", "ss", device_hot_add, "nic|drive 
> [vlan=n][,macaddr=addr][,model=type] 
> [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]", "hotadd PCI 
> device" },

This syntax is not very nice IMHO. We should have explicit commands
for the different types of device,

ie

   nic_add [vlan=n][,macaddr=addr][,model=type]
   drive_add [[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]

ie, follow naming of the command line args   -nic, and -drive.

This also keeps 'pci_add' as a command name available for future use to
do generic host->guest  pci device pass-through.

Regards,
Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-   Perl modules: http://search.cpan.org/~danberr/  -=|
|=-   Projects: http://freshmeat.net/~danielpb/   -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] Notifier for Externally Mapped Memory (EMM)

2008-03-04 Thread Christoph Lameter
On Tue, 4 Mar 2008, Andrea Arcangeli wrote:

> When working with single pages it's more efficient and preferable to
> call invalidate_page and only later release the VM reference on the
> page.

But as you pointed out before, that path is a slow path anyway. It's rarely
taken. Having a single eviction callback simplifies the design.

Plus the device driver can still check if the mapping was of PAGE_SIZE and 
then implement its own optimization.
 

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges

2008-03-04 Thread Christoph Lameter
On Tue, 4 Mar 2008, Nick Piggin wrote:

> > Then put it into the arch code for TLB invalidation. Paravirt ops gives
> > good examples on how to do that.
> 
> Put what into arch code?

The mmu notifier code.

> > > What about a completely different approach... XPmem runs over NUMAlink,
> > > right? Why not provide some non-sleeping way to basically IPI remote
> > > nodes over the NUMAlink where they can process the invalidation? If you
> > > intra-node cache coherency has to run over this link anyway, then
> > > presumably it is capable.
> >
> > There is another Linux instance at the remote end that first has to
> > remove its own ptes.
> 
> Yeah, what's the problem?

The remote end has to invalidate the page which involves locking etc.

> > Also would not work for Inifiniband and other 
> > solutions.
> 
> infiniband doesn't want it. Other solutions is just handwaving,
> because if we don't know what the other soloutions are, then we can't
> make any sort of informed choices.

We need a solution in general to avoid the pinning problems. Infiniband 
has those too.

> > All the approaches that require evictions in an atomic context 
> > are limiting the approach and do not allow the generic functionality that
> > we want in order to not add alternate APIs for this.
> 
> The only generic way to do this that I have seen (and the only proposed
> way that doesn't add alternate APIs for that matter) is turning VM locks
> into sleeping locks. In which case, Andrea's notifiers will work just
> fine (except for relatively minor details like rcu list scanning).

No, they won't. As you pointed out, the callbacks need RCU locking.

> > The good enough solution right now is to pin pages by elevating
> > refcounts.
> 
> Which kind of leads to the question of why do you need any further
> kernel patches if that is good enough?

Well, it's good enough, but with severe problems during reclaim, livelocks, etc.
One could improve on that scheme through Rik's work trying to add a new
page flag that marks pinned pages and then keeps them off the LRUs and
limits their number. Having pinned pages would limit the ability of the VM
to reclaim and make page migration, memory unplug, etc. impossible.
It is better to have a notifier scheme that allows telling a device driver
to free up the memory it has mapped.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 05/23] QEMU/KVM: return PCIDevice on net device init and record devfn

2008-03-04 Thread Marcelo Tosatti
Change the PCI network drivers init functions to return the PCIDev, to
inform which slot has been hot-plugged.

Also record devfn on the NICInfo structure to locate for release
on hot-removal.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/hw/e1000.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/e1000.c
+++ kvm-userspace.hotplug/qemu/hw/e1000.c
@@ -932,7 +932,7 @@ e1000_mmio_map(PCIDevice *pci_dev, int r
 cpu_register_physical_memory(addr, PNPMMIO_SIZE, d->mmio_index);
 }
 
-void
+PCIDevice *
 pci_e1000_init(PCIBus *bus, NICInfo *nd, int devfn)
 {
 E1000State *d;
@@ -996,4 +996,6 @@ pci_e1000_init(PCIBus *bus, NICInfo *nd,
  d->nd->macaddr[3], d->nd->macaddr[4], d->nd->macaddr[5]);
 
 register_savevm(info_str, d->instance, 1, nic_save, nic_load, d);
+
+return (PCIDevice *)d;
 }
Index: kvm-userspace.hotplug/qemu/hw/eepro100.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/eepro100.c
+++ kvm-userspace.hotplug/qemu/hw/eepro100.c
@@ -1742,7 +1742,7 @@ static void nic_save(QEMUFile * f, void 
 qemu_put_buffer(f, s->configuration, sizeof(s->configuration));
 }
 
-static void nic_init(PCIBus * bus, NICInfo * nd,
+static PCIDevice *nic_init(PCIBus * bus, NICInfo * nd,
  const char *name, uint32_t device)
 {
 PCIEEPRO100State *d;
@@ -1794,22 +1794,23 @@ static void nic_init(PCIBus * bus, NICIn
 
 /* XXX: instance number ? */
 register_savevm(name, 0, 3, nic_save, nic_load, s);
+return (PCIDevice *)d;
 }
 
-void pci_i82551_init(PCIBus * bus, NICInfo * nd, int devfn)
+PCIDevice *pci_i82551_init(PCIBus * bus, NICInfo * nd, int devfn)
 {
-nic_init(bus, nd, "i82551", i82551);
+return nic_init(bus, nd, "i82551", i82551);
 //~ uint8_t *pci_conf = d->dev.config;
 }
 
-void pci_i82557b_init(PCIBus * bus, NICInfo * nd, int devfn)
+PCIDevice *pci_i82557b_init(PCIBus * bus, NICInfo * nd, int devfn)
 {
-nic_init(bus, nd, "i82557b", i82557B);
+return nic_init(bus, nd, "i82557b", i82557B);
 }
 
-void pci_i82559er_init(PCIBus * bus, NICInfo * nd, int devfn)
+PCIDevice *pci_i82559er_init(PCIBus * bus, NICInfo * nd, int devfn)
 {
-nic_init(bus, nd, "i82559er", i82559ER);
+return nic_init(bus, nd, "i82559er", i82559ER);
 }
 
 /* eof */
Index: kvm-userspace.hotplug/qemu/hw/ne2000.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/ne2000.c
+++ kvm-userspace.hotplug/qemu/hw/ne2000.c
@@ -786,7 +786,7 @@ static void ne2000_map(PCIDevice *pci_de
 register_ioport_read(addr + 0x1f, 1, 1, ne2000_reset_ioport_read, s);
 }
 
-void pci_ne2000_init(PCIBus *bus, NICInfo *nd, int devfn)
+PCIDevice *pci_ne2000_init(PCIBus *bus, NICInfo *nd, int devfn)
 {
 PCINE2000State *d;
 NE2000State *s;
@@ -827,4 +827,6 @@ void pci_ne2000_init(PCIBus *bus, NICInf
 
 /* XXX: instance number ? */
 register_savevm("ne2000", ne2000_id++, 3, ne2000_save, ne2000_load, s);
+
+return (PCIDevice *)d;
 }
Index: kvm-userspace.hotplug/qemu/hw/pc.h
===
--- kvm-userspace.hotplug.orig/qemu/hw/pc.h
+++ kvm-userspace.hotplug/qemu/hw/pc.h
@@ -146,7 +146,7 @@ void isa_ne2000_init(int base, qemu_irq 
 
 /* virtio-net.c */
 
-void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn);
+PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn);
 void virtio_net_poll(void);
 
 /* virtio-blk.h */
Index: kvm-userspace.hotplug/qemu/hw/pci.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/pci.c
+++ kvm-userspace.hotplug/qemu/hw/pci.c
@@ -625,24 +625,26 @@ void pci_info(void)
 }
 
 /* Initialize a PCI NIC.  */
-void pci_nic_init(PCIBus *bus, NICInfo *nd, int devfn)
+PCIDevice *pci_nic_init(PCIBus *bus, NICInfo *nd, int devfn)
 {
+PCIDevice *pci_dev;
+
 if (strcmp(nd->model, "ne2k_pci") == 0) {
-pci_ne2000_init(bus, nd, devfn);
+pci_dev = pci_ne2000_init(bus, nd, devfn);
 } else if (strcmp(nd->model, "i82551") == 0) {
-pci_i82551_init(bus, nd, devfn);
+pci_dev = pci_i82551_init(bus, nd, devfn);
 } else if (strcmp(nd->model, "i82557b") == 0) {
-pci_i82557b_init(bus, nd, devfn);
+pci_dev = pci_i82557b_init(bus, nd, devfn);
 } else if (strcmp(nd->model, "i82559er") == 0) {
-pci_i82559er_init(bus, nd, devfn);
+pci_dev = pci_i82559er_init(bus, nd, devfn);
 } else if (strcmp(nd->model, "rtl8139") == 0) {
-pci_rtl8139_init(bus, nd, devfn);
+pci_dev = pci_rtl8139_init(bus, nd, devfn);
 } else if (strcmp(nd->model, "e1000") == 0) {
-pci_e1000_init(bus, nd, devfn);
+pci_dev = pci_e1000_init(bus, nd, devfn);
 } else if (strcmp(nd->model, "pcnet") == 0) {
-pci_pcnet_init(bus, nd, devfn);
+pci_dev = pci

[kvm-devel] [patch 20/23] QEMU/KVM: add pci_unregister_device

2008-03-04 Thread Marcelo Tosatti
Unregister the pci device, unassign its IO and memory regions, and free
associated data.

Add a callback so drivers can free device state.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/hw/pci.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/pci.c
+++ kvm-userspace.hotplug/qemu/hw/pci.c
@@ -185,6 +185,48 @@ PCIDevice *pci_register_device(PCIBus *b
 return pci_dev;
 }
 
+static target_phys_addr_t pci_to_cpu_addr(target_phys_addr_t addr)
+{
+return addr + pci_mem_base;
+}
+
+static void pci_unregister_io_regions(PCIDevice *pci_dev)
+{
+PCIIORegion *r;
+int i;
+
+for(i = 0; i < PCI_NUM_REGIONS; i++) {
+r = &pci_dev->io_regions[i];
+if (!r->size)
+continue;
+if (r->type == PCI_ADDRESS_SPACE_IO) {
+isa_unassign_ioport(r->addr, r->size);
+} else {
+cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
+ r->size,
+ IO_MEM_UNASSIGNED);
+}
+}
+}
+
+int pci_unregister_device(PCIDevice *pci_dev)
+{
+int ret = 0;
+
+if (pci_dev->unregister)
+ret = pci_dev->unregister(pci_dev);
+if (ret)
+return ret;
+
+pci_unregister_io_regions(pci_dev);
+
+qemu_free_irqs(pci_dev->irq);
+pci_irq_index--;
+pci_dev->bus->devices[pci_dev->devfn] = NULL;
+qemu_free(pci_dev);
+return 0;
+}
+
 void pci_register_io_region(PCIDevice *pci_dev, int region_num,
 uint32_t size, int type,
 PCIMapIORegionFunc *map_func)
@@ -207,10 +249,6 @@ void pci_register_io_region(PCIDevice *p
 *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
 }
 
-static target_phys_addr_t pci_to_cpu_addr(target_phys_addr_t addr)
-{
-return addr + pci_mem_base;
-}
 
 static void pci_update_mappings(PCIDevice *d)
 {
Index: kvm-userspace.hotplug/qemu/hw/pci.h
===
--- kvm-userspace.hotplug.orig/qemu/hw/pci.h
+++ kvm-userspace.hotplug/qemu/hw/pci.h
@@ -15,6 +15,7 @@ typedef uint32_t PCIConfigReadFunc(PCIDe
uint32_t address, int len);
 typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
 uint32_t addr, uint32_t size, int type);
+typedef int PCIUnregisterFunc(PCIDevice *pci_dev);
 
 #define PCI_ADDRESS_SPACE_MEM  0x00
 #define PCI_ADDRESS_SPACE_IO   0x01
@@ -56,6 +57,7 @@ struct PCIDevice {
 /* do not access the following fields */
 PCIConfigReadFunc *config_read;
 PCIConfigWriteFunc *config_write;
+PCIUnregisterFunc *unregister;
 /* ??? This is a PC-specific hack, and should be removed.  */
 int irq_index;
 
@@ -71,6 +73,8 @@ PCIDevice *pci_register_device(PCIBus *b
PCIConfigReadFunc *config_read,
PCIConfigWriteFunc *config_write);
 
+int pci_unregister_device(PCIDevice *pci_dev);
+
 void pci_register_io_region(PCIDevice *pci_dev, int region_num,
 uint32_t size, int type,
 PCIMapIORegionFunc *map_func);

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 17/23] QEMU/KVM: notify _EJ0 through _SEJ OperationRegion

2008-03-04 Thread Marcelo Tosatti
The _EJ0 method is executed by the OS once it has successfully finished
device removal. Inform that event through IO port space so QEMU 
can free the associated data.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/bios/acpi-dsdt.dsl
===
--- kvm-userspace.hotplug.orig/bios/acpi-dsdt.dsl
+++ kvm-userspace.hotplug/bios/acpi-dsdt.dsl
@@ -414,159 +414,258 @@ DefinitionBlock (
PCID, 32,
}
 
+OperationRegion(SEJ, SystemIO, 0xae08, 0x04)
+Field (SEJ, DWordAcc, NoLock, WriteAsZeros)
+{
+B0EJ, 32,
+}
+
 Device (S1) {  // Slot 1
Name (_ADR, 0x0001)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x2, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S2) {  // Slot 2
Name (_ADR, 0x0002)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x4, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S3) {  // Slot 3
Name (_ADR, 0x0003)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store (0x8, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S4) {  // Slot 4
Name (_ADR, 0x0004)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x10, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S5) {  // Slot 5
Name (_ADR, 0x0005)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x20, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S6) {  // Slot 6
Name (_ADR, 0x0006)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x40, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S7) {  // Slot 7
Name (_ADR, 0x0007)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x80, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S8) {  // Slot 8
Name (_ADR, 0x0008)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x100, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S9) {  // Slot 9
Name (_ADR, 0x0009)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x200, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S10) {  // Slot 10
Name (_ADR, 0x000A)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x400, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S11) {  // Slot 11
Name (_ADR, 0x000B)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x800, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S12) {  // Slot 12
Name (_ADR, 0x000C)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x1000, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S13) {  // Slot 13
Name (_ADR, 0x000D)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x2000, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S14) {  // Slot 14
Name (_ADR, 0x000E)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x4000, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S15) {  // Slot 15
Name (_ADR, 0x000F)
-   Method (_EJ0,1) { Return (0x0) }
+   Method (_EJ0,1) {
+Store(0x8000, B0EJ)
+Return (0x0)
+   }
 }
 
 Device (S16) {  // Slot 16
Name 

[kvm-devel] [patch 23/23] QEMU/KVM: device hot-remove

2008-03-04 Thread Marcelo Tosatti
Add monitor command to hot-remove devices.

Remove device data on _EJ0 notification.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/monitor.c
===
--- kvm-userspace.hotplug.orig/qemu/monitor.c
+++ kvm-userspace.hotplug/qemu/monitor.c
@@ -1355,6 +1355,7 @@ static term_cmd_t term_cmds[] = {
   "value", "set maximum speed (in bytes) for migrations" },
 { "cpu_set", "is", do_cpu_set_nr, "cpu [online|offline]", "change cpu 
state" },
 { "pci_add", "ss", device_hot_add, "nic|drive 
[vlan=n][,macaddr=addr][,model=type] 
[[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]", "hotadd PCI 
device" },
+{ "pci_remove", "i", device_hot_remove, "slot number", "hot remove PCI 
device" },
 { NULL, NULL, },
 };
 
Index: kvm-userspace.hotplug/qemu/hw/device-hotplug.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/device-hotplug.c
+++ kvm-userspace.hotplug/qemu/hw/device-hotplug.c
@@ -5,6 +5,8 @@
 #include "sysemu.h"
 #include "pc.h"
 #include "console.h"
+#include "block_int.h"
+#include 
 
 static PCIDevice *qemu_system_hot_add_nic(const char *opts, int bus_nr)
 {
@@ -90,3 +92,67 @@ void device_hot_add(const char *type, co
 else
 term_printf("failed to add %s\n", opts);
 }
+
+void device_hot_remove(int slot)
+{
+PCIDevice *d = pci_find_device(0, slot);
+
+if (!d) {
+term_printf("invalid slot %d\n", slot);
+return;
+}
+
+qemu_system_device_hot_add(slot, 0);
+}
+
+static void destroy_nic(int slot)
+{
+int i;
+
+for (i = 0; i < MAX_NICS; i++)
+if (nd_table[i].used &&
+PCI_SLOT(nd_table[i].devfn) == slot)
+net_client_uninit(&nd_table[i]);
+}
+
+static void destroy_bdrvs(int slot)
+{
+int i;
+struct BlockDriverState *bs;
+
+for (i = 0; i <= MAX_DRIVES; i++) {
+bs = drives_table[i].bdrv;
+if (bs && (PCI_SLOT(bs->devfn) == slot)) {
+drive_uninit(bs);
+bdrv_delete(bs);
+}
+}
+}
+
+/*
+ * OS has executed _EJ0 method, we now can remove the device
+ */
+void device_hot_remove_success(int slot)
+{
+PCIDevice *d = pci_find_device(0, slot);
+int class_code;
+
+if (!d) {
+term_printf("invalid slot %d\n", slot);
+return;
+}
+
+class_code = d->config_read(d, PCI_CLASS_DEVICE+1, 1);
+
+pci_unregister_device(d);
+
+switch(class_code) {
+case PCI_BASE_CLASS_STORAGE:
+destroy_bdrvs(slot);
+break;
+case PCI_BASE_CLASS_NETWORK:
+destroy_nic(slot);
+break;
+}
+
+}
Index: kvm-userspace.hotplug/qemu/sysemu.h
===
--- kvm-userspace.hotplug.orig/qemu/sysemu.h
+++ kvm-userspace.hotplug/qemu/sysemu.h
@@ -178,6 +178,8 @@ void qemu_system_device_hot_add(int slot
 
 /* device-hotplug */
 void device_hot_add(const char *type, const char *opts);
+void device_hot_remove(int slot);
+void device_hot_remove_success(int slot);
 
 /* vmchannel devices */
 
Index: kvm-userspace.hotplug/qemu/hw/acpi.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/acpi.c
+++ kvm-userspace.hotplug/qemu/hw/acpi.c
@@ -673,6 +673,8 @@ static void pciej_write(void *opaque, ui
 {
 int slot = ffs(val) - 1;
 
+device_hot_remove_success(slot);
+
 #if defined(DEBUG)
 printf("pciej write %lx <== %d\n", addr, val);
 #endif

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 18/23] QEMU/KVM: handle SEJ notifications

2008-03-04 Thread Marcelo Tosatti
Handle the _EJ0 notifications.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/hw/acpi.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/acpi.c
+++ kvm-userspace.hotplug/qemu/hw/acpi.c
@@ -26,6 +26,7 @@
 #ifdef USE_KVM
 #include "qemu-kvm.h"
 #endif
+#include "string.h"
 
 //#define DEBUG
 
@@ -539,6 +540,7 @@ void qemu_system_powerdown(void)
 #define GPE_BASE 0xafe0
 #define PROC_BASE 0xaf00
 #define PCI_BASE 0xae00
+#define PCI_EJ_BASE 0xae08
 
 struct gpe_regs {
 uint16_t sts; /* status */
@@ -659,6 +661,23 @@ static void pcihotplug_write(void *opaqu
 #endif
 }
 
+static uint32_t pciej_read(void *opaque, uint32_t addr)
+{
+#if defined(DEBUG)
+printf("pciej read %lx == %lx\n", addr, val);
+#endif
+return 0;
+}
+
+static void pciej_write(void *opaque, uint32_t addr, uint32_t val)
+{
+int slot = ffs(val) - 1;
+
+#if defined(DEBUG)
+printf("pciej write %lx <== %d\n", addr, val);
+#endif
+}
+
 
 static char *model;
 
@@ -673,6 +692,9 @@ void qemu_system_hot_add_init(char *cpu_
 register_ioport_write(PCI_BASE, 8, 4, pcihotplug_write, &pci0_status);
 register_ioport_read(PCI_BASE, 8, 4,  pcihotplug_read, &pci0_status);
 
+register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, NULL);
+register_ioport_read(PCI_EJ_BASE, 4, 4,  pciej_read, NULL);
+
 model = cpu_model;
 }
 

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 21/23] QEMU/KVM: LSI SCSI unregister callback

2008-03-04 Thread Marcelo Tosatti
Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/hw/lsi53c895a.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/lsi53c895a.c
+++ kvm-userspace.hotplug/qemu/hw/lsi53c895a.c
@@ -1849,6 +1849,18 @@ void lsi_scsi_attach(void *opaque, Block
 bd->devfn = s->pci_dev.devfn;
 }
 
+int lsi_scsi_uninit(PCIDevice *d)
+{
+LSIState *s = (LSIState *) d;
+
+cpu_unregister_io_memory(s->mmio_io_addr);
+cpu_unregister_io_memory(s->ram_io_addr);
+
+qemu_free(s->queue);
+
+return 0;
+}
+
 void *lsi_scsi_init(PCIBus *bus, int devfn)
 {
 LSIState *s;
@@ -1881,6 +1893,7 @@ void *lsi_scsi_init(PCIBus *bus, int dev
 s->queue = qemu_malloc(sizeof(lsi_queue));
 s->queue_len = 1;
 s->active_commands = 0;
+s->pci_dev.unregister = lsi_scsi_uninit;
 
 lsi_soft_reset(s);
 

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 22/23] QEMU/KVM: zero ioport_opaque on isa_unassign_ioport

2008-03-04 Thread Marcelo Tosatti
If the io port is unassigned, the previous private pointer is 
meaningless.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/vl.c
===
--- kvm-userspace.hotplug.orig/qemu/vl.c
+++ kvm-userspace.hotplug/qemu/vl.c
@@ -399,6 +399,8 @@ void isa_unassign_ioport(int start, int 
 ioport_write_table[0][i] = default_ioport_writeb;
 ioport_write_table[1][i] = default_ioport_writew;
 ioport_write_table[2][i] = default_ioport_writel;
+
+ioport_opaque[i] = NULL;
 }
 }
 

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 16/23] QEMU/KVM: add cpu_unregister_io_memory and make io mem table index dynamic

2008-03-04 Thread Marcelo Tosatti
So drivers can clear their mem io table entries on exit back to unassigned 
state.

Also make the io mem index allocation dynamic. 

Perhaps freeing the state created during cpu_register_physical_memory()
is also necessary.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/cpu-all.h
===
--- kvm-userspace.hotplug.orig/qemu/cpu-all.h
+++ kvm-userspace.hotplug/qemu/cpu-all.h
@@ -837,6 +837,7 @@ int cpu_register_io_memory(int io_index,
CPUReadMemoryFunc **mem_read,
CPUWriteMemoryFunc **mem_write,
void *opaque);
+void cpu_unregister_io_memory(int table_address);
 CPUWriteMemoryFunc **cpu_get_io_memory_write(int io_index);
 CPUReadMemoryFunc **cpu_get_io_memory_read(int io_index);
 
Index: kvm-userspace.hotplug/qemu/exec.c
===
--- kvm-userspace.hotplug.orig/qemu/exec.c
+++ kvm-userspace.hotplug/qemu/exec.c
@@ -158,7 +158,7 @@ PhysPageDesc **l1_phys_map;
 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
-static int io_mem_nb;
+char io_mem_used[IO_MEM_NB_ENTRIES];
 #if defined(CONFIG_SOFTMMU)
 static int io_mem_watch;
 #endif
@@ -2493,12 +2493,28 @@ static void *subpage_init (target_phys_a
 return mmio;
 }
 
+static int get_free_io_mem_idx(void)
+{
+int i;
+
+for (i = 0; i> IO_MEM_SHIFT, error_mem_read, 
unassigned_mem_write, NULL);
 cpu_register_io_memory(IO_MEM_UNASSIGNED >> IO_MEM_SHIFT, 
unassigned_mem_read, unassigned_mem_write, NULL);
 cpu_register_io_memory(IO_MEM_NOTDIRTY >> IO_MEM_SHIFT, error_mem_read, 
notdirty_mem_write, NULL);
-io_mem_nb = 5;
+for (i=0; i<5; i++)
+io_mem_used[i] = 0;
 
 #if defined(CONFIG_SOFTMMU)
 io_mem_watch = cpu_register_io_memory(-1, watch_mem_read,
@@ -2525,9 +2541,9 @@ int cpu_register_io_memory(int io_index,
 int i, subwidth = 0;
 
 if (io_index <= 0) {
-if (io_mem_nb >= IO_MEM_NB_ENTRIES)
-return -1;
-io_index = io_mem_nb++;
+io_index = get_free_io_mem_idx();
+if (io_index == -1)
+return io_index;
 } else {
 if (io_index >= IO_MEM_NB_ENTRIES)
 return -1;
@@ -2543,6 +2559,19 @@ int cpu_register_io_memory(int io_index,
 return (io_index << IO_MEM_SHIFT) | subwidth;
 }
 
+void cpu_unregister_io_memory(int io_table_address)
+{
+int i;
+int io_index = io_table_address >> IO_MEM_SHIFT;
+
+for (i=0;i < 3; i++) {
+io_mem_read[io_index][i] = unassigned_mem_read[i];
+io_mem_write[io_index][i] = unassigned_mem_write[i];
+}
+io_mem_opaque[io_index] = NULL;
+io_mem_used[io_index] = 0;
+}
+
 CPUWriteMemoryFunc **cpu_get_io_memory_write(int io_index)
 {
 return io_mem_write[io_index >> IO_MEM_SHIFT];

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 14/23] QEMU/KVM: device hot-add

2008-03-04 Thread Marcelo Tosatti
Add monitor command to hot-add PCI devices (nic and drive).

Save QEMUMachine necessary for drive_init.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/Makefile.target
===
--- kvm-userspace.hotplug.orig/qemu/Makefile.target
+++ kvm-userspace.hotplug/qemu/Makefile.target
@@ -576,6 +576,8 @@ OBJS+= hypercall.o
 # virtio devices
 OBJS += virtio.o virtio-net.o virtio-blk.o
 
+OBJS += device-hotplug.o
+
 ifeq ($(TARGET_BASE_ARCH), i386)
 # Hardware support
 OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
Index: kvm-userspace.hotplug/qemu/hw/device-hotplug.c
===
--- /dev/null
+++ kvm-userspace.hotplug/qemu/hw/device-hotplug.c
@@ -0,0 +1,92 @@
+#include "hw.h"
+#include "boards.h"
+#include "pci.h"
+#include "net.h"
+#include "sysemu.h"
+#include "pc.h"
+#include "console.h"
+
+static PCIDevice *qemu_system_hot_add_nic(const char *opts, int bus_nr)
+{
+   int ret;
+   char buf[4096];
+   PCIBus *pci_bus;
+
+   pci_bus = pci_find_bus (bus_nr);
+   if (!pci_bus) {
+  term_printf ("Can't find pci_bus %d\n", bus_nr);
+  return NULL;
+   }
+
+   memset (buf, 0, sizeof (buf));
+
+   strcpy (buf, "nic,");
+   strncat (buf, opts, sizeof (buf) - strlen (buf) - 1);
+
+   ret = net_client_init (buf);
+   if (ret < 0 || !nd_table[ret].model)
+  return NULL;
+   return pci_nic_init (pci_bus, &nd_table[ret], -1);
+}
+
+static PCIDevice *qemu_system_hot_add_drive(const char *opts, int bus_nr)
+{
+int drive_opt_idx, drive_idx;
+int type = 0;
+int bus = 0;
+void *opaque = NULL;
+PCIBus *pci_bus;
+
+pci_bus = pci_find_bus(bus_nr);
+if (!pci_bus) {
+term_printf("Can't find pci_bus %d\n", bus_nr);
+return NULL;
+}
+
+drive_opt_idx = drive_add(NULL, "%s", opts);
+if (!drive_opt_idx)
+return NULL;
+
+drive_idx = drive_init(&drives_opt[drive_opt_idx], 0, current_machine);
+if (drive_idx == -1) {
+drive_remove(drive_opt_idx);
+return NULL;
+}
+
+type = drives_table[drive_idx].type;
+bus = drive_get_max_bus (type);
+
+switch (type) {
+case IF_SCSI:
+  /* XXX: additional unit on existing device? */
+  opaque = lsi_scsi_init (pci_bus, -1);
+  lsi_scsi_attach (opaque, drives_table[drive_idx].bdrv,
+   drives_table[drive_idx].unit);
+  break;
+case IF_VIRTIO:
+  opaque = virtio_blk_init (pci_bus, 0x1AF4, 0x1001,
+drives_table[drive_idx].bdrv);
+  break;
+default:
+  term_printf ("type %d not a PCI device!\n", type);
+}
+
+return opaque;
+}
+
+void device_hot_add(const char *type, const char *opts)
+{
+PCIDevice *dev = NULL;
+
+if (strcmp(type, "nic") == 0)
+dev = qemu_system_hot_add_nic(opts, 0);
+else if (strcmp(type, "drive") == 0)
+dev = qemu_system_hot_add_drive(opts, 0);
+else
+term_printf("invalid type: %s\n", type);
+
+if (dev)
+qemu_system_device_hot_add(PCI_SLOT(dev->devfn), 1);
+else
+term_printf("failed to add %s\n", opts);
+}
Index: kvm-userspace.hotplug/qemu/monitor.c
===
--- kvm-userspace.hotplug.orig/qemu/monitor.c
+++ kvm-userspace.hotplug/qemu/monitor.c
@@ -1354,6 +1354,7 @@ static term_cmd_t term_cmds[] = {
 { "migrate_set_speed", "s", do_migrate_set_speed,
   "value", "set maximum speed (in bytes) for migrations" },
 { "cpu_set", "is", do_cpu_set_nr, "cpu [online|offline]", "change cpu 
state" },
+{ "pci_add", "ss", device_hot_add, "nic|drive 
[vlan=n][,macaddr=addr][,model=type] 
[[file=file][,if=type][,bus=n][,unit=m][,media=d][index=i]]", "hotadd PCI 
device" },
 { NULL, NULL, },
 };
 
Index: kvm-userspace.hotplug/qemu/hw/boards.h
===
--- kvm-userspace.hotplug.orig/qemu/hw/boards.h
+++ kvm-userspace.hotplug/qemu/hw/boards.h
@@ -19,6 +19,8 @@ typedef struct QEMUMachine {
 
 int qemu_register_machine(QEMUMachine *m);
 
+extern QEMUMachine *current_machine;
+
 /* Axis ETRAX.  */
 extern QEMUMachine bareetraxfs_machine;
 
Index: kvm-userspace.hotplug/qemu/sysemu.h
===
--- kvm-userspace.hotplug.orig/qemu/sysemu.h
+++ kvm-userspace.hotplug/qemu/sysemu.h
@@ -174,6 +174,10 @@ extern int drive_init(struct drive_opt *
 /* acpi */
 void qemu_system_cpu_hot_add(int cpu, int state);
 void qemu_system_hot_add_init(char *cpu_model);
+void qemu_system_device_hot_add(int slot, int state);
+
+/* device-hotplug */
+void device_hot_add(const char *type, const char *opts);
 
 /* vmchannel devices */
 
Index: kvm-userspace.hotplug/qemu/vl.c
===
--- kvm-userspace.hotplug.orig/qemu/vl.c
+++ kvm-userspace.hotplug/qemu/vl.c
@@ -7573,

[kvm-devel] [patch 15/23] QEMU/KVM: add pci_find_device

2008-03-04 Thread Marcelo Tosatti
Return PCIDevice from bus number and slot.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/hw/pci.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/pci.c
+++ kvm-userspace.hotplug/qemu/hw/pci.c
@@ -689,6 +689,23 @@ PCIBus *pci_find_bus(int bus_num)
 return bus;
 }
 
+/*
+ * Look up a device by bus number and slot.
+ *
+ * Scans all 256 devfn entries of the bus in ascending order and returns the
+ * first populated entry whose slot equals `slot`.  Returns NULL when the bus
+ * is unknown or no device occupies that slot.
+ */
+PCIDevice *pci_find_device(int bus_num, int slot)
+{
+    PCIBus *bus = pci_find_bus(bus_num);
+    int devfn = 0;
+
+    if (bus == NULL)
+        return NULL;
+
+    while (devfn < 256) {
+        PCIDevice *candidate = bus->devices[devfn];
+
+        if (candidate != NULL && PCI_SLOT(devfn) == slot)
+            return candidate;
+        devfn++;
+    }
+
+    return NULL;
+}
+
 PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint32_t id,
 pci_map_irq_fn map_irq, const char *name)
 {
Index: kvm-userspace.hotplug/qemu/hw/pci.h
===
--- kvm-userspace.hotplug.orig/qemu/hw/pci.h
+++ kvm-userspace.hotplug/qemu/hw/pci.h
@@ -93,6 +93,7 @@ uint32_t pci_data_read(void *opaque, uin
 int pci_bus_num(PCIBus *s);
 void pci_for_each_device(int bus_num, void (*fn)(PCIDevice *d));
 PCIBus *pci_find_bus(int bus_num);
+PCIDevice *pci_find_device(int bus_num, int slot);
 
 void pci_info(void);
 PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint32_t id,

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 13/23] QEMU/KVM: add net_client_uninit

2008-03-04 Thread Marcelo Tosatti
Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/net.h
===
--- kvm-userspace.hotplug.orig/qemu/net.h
+++ kvm-userspace.hotplug/qemu/net.h
@@ -38,6 +38,7 @@ void do_info_network(void);
 int hack_around_tap(void *opaque);
 
 int net_client_init(const char *str);
+void net_client_uninit(NICInfo *nd);
 
 /* NIC info */
 
Index: kvm-userspace.hotplug/qemu/vl.c
===
--- kvm-userspace.hotplug.orig/qemu/vl.c
+++ kvm-userspace.hotplug/qemu/vl.c
@@ -4900,6 +4900,14 @@ int net_client_init(const char *str)
 return ret;
 }
 
+/*
+ * Release the NIC table entry `nd` so that its index can be handed out
+ * again: drops the guest-device count of its vlan, decrements nb_nics,
+ * clears the `used` flag and frees the model string.
+ */
+void net_client_uninit(NICInfo *nd)
+{
+    nd->vlan->nb_guest_devs--; /* XXX: free vlan on last reference */
+    nb_nics--;
+    nd->used = 0;
+    /* model is declared const char *, hence the cast for free() */
+    free((void *)nd->model);
+    /*
+     * Clear the pointer: the slot may be reused by a NIC that specifies no
+     * model, and a stale non-NULL value would then be taken for a valid
+     * model string (use-after-free / double free on the next uninit).
+     */
+    nd->model = NULL;
+}
+
 void do_info_network(void)
 {
 VLANState *vlan;

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 12/23] QEMU/KVM: net/drive add/remove tweaks

2008-03-04 Thread Marcelo Tosatti
Export net/drive add/remove functions for device hotplug usage.

Return the table index on add.

Return failure instead of exiting if limit has been reached 
on drive_add.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/vl.c
===
--- kvm-userspace.hotplug.orig/qemu/vl.c
+++ kvm-userspace.hotplug/qemu/vl.c
@@ -4760,7 +4760,7 @@ static int nic_get_free_idx(void)
 return -1;
 }
 
-static int net_client_init(const char *str)
+int net_client_init(const char *str)
 {
 const char *p;
 char *q;
@@ -4819,7 +4819,7 @@ static int net_client_init(const char *s
 nd->used = 1;
 nb_nics++;
 vlan->nb_guest_devs++;
-ret = 0;
+ret = idx;
 } else
 if (!strcmp(device, "none")) {
 /* does nothing. It is needed to signal that no network cards
@@ -4949,14 +4949,14 @@ static int drive_get_free_idx(void)
 return -1;
 }
 
-static int drive_add(const char *file, const char *fmt, ...)
+int drive_add(const char *file, const char *fmt, ...)
 {
 va_list ap;
 int index = drive_opt_get_free_idx();
 
 if (nb_drives_opt >= MAX_DRIVES || index == -1) {
 fprintf(stderr, "qemu: too many drives\n");
-exit(1);
+return -1;
 }
 
 drives_opt[index].file = file;
@@ -5019,9 +5019,10 @@ void drive_uninit(BlockDriverState *bdrv
 }
 }
 
-static int drive_init(struct drive_opt *arg, int snapshot,
-  QEMUMachine *machine)
+int drive_init(struct drive_opt *arg, int snapshot,
+  void *opaque)
 {
+QEMUMachine *machine = opaque;
 char buf[128];
 char file[1024];
 char devname[128];
@@ -5274,7 +5275,7 @@ static int drive_init(struct drive_opt *
  */
 
 if (drive_get_index(type, bus_id, unit_id) != -1)
-return 0;
+return -2;
 
 /* init */
 
@@ -5322,7 +5323,7 @@ static int drive_init(struct drive_opt *
 break;
 }
 if (!file[0])
-return 0;
+return -2;
 bdrv_flags = 0;
 if (snapshot)
 bdrv_flags |= BDRV_O_SNAPSHOT;
@@ -5333,7 +5334,7 @@ static int drive_init(struct drive_opt *
 file);
 return -1;
 }
-return 0;
+return drives_table_idx;
 }
 
 /***/
Index: kvm-userspace.hotplug/qemu/net.h
===
--- kvm-userspace.hotplug.orig/qemu/net.h
+++ kvm-userspace.hotplug/qemu/net.h
@@ -37,6 +37,8 @@ void do_info_network(void);
 /* virtio hack for zero copy receive */
 int hack_around_tap(void *opaque);
 
+int net_client_init(const char *str);
+
 /* NIC info */
 
 #define MAX_NICS 8
Index: kvm-userspace.hotplug/qemu/sysemu.h
===
--- kvm-userspace.hotplug.orig/qemu/sysemu.h
+++ kvm-userspace.hotplug/qemu/sysemu.h
@@ -168,6 +168,9 @@ struct drive_opt {
 extern struct drive_opt drives_opt[MAX_DRIVES];
 extern int nb_drives_opt;
 
+extern int drive_add(const char *file, const char *fmt, ...);
+extern int drive_init(struct drive_opt *arg, int snapshot, void *machine);
+
 /* acpi */
 void qemu_system_cpu_hot_add(int cpu, int state);
 void qemu_system_hot_add_init(char *cpu_model);

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 10/23] QEMU/KVM: record devfn on block driver instance

2008-03-04 Thread Marcelo Tosatti
Record the parent PCI devfn in the BlockDriverState structure so that the
drive can be located and released on hot-removal.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/block_int.h
===
--- kvm-userspace.hotplug.orig/qemu/block_int.h
+++ kvm-userspace.hotplug/qemu/block_int.h
@@ -129,6 +129,8 @@ struct BlockDriverState {
 int cyls, heads, secs, translation;
 int type;
 char device_name[32];
+/* PCI devfn of parent */
+int devfn;
 BlockDriverState *next;
 };
 
Index: kvm-userspace.hotplug/qemu/hw/lsi53c895a.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/lsi53c895a.c
+++ kvm-userspace.hotplug/qemu/hw/lsi53c895a.c
@@ -13,6 +13,7 @@
 #include "hw.h"
 #include "pci.h"
 #include "scsi-disk.h"
+#include "block_int.h"
 
 //#define DEBUG_LSI
 //#define DEBUG_LSI_REG
@@ -1845,6 +1846,7 @@ void lsi_scsi_attach(void *opaque, Block
 s->scsi_dev[id] = scsi_generic_init(bd, 1, lsi_command_complete, s);
 if (s->scsi_dev[id] == NULL)
 s->scsi_dev[id] = scsi_disk_init(bd, 1, lsi_command_complete, s);
+bd->devfn = s->pci_dev.devfn;
 }
 
 void *lsi_scsi_init(PCIBus *bus, int devfn)
Index: kvm-userspace.hotplug/qemu/hw/virtio-blk.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/virtio-blk.c
+++ kvm-userspace.hotplug/qemu/hw/virtio-blk.c
@@ -13,6 +13,7 @@
 
 #include "virtio.h"
 #include "block.h"
+#include "block_int.h"
 #include "pc.h"
 
 /* from Linux's linux/virtio_blk.h */
@@ -156,6 +157,7 @@ void *virtio_blk_init(PCIBus *bus, uint1
 s->vdev.update_config = virtio_blk_update_config;
 s->vdev.get_features = virtio_blk_get_features;
 s->bs = bs;
+bs->devfn = s->vdev.pci_dev.devfn;
 
 virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
 
Index: kvm-userspace.hotplug/qemu/hw/ide.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/ide.c
+++ kvm-userspace.hotplug/qemu/hw/ide.c
@@ -28,6 +28,7 @@
 #include "scsi-disk.h"
 #include "pcmcia.h"
 #include "block.h"
+#include "block_int.h"
 #include "qemu-timer.h"
 #include "sysemu.h"
 #include "ppc_mac.h"
@@ -2938,6 +2939,7 @@ void pci_piix3_ide_init(PCIBus *bus, Blo
 {
 PCIIDEState *d;
 uint8_t *pci_conf;
+int i;
 
 /* register a function 1 of PIIX3 */
 d = (PCIIDEState *)pci_register_device(bus, "PIIX3 IDE",
@@ -2966,6 +2968,10 @@ void pci_piix3_ide_init(PCIBus *bus, Blo
 ide_init_ioport(&d->ide_if[0], 0x1f0, 0x3f6);
 ide_init_ioport(&d->ide_if[2], 0x170, 0x376);
 
+for (i = 0; i < 4; i++)
+if (hd_table[i])
+hd_table[i]->devfn = d->dev.devfn;
+
 register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d);
 }
 

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 11/23] QEMU/KVM: move drives_opt for external use

2008-03-04 Thread Marcelo Tosatti
Device hotplug will use that structure from a separate
file.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/sysemu.h
===
--- kvm-userspace.hotplug.orig/qemu/sysemu.h
+++ kvm-userspace.hotplug/qemu/sysemu.h
@@ -159,6 +159,15 @@ extern int drive_get_max_bus(BlockInterf
 extern void drive_uninit(BlockDriverState *bdrv);
 extern void drive_remove(int index);
 
+struct drive_opt { /* one pending drive definition, filled in by drive_add() */
+const char *file; /* file argument as passed to drive_add(); may be NULL */
+char opt[1024]; /* option string formatted by drive_add() */
+int used; /* non-zero while this slot is allocated */
+};
+
+extern struct drive_opt drives_opt[MAX_DRIVES];
+extern int nb_drives_opt;
+
 /* acpi */
 void qemu_system_cpu_hot_add(int cpu, int state);
 void qemu_system_hot_add_init(char *cpu_model);
Index: kvm-userspace.hotplug/qemu/vl.c
===
--- kvm-userspace.hotplug.orig/qemu/vl.c
+++ kvm-userspace.hotplug/qemu/vl.c
@@ -251,11 +251,7 @@ unsigned int nb_prom_envs = 0;
 const char *prom_envs[MAX_PROM_ENVS];
 #endif
 int nb_drives_opt;
-struct drive_opt {
-const char *file;
-char opt[1024];
-int used;
-} drives_opt[MAX_DRIVES];
+struct drive_opt drives_opt[MAX_DRIVES];
 
 static CPUState *cur_cpu;
 static CPUState *next_cpu;

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 08/23] QEMU/KVM: dynamic nic info index allocation

2008-03-04 Thread Marcelo Tosatti
The same, but for nics.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/net.h
===
--- kvm-userspace.hotplug.orig/qemu/net.h
+++ kvm-userspace.hotplug/qemu/net.h
@@ -46,6 +46,7 @@ struct NICInfo {
 const char *model;
 VLANState *vlan;
 int devfn;
+int used;
 };
 
 extern int nb_nics;
Index: kvm-userspace.hotplug/qemu/vl.c
===
--- kvm-userspace.hotplug.orig/qemu/vl.c
+++ kvm-userspace.hotplug/qemu/vl.c
@@ -4754,6 +4754,15 @@ static int check_params(char *buf, int b
 return 0;
 }
 
+/*
+ * Return the lowest index in nd_table whose entry is not marked used,
+ * or -1 when all MAX_NICS entries are taken.  The entry is not claimed
+ * here; the caller sets `used`.
+ */
+static int nic_get_free_idx(void)
+{
+    int slot = 0;
+
+    while (slot < MAX_NICS) {
+        if (nd_table[slot].used == 0)
+            return slot;
+        slot++;
+    }
+
+    return -1;
+}
 
 static int net_client_init(const char *str)
 {
@@ -4786,19 +4795,20 @@ static int net_client_init(const char *s
 if (!strcmp(device, "nic")) {
 NICInfo *nd;
 uint8_t *macaddr;
+int idx = nic_get_free_idx();
 
-if (nb_nics >= MAX_NICS) {
+if (idx == -1 || nb_nics >= MAX_NICS) {
 fprintf(stderr, "Too Many NICs\n");
 return -1;
 }
-nd = &nd_table[nb_nics];
+nd = &nd_table[idx];
 macaddr = nd->macaddr;
 macaddr[0] = 0x52;
 macaddr[1] = 0x54;
 macaddr[2] = 0x00;
 macaddr[3] = 0x12;
 macaddr[4] = 0x34;
-macaddr[5] = 0x56 + nb_nics;
+macaddr[5] = 0x56 + idx;
 
 if (get_param_value(buf, sizeof(buf), "macaddr", p)) {
 if (parse_macaddr(macaddr, buf) < 0) {
@@ -4810,6 +4820,7 @@ static int net_client_init(const char *s
 nd->model = strdup(buf);
 }
 nd->vlan = vlan;
+nd->used = 1;
 nb_nics++;
 vlan->nb_guest_devs++;
 ret = 0;

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 09/23] QEMU/KVM: drive removal support

2008-03-04 Thread Marcelo Tosatti
To be used by hot-remove.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/vl.c
===
--- kvm-userspace.hotplug.orig/qemu/vl.c
+++ kvm-userspace.hotplug/qemu/vl.c
@@ -4972,6 +4972,12 @@ static int drive_add(const char *file, c
 return index;
 }
 
+/*
+ * Release the drives_opt slot `index` so it can be reused.
+ *
+ * Out-of-range indexes and slots that are not currently allocated are
+ * ignored, so that a bad or repeated call can neither write outside
+ * drives_opt nor drive nb_drives_opt below its real value.
+ */
+void drive_remove(int index)
+{
+    if (index < 0 || index >= MAX_DRIVES || !drives_opt[index].used)
+        return;
+
+    drives_opt[index].used = 0;
+    nb_drives_opt--;
+}
+
 int drive_get_index(BlockInterfaceType type, int bus, int unit)
 {
 int index;
@@ -5002,6 +5008,20 @@ int drive_get_max_bus(BlockInterfaceType
 return max_bus;
 }
 
+/*
+ * Drop the drives_table entry that refers to `bdrv`.
+ *
+ * Only the first matching entry is handled: its bdrv pointer and `used`
+ * flag are cleared, the associated drives_opt slot is released through
+ * drive_remove(), and nb_drives is decremented.  Nothing happens when no
+ * entry matches.
+ */
+void drive_uninit(BlockDriverState *bdrv)
+{
+    int slot = 0;
+
+    while (slot < MAX_DRIVES && drives_table[slot].bdrv != bdrv)
+        slot++;
+
+    if (slot == MAX_DRIVES)
+        return;
+
+    drives_table[slot].bdrv = NULL;
+    drives_table[slot].used = 0;
+    drive_remove(drives_table[slot].drive_opt_idx);
+    nb_drives--;
+}
+
 static int drive_init(struct drive_opt *arg, int snapshot,
   QEMUMachine *machine)
 {
@@ -5275,6 +5295,7 @@ static int drive_init(struct drive_opt *
 drives_table[drives_table_idx].type = type;
 drives_table[drives_table_idx].bus = bus_id;
 drives_table[drives_table_idx].unit = unit_id;
+drives_table[drives_table_idx].drive_opt_idx = arg - drives_opt;
 nb_drives++;
 
 switch(type) {
Index: kvm-userspace.hotplug/qemu/sysemu.h
===
--- kvm-userspace.hotplug.orig/qemu/sysemu.h
+++ kvm-userspace.hotplug/qemu/sysemu.h
@@ -142,6 +142,7 @@ typedef struct DriveInfo {
 int bus;
 int unit;
 int used;
+int drive_opt_idx;
 } DriveInfo;
 
 #define MAX_IDE_DEVS   2
@@ -155,6 +156,9 @@ int extboot_drive;
 extern int drive_get_index(BlockInterfaceType type, int bus, int unit);
 extern int drive_get_max_bus(BlockInterfaceType type);
 
+extern void drive_uninit(BlockDriverState *bdrv);
+extern void drive_remove(int index);
+
 /* acpi */
 void qemu_system_cpu_hot_add(int cpu, int state);
 void qemu_system_hot_add_init(char *cpu_model);

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 07/23] QEMU/KVM: dynamic drive/drive_opt index allocation

2008-03-04 Thread Marcelo Tosatti
Dynamically allocate drive option and drive table indexes, so that
indexes can be reused when devices are removed.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/sysemu.h
===
--- kvm-userspace.hotplug.orig/qemu/sysemu.h
+++ kvm-userspace.hotplug/qemu/sysemu.h
@@ -141,6 +141,7 @@ typedef struct DriveInfo {
 BlockInterfaceType type;
 int bus;
 int unit;
+int used;
 } DriveInfo;
 
 #define MAX_IDE_DEVS   2
Index: kvm-userspace.hotplug/qemu/vl.c
===
--- kvm-userspace.hotplug.orig/qemu/vl.c
+++ kvm-userspace.hotplug/qemu/vl.c
@@ -254,6 +254,7 @@ int nb_drives_opt;
 struct drive_opt {
 const char *file;
 char opt[1024];
+int used;
 } drives_opt[MAX_DRIVES];
 
 static CPUState *cur_cpu;
@@ -4915,22 +4916,50 @@ void do_info_network(void)
 #define MTD_ALIAS "if=mtd"
 #define SD_ALIAS "index=0,if=sd"
 
+/*
+ * Claim the lowest unused drives_opt slot: marks it used and returns its
+ * index, or returns -1 when all MAX_DRIVES slots are taken.
+ */
+static int drive_opt_get_free_idx(void)
+{
+    int slot = 0;
+
+    while (slot < MAX_DRIVES && drives_opt[slot].used)
+        slot++;
+
+    if (slot == MAX_DRIVES)
+        return -1;
+
+    drives_opt[slot].used = 1;
+    return slot;
+}
+
+/*
+ * Claim the lowest unused drives_table slot: marks it used and returns its
+ * index, or returns -1 when all MAX_DRIVES slots are taken.
+ */
+static int drive_get_free_idx(void)
+{
+    int slot = 0;
+
+    while (slot < MAX_DRIVES && drives_table[slot].used)
+        slot++;
+
+    if (slot == MAX_DRIVES)
+        return -1;
+
+    drives_table[slot].used = 1;
+    return slot;
+}
+
 static int drive_add(const char *file, const char *fmt, ...)
 {
 va_list ap;
+int index = drive_opt_get_free_idx();
 
-if (nb_drives_opt >= MAX_DRIVES) {
+if (nb_drives_opt >= MAX_DRIVES || index == -1) {
 fprintf(stderr, "qemu: too many drives\n");
 exit(1);
 }
 
-drives_opt[nb_drives_opt].file = file;
+drives_opt[index].file = file;
 va_start(ap, fmt);
-vsnprintf(drives_opt[nb_drives_opt].opt,
+vsnprintf(drives_opt[index].opt,
   sizeof(drives_opt[0].opt), fmt, ap);
 va_end(ap);
 
-return nb_drives_opt++;
+nb_drives_opt++;
+return index;
 }
 
 int drive_get_index(BlockInterfaceType type, int bus, int unit)
@@ -4939,10 +4968,11 @@ int drive_get_index(BlockInterfaceType t
 
 /* seek interface, bus and unit */
 
-for (index = 0; index < nb_drives; index++)
+for (index = 0; index < MAX_DRIVES; index++)
 if (drives_table[index].type == type &&
drives_table[index].bus == bus &&
-   drives_table[index].unit == unit)
+   drives_table[index].unit == unit &&
+   drives_table[index].used)
 return index;
 
 return -1;
@@ -4978,6 +5008,7 @@ static int drive_init(struct drive_opt *
 int index;
 int cache;
 int bdrv_flags;
+int drives_table_idx;
 char *str = arg->opt;
 char *params[] = { "bus", "unit", "if", "index", "cyls", "heads",
"secs", "trans", "media", "snapshot", "file",
@@ -5229,10 +5260,11 @@ static int drive_init(struct drive_opt *
 snprintf(buf, sizeof(buf), "%s%s%i",
  devname, mediastr, unit_id);
 bdrv = bdrv_new(buf);
-drives_table[nb_drives].bdrv = bdrv;
-drives_table[nb_drives].type = type;
-drives_table[nb_drives].bus = bus_id;
-drives_table[nb_drives].unit = unit_id;
+drives_table_idx = drive_get_free_idx();
+drives_table[drives_table_idx].bdrv = bdrv;
+drives_table[drives_table_idx].type = type;
+drives_table[drives_table_idx].bus = bus_id;
+drives_table[drives_table_idx].unit = unit_id;
 nb_drives++;
 
 switch(type) {
@@ -9524,8 +9556,10 @@ int main(int argc, char **argv)
 if (nb_drives_opt < MAX_DRIVES)
 drive_add(NULL, SD_ALIAS);
 
-/* open the virtual block devices */
-
+/* open the virtual block devices
+ * note that migration with device
+ * hot add/remove is broken.
+ */
 for(i = 0; i < nb_drives_opt; i++)
 if (drive_init(&drives_opt[i], snapshot, machine) == -1)
exit(1);

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 06/23] QEMU/KVM: pci hotplug GPE support

2008-03-04 Thread Marcelo Tosatti
Enable the corresponding bit in the PCST region and trigger the SCI.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/hw/acpi.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/acpi.c
+++ kvm-userspace.hotplug/qemu/hw/acpi.c
@@ -538,6 +538,7 @@ void qemu_system_powerdown(void)
 #endif
 #define GPE_BASE 0xafe0
 #define PROC_BASE 0xaf00
+#define PCI_BASE 0xae00
 
 struct gpe_regs {
 uint16_t sts; /* status */
@@ -546,7 +547,13 @@ struct gpe_regs {
 uint8_t down;
 };
 
+struct pci_status { /* PCI hotplug status exposed through I/O ports at PCI_BASE */
+uint32_t up; /* bit N set by enable_device() for slot N; read/written at PCI_BASE */
+uint32_t down; /* bit N set by disable_device() for slot N; read/written at PCI_BASE + 4 */
+};
+
 static struct gpe_regs gpe;
+static struct pci_status pci0_status;
 
 static uint32_t gpe_readb(void *opaque, uint32_t addr)
 {
@@ -614,6 +621,45 @@ static void gpe_writeb(void *opaque, uin
 #endif
 }
 
+/*
+ * I/O port read handler for the PCI hotplug status registers.
+ *
+ * `opaque` is the struct pci_status registered with the port range.
+ * Returns the `up` mask for PCI_BASE, the `down` mask for PCI_BASE + 4 and
+ * 0 for any other address.
+ */
+static uint32_t pcihotplug_read(void *opaque, uint32_t addr)
+{
+    uint32_t val = 0;
+    struct pci_status *g = opaque;
+
+    switch (addr) {
+    case PCI_BASE:
+        val = g->up;
+        break;
+    case PCI_BASE + 4:
+        val = g->down;
+        break;
+    default:
+        break;
+    }
+
+#if defined(DEBUG)
+    /* both values are 32-bit; "%lx" would mismatch the argument type */
+    printf("pcihotplug read %x == %x\n", (unsigned)addr, (unsigned)val);
+#endif
+    return val;
+}
+
+/*
+ * I/O port write handler for the PCI hotplug status registers.
+ *
+ * `opaque` is the struct pci_status registered with the port range.
+ * A write to PCI_BASE replaces the `up` mask, a write to PCI_BASE + 4
+ * replaces the `down` mask; writes to any other address are ignored.
+ */
+static void pcihotplug_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    struct pci_status *g = opaque;
+
+    switch (addr) {
+    case PCI_BASE:
+        g->up = val;
+        break;
+    case PCI_BASE + 4:
+        g->down = val;
+        break;
+    default:
+        break;
+    }
+
+#if defined(DEBUG)
+    /* both values are unsigned 32-bit; "%lx"/"%d" would mismatch them */
+    printf("pcihotplug write %x <== %u\n", (unsigned)addr, (unsigned)val);
+#endif
+}
+
+
 static char *model;
 
 void qemu_system_hot_add_init(char *cpu_model)
@@ -624,6 +670,9 @@ void qemu_system_hot_add_init(char *cpu_
 register_ioport_write(PROC_BASE, 4, 1, gpe_writeb, &gpe);
 register_ioport_read(PROC_BASE, 4, 1,  gpe_readb, &gpe);
 
+register_ioport_write(PCI_BASE, 8, 4, pcihotplug_write, &pci0_status);
+register_ioport_read(PCI_BASE, 8, 4,  pcihotplug_read, &pci0_status);
+
 model = cpu_model;
 }
 
@@ -665,3 +714,29 @@ void qemu_system_cpu_hot_add(int cpu, in
 disable_processor(&gpe, cpu);
 qemu_set_irq(pm_state->irq, 0);
 }
+
+/*
+ * Flag `slot` as hot-added: sets bit 1 (value 2) in the GPE status and
+ * enable registers and sets the slot's bit in the `up` mask.
+ */
+static void enable_device(struct pci_status *p, struct gpe_regs *g, int slot)
+{
+    g->sts |= 2;
+    g->en |= 2;
+    /* unsigned shift: "1 << 31" on a signed int is undefined behaviour */
+    p->up |= ((uint32_t)1 << slot);
+}
+
+/*
+ * Flag `slot` as being removed: sets bit 1 (value 2) in the GPE status and
+ * enable registers and sets the slot's bit in the `down` mask.
+ */
+static void disable_device(struct pci_status *p, struct gpe_regs *g, int slot)
+{
+    g->sts |= 2;
+    g->en |= 2;
+    /* unsigned shift: "1 << 31" on a signed int is undefined behaviour */
+    p->down |= ((uint32_t)1 << slot);
+}
+
+/*
+ * Report a PCI hotplug event for `slot`.
+ *
+ * Raises pm_state->irq, clears both hotplug masks, records the slot in the
+ * `up` mask (state != 0) or the `down` mask (state == 0) and then lowers
+ * the irq again.
+ */
+void qemu_system_device_hot_add(int slot, int state)
+{
+    qemu_set_irq(pm_state->irq, 1);
+
+    /* only one event is reported at a time: start from empty masks */
+    pci0_status.down = 0;
+    pci0_status.up = 0;
+
+    if (!state) {
+        disable_device(&pci0_status, &gpe, slot);
+    } else {
+        enable_device(&pci0_status, &gpe, slot);
+    }
+
+    qemu_set_irq(pm_state->irq, 0);
+}

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 04/23] QEMU/KVM: add pci_find_bus

2008-03-04 Thread Marcelo Tosatti
Return PCIBus pointer from bus number integer.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/qemu/hw/pci.c
===
--- kvm-userspace.hotplug.orig/qemu/hw/pci.c
+++ kvm-userspace.hotplug/qemu/hw/pci.c
@@ -675,6 +675,16 @@ static void pci_bridge_write_config(PCID
 pci_default_write_config(d, address, val, len);
 }
 
+/*
+ * Walk the bus list starting at first_bus and return the bus whose number
+ * equals `bus_num`, or NULL when there is no such bus.
+ */
+PCIBus *pci_find_bus(int bus_num)
+{
+    PCIBus *cur;
+
+    for (cur = first_bus; cur != NULL; cur = cur->next) {
+        if (cur->bus_num == bus_num)
+            return cur;
+    }
+
+    return NULL;
+}
+
 PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint32_t id,
 pci_map_irq_fn map_irq, const char *name)
 {
Index: kvm-userspace.hotplug/qemu/hw/pci.h
===
--- kvm-userspace.hotplug.orig/qemu/hw/pci.h
+++ kvm-userspace.hotplug/qemu/hw/pci.h
@@ -3,6 +3,7 @@
 
 /* PCI includes legacy ISA access.  */
 #include "isa.h"
+#include 
 
 /* PCI bus */
 
@@ -91,6 +92,7 @@ void pci_data_write(void *opaque, uint32
 uint32_t pci_data_read(void *opaque, uint32_t addr, int len);
 int pci_bus_num(PCIBus *s);
 void pci_for_each_device(int bus_num, void (*fn)(PCIDevice *d));
+PCIBus *pci_find_bus(int bus_num);
 
 void pci_info(void);
 PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint32_t id,

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch 02/23] QEMU/KVM: add devices to represent PCI slots with _EJ0 method

2008-03-04 Thread Marcelo Tosatti
Presence of _EJ0 method indicates that slots are hot-pluggable.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/bios/acpi-dsdt.dsl
===
--- kvm-userspace.hotplug.orig/bios/acpi-dsdt.dsl
+++ kvm-userspace.hotplug/bios/acpi-dsdt.dsl
@@ -407,6 +407,161 @@ DefinitionBlock (
 Package() {0x001f, 2, LNKB, 0},
 })
 
+Device (S1) {  // Slot 1
+   Name (_ADR, 0x0001)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S2) {  // Slot 2
+   Name (_ADR, 0x0002)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S3) {  // Slot 3
+   Name (_ADR, 0x0003)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S4) {  // Slot 4
+   Name (_ADR, 0x0004)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S5) {  // Slot 5
+   Name (_ADR, 0x0005)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S6) {  // Slot 6
+   Name (_ADR, 0x0006)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S7) {  // Slot 7
+   Name (_ADR, 0x0007)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S8) {  // Slot 8
+   Name (_ADR, 0x0008)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S9) {  // Slot 9
+   Name (_ADR, 0x0009)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S10) {  // Slot 10
+   Name (_ADR, 0x000A)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S11) {  // Slot 11
+   Name (_ADR, 0x000B)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S12) {  // Slot 12
+   Name (_ADR, 0x000C)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S13) {  // Slot 13
+   Name (_ADR, 0x000D)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S14) {  // Slot 14
+   Name (_ADR, 0x000E)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S15) {  // Slot 15
+   Name (_ADR, 0x000F)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S16) {  // Slot 16
+   Name (_ADR, 0x0010)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S17) {  // Slot 17
+   Name (_ADR, 0x0011)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S18) {  // Slot 18
+   Name (_ADR, 0x0012)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S19) {  // Slot 19
+   Name (_ADR, 0x0013)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S20) {  // Slot 20
+   Name (_ADR, 0x0014)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S21) {  // Slot 21
+   Name (_ADR, 0x0015)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S22) {  // Slot 22
+   Name (_ADR, 0x0016)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S23) {  // Slot 23
+   Name (_ADR, 0x0017)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S24) {  // Slot 24
+   Name (_ADR, 0x0018)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S25) {  // Slot 25
+   Name (_ADR, 0x0019)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S26) {  // Slot 26
+   Name (_ADR, 0x001A)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S27) {  // Slot 27
+   Name (_ADR, 0x001B)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S28) {  // Slot 28
+   Name (_ADR, 0x001C)
+   Method (_EJ0,1) { Return (0x0) }
+}
+
+Device (S29) {  // Slot 29
+   Name (_ADR, 0x001D)
+   Method (_EJ0,1) { Return (0x0) }
+

[kvm-devel] [patch 01/23] QEMU/KVM: add PCI IRQ routing information up to slot 32

2008-03-04 Thread Marcelo Tosatti
Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/bios/acpi-dsdt.dsl
===
--- kvm-userspace.hotplug.orig/bios/acpi-dsdt.dsl
+++ kvm-userspace.hotplug/bios/acpi-dsdt.dsl
@@ -249,6 +249,162 @@ DefinitionBlock (
 Package() {0x0005, 1, LNKB, 0},
 Package() {0x0005, 2, LNKC, 0},
 Package() {0x0005, 3, LNKD, 0},
+
+// PCI Slot 6
+Package() {0x0006, 0, LNKB, 0},
+Package() {0x0006, 1, LNKC, 0},
+Package() {0x0006, 2, LNKD, 0},
+Package() {0x0006, 3, LNKA, 0},
+
+// PCI Slot 7
+Package() {0x0007, 0, LNKC, 0},
+Package() {0x0007, 1, LNKD, 0},
+Package() {0x0007, 2, LNKA, 0},
+Package() {0x0007, 3, LNKB, 0},
+
+// PCI Slot 8
+Package() {0x0008, 0, LNKD, 0},
+Package() {0x0008, 1, LNKA, 0},
+Package() {0x0008, 2, LNKB, 0},
+Package() {0x0008, 3, LNKC, 0},
+
+// PCI Slot 9
+Package() {0x0009, 0, LNKA, 0},
+Package() {0x0009, 1, LNKB, 0},
+Package() {0x0009, 2, LNKC, 0},
+Package() {0x0009, 3, LNKD, 0},
+
+// PCI Slot 10
+Package() {0x000a, 0, LNKB, 0},
+Package() {0x000a, 1, LNKC, 0},
+Package() {0x000a, 2, LNKD, 0},
+Package() {0x000a, 3, LNKA, 0},
+
+// PCI Slot 11
+Package() {0x000b, 0, LNKC, 0},
+Package() {0x000b, 1, LNKD, 0},
+Package() {0x000b, 2, LNKA, 0},
+Package() {0x000b, 3, LNKB, 0},
+
+// PCI Slot 12
+Package() {0x000c, 0, LNKD, 0},
+Package() {0x000c, 1, LNKA, 0},
+Package() {0x000c, 2, LNKB, 0},
+Package() {0x000c, 3, LNKC, 0},
+
+// PCI Slot 13
+Package() {0x000d, 0, LNKA, 0},
+Package() {0x000d, 1, LNKB, 0},
+Package() {0x000d, 2, LNKC, 0},
+Package() {0x000d, 3, LNKD, 0},
+
+// PCI Slot 14
+Package() {0x000e, 0, LNKB, 0},
+Package() {0x000e, 1, LNKC, 0},
+Package() {0x000e, 2, LNKD, 0},
+Package() {0x000e, 3, LNKA, 0},
+
+// PCI Slot 15
+Package() {0x000f, 0, LNKC, 0},
+Package() {0x000f, 1, LNKD, 0},
+Package() {0x000f, 2, LNKA, 0},
+Package() {0x000f, 3, LNKB, 0},
+
+// PCI Slot 16
+Package() {0x0010, 0, LNKD, 0},
+Package() {0x0010, 1, LNKA, 0},
+Package() {0x0010, 2, LNKB, 0},
+Package() {0x0010, 3, LNKC, 0},
+
+// PCI Slot 17
+Package() {0x0011, 0, LNKA, 0},
+Package() {0x0011, 1, LNKB, 0},
+Package() {0x0011, 2, LNKC, 0},
+Package() {0x0011, 3, LNKD, 0},
+
+// PCI Slot 18
+Package() {0x0012, 0, LNKB, 0},
+Package() {0x0012, 1, LNKC, 0},
+Package() {0x0012, 2, LNKD, 0},
+Package() {0x0012, 3, LNKA, 0},
+
+// PCI Slot 19
+Package() {0x0013, 0, LNKC, 0},
+Package() {0x0013, 1, LNKD, 0},
+Package() {0x0013, 2, LNKA, 0},
+Package() {0x0013, 3, LNKB, 0},
+
+// PCI Slot 20
+Package() {0x0014, 0, LNKD, 0},
+Package() {0x0014, 1, LNKA, 0},
+Package() {0x0014, 2, LNKB, 0},
+Package() {0x0014, 3, LNKC, 0},
+
+// PCI Slot 21
+Package() {0x0015, 0, LNKA, 0},
+Package() {0x0015, 1, LNKB, 0},
+Package() {0x0015, 2, LNKC, 0},
+Package() {0x0015, 3, LNKD, 0},
+
+// PCI Slot 22
+Package() {0x0016, 0, LNKB, 0},
+Package() {0x0016, 1, LNKC, 0},
+Package() {0x0016, 2, LNKD, 0},
+Package() {0x0016, 3, LNKA, 0},
+
+// PCI Slot 23
+Package() {0x0017, 0, LNKC, 0},
+Package() {0x0017, 1, LNKD, 0},
+Package() {0x0017, 2, LNKA, 0},
+Package() {0x0017, 3, LNKB, 0},
+
+// PCI Slot 24
+Package() {0x0018, 0, LNKD, 0},
+Package()

[kvm-devel] [patch 03/23] QEMU/KVM: add OperationRegion and GPE handler for add/removal notification

2008-03-04 Thread Marcelo Tosatti
Use GPE _L01 to notify OSPM.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.hotplug/bios/acpi-dsdt.dsl
===
--- kvm-userspace.hotplug.orig/bios/acpi-dsdt.dsl
+++ kvm-userspace.hotplug/bios/acpi-dsdt.dsl
@@ -407,6 +407,13 @@ DefinitionBlock (
 Package() {0x001f, 2, LNKB, 0},
 })
 
+OperationRegion(PCST, SystemIO, 0xae00, 0x08)
+Field (PCST, DWordAcc, NoLock, WriteAsZeros)
+   {
+   PCIU, 32,
+   PCID, 32,
+   }
+
 Device (S1) {  // Slot 1
Name (_ADR, 0x0001)
Method (_EJ0,1) { Return (0x0) }
@@ -1142,6 +1149,256 @@ DefinitionBlock (
 Return(0x01)
 }
 Method(_L01) {
+/* Up status */
+If (And(\_SB.PCI0.PCIU, 0x2)) {
+Notify(\_SB.PCI0.S1, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x4)) {
+Notify(\_SB.PCI0.S2, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x8)) {
+Notify(\_SB.PCI0.S3, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x10)) {
+Notify(\_SB.PCI0.S4, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x20)) {
+Notify(\_SB.PCI0.S5, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x40)) {
+Notify(\_SB.PCI0.S6, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x80)) {
+Notify(\_SB.PCI0.S7, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x0100)) {
+Notify(\_SB.PCI0.S8, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x0200)) {
+Notify(\_SB.PCI0.S9, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x0400)) {
+Notify(\_SB.PCI0.S10, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x0800)) {
+Notify(\_SB.PCI0.S11, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x1000)) {
+Notify(\_SB.PCI0.S12, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x2000)) {
+Notify(\_SB.PCI0.S13, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x4000)) {
+Notify(\_SB.PCI0.S14, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x8000)) {
+Notify(\_SB.PCI0.S15, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x1)) {
+Notify(\_SB.PCI0.S16, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x2)) {
+Notify(\_SB.PCI0.S17, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x4)) {
+Notify(\_SB.PCI0.S18, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x8)) {
+Notify(\_SB.PCI0.S19, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x10)) {
+Notify(\_SB.PCI0.S20, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x20)) {
+Notify(\_SB.PCI0.S21, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x40)) {
+Notify(\_SB.PCI0.S22, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x80)) {
+Notify(\_SB.PCI0.S23, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x100)) {
+Notify(\_SB.PCI0.S24, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x200)) {
+Notify(\_SB.PCI0.S25, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x400)) {
+Notify(\_SB.PCI0.S26, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x800)) {
+Notify(\_SB.PCI0.S27, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x1000)) {
+Notify(\_SB.PCI0.S28, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x2000)) {
+Notify(\_SB.PCI0.S29, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x4000)) {
+Notify(\_SB.PCI0.S30, 0x1)
+}
+
+If (And(\_SB.PCI0.PCIU, 0x8000)) {
+Notify(\_SB.PCI0.S31, 0x1)
+}
+
+/* Down status */
+If (And(\_SB.PCI0.PCID, 0x2)) {
+Notify(\_SB.PCI0.S1, 0x3)
+}
+
+If (And(\_SB.PCI0.PCID, 0x4)) {
+Notify(\_SB.PCI0.S2, 0x3)
+}
+
+If (And(\_SB.PCI0.PCID, 0x8)) {
+Notify(\_SB.PCI0.S3, 0x3)
+}
+
+If (And(\_SB.PCI0.PCID, 0x10)) {
+Notify(\_SB.PCI0.S4, 0x3)
+}
+
+If (And(\_SB.PCI0.PCID, 0x20)) {
+Notify(\_SB.PCI0.S5, 0x3)
+}
+
+If (And(\_SB.PCI0.PCID, 0x40)) {
+Notify(\_SB.PCI0.S6, 0x3)
+}
+
+If (And(\_SB.PCI0.PCID, 0x80)) {
+Notify

[kvm-devel] [patch 00/23] [RFC] QEMU/KVM ACPI PCI hotplug

2008-03-04 Thread Marcelo Tosatti
The following patchset adds ACPI PCI hotplug support for QEMU.

It extends the number of slots with IRQ routing information from 6 to 32.

The only PCI driver for which the unregister method has been added is LSI SCSI;
I would like more comments in order to implement that for the remaining drivers.

-- 


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Joerg Roedel
On Tue, Mar 04, 2008 at 03:11:27PM -0300, Glauber Costa wrote:
> Joerg Roedel wrote:
> >On Tue, Mar 04, 2008 at 01:21:53PM -0300, Glauber Costa wrote:
> >>Hi guys,
> >>
> >>Here's a first series of patch aiming at vcpu pinning support in qemu.
> >>Ideally, as vcpu as just normal threads, the usual userspace tools can be 
> >>used
> >>to set cpu affinities mask.
> >>
> >>However, It makes it very difficult to _start_ a vm with vcpus pinned, since
> >>we don't know the thread ids from qemu in advance, nor do we know when are 
> >>the
> >>vcpus created.
> >>
> >>The patches introduce a -cpu-map option, that, if specified, starts the 
> >>virtual cpus
> >>with the specified affinities.
> >>
> >>Comments? Welcome. Random rants? Not welcome, but... how can I stop you? So 
> >>go ahead!
> >Cool, this goes into the same direction as I planned for KVM-NUMA
> >support. Do you plan to extend vcpu pinning into that direction?
> I don't have any immediate plans, but it is surely interesting. If the 
> patches (or something inspired in them) make it, there's something we can 
> draw support for.

There are patches for HVM-NUMA support on Xen developed by André
Przywara. I think they are easy to port to KVM. Maybe it is the better
approach than implementing simple vcpu pinning.

Joerg

-- 
   |   AMD Saxony Limited Liability Company & Co. KG
 Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
 System|  Register Court Dresden: HRA 4896
 Research  |  General Partner authorized to represent:
 Center| AMD Saxony LLC (Wilmington, Delaware, US)
   | General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Glauber Costa
Joerg Roedel wrote:
> On Tue, Mar 04, 2008 at 01:21:53PM -0300, Glauber Costa wrote:
>> Hi guys,
>>
>> Here's a first series of patch aiming at vcpu pinning support in qemu.
>> Ideally, as vcpu as just normal threads, the usual userspace tools can be 
>> used
>> to set cpu affinities mask.
>>
>> However, It makes it very difficult to _start_ a vm with vcpus pinned, since
>> we don't know the thread ids from qemu in advance, nor do we know when are 
>> the
>> vcpus created.
>>
>> The patches introduce a -cpu-map option, that, if specified, starts the 
>> virtual cpus
>> with the specified affinities.
>>
>> Comments? Welcome. Random rants? Not welcome, but... how can I stop you? So 
>> go ahead!
> 
> Cool, this goes into the same direction as I planned for KVM-NUMA
> support. Do you plan to extend vcpu pinning into that direction?
I don't have any immediate plans, but it is surely interesting. If the 
patches (or something inspired in them) make it, there's something we 
can draw support for.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Glauber Costa
Anthony Liguori wrote:
> Glauber Costa wrote:
>> Hi guys,
>>
>> Here's a first series of patch aiming at vcpu pinning support in qemu.
>> Ideally, as vcpu as just normal threads, the usual userspace tools can 
>> be used
>> to set cpu affinities mask.
>>
>> However, It makes it very difficult to _start_ a vm with vcpus pinned, 
>> since
>> we don't know the thread ids from qemu in advance, nor do we know when 
>> are the
>> vcpus created.
>>
>> The patches introduce a -cpu-map option, that, if specified, starts 
>> the virtual cpus
>> with the specified affinities.
>>
>> Comments? Welcome. Random rants? Not welcome, but... how can I stop 
>> you? So go ahead!
>>
>>   
> 
> So why exactly is this useful?  I have a hard time constructing a 
> reasonable use-case in my mind for something like this.

My main interest is in management tools being able to specify pinning
set ups at VM creation time.

As I said, it can be done through tools like taskset, but then you'd 
have to know:
  * when the threads are created
  * which thread id corresponds to each cpu

And of course, for some amount of time, the threads will be running on a 
"wrong" cpu, which may affect workloads running there (which is a case 
cpu pinning usually tries to address).




-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] still seeing network freezes with rtl8139 nic

2008-03-04 Thread Eckersid SIlapaswang
david ahern  cisco.com> writes:

> I know this issue has been discussed on this list before, but I am still
> experiencing network freezes in a guest that requires a restart to clear. When
> the network freezes in the guest I no longer see the network interrupts 
> counter
> incrementing (i.e., the eth0 counter in /proc/interrupts in the guest). Using
> the crash utility, I verified that the interrupt is still enabled on the guest
> side and that no interrupts are pending. This suggests that the interrupts are
> not getting delivered to the VM.

I just wanted to let the developers know that I'm having similar problems
concerning interrupts with networking dying as well.

Running a stress test of kvm using an EnGarde Secure Linux 1.5 guest OS.
Under a heavy network email load, the guest OS networking gets knocked out
- unable to ping, ssh, etc. Can only get things started again by going
into vncviewer and restarting the networking services from there.

CPUs: 8 x Intel(R) Xeon(R) CPU E5335 @ 2.00GHz
KVM 52-1
Host Kernel: 2.6.25-rc2
Kernel Arch: x86_64
Guest OS: EnGarde Secure Linux 32bit i686, 2.4.31-1.5.60

Command Line:
/usr/bin/qemu-system -hda /root/images/bwimail01.img -boot c -m 384 -smp 4
-std-vga -net nic,vlan=0,macaddr=52:54:00:12:34:6F -net
tap,ifname=tap1,script=/etc/qemu-ifup -vnc 192.168.1.57:1 &

Please let me know if you need any more information and if I could be of any
assistance in providing information to have this issue resolved.



-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Anthony Liguori
Glauber Costa wrote:
> Hi guys,
>
> Here's a first series of patch aiming at vcpu pinning support in qemu.
> Ideally, as vcpu as just normal threads, the usual userspace tools can be used
> to set cpu affinities mask.
>
> However, It makes it very difficult to _start_ a vm with vcpus pinned, since
> we don't know the thread ids from qemu in advance, nor do we know when are the
> vcpus created.
>
> The patches introduce a -cpu-map option, that, if specified, starts the 
> virtual cpus
> with the specified affinities.
>
> Comments? Welcome. Random rants? Not welcome, but... how can I stop you? So 
> go ahead!
>
>   

So why exactly is this useful?  I have a hard time constructing a 
reasonable use-case in my mind for something like this.

Regards,

Anthony Liguori


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH][EXTBOOT] Fix read drive parameters to solve Grub Error 18

2008-03-04 Thread Anthony Liguori
In certain circumstances, the calculated CHS can result in a total number of
sectors that is less than the actual number of sectors.  I'm not entirely
sure why this upsets grub, but it seems to be the source of the Grub Error 18
that sometimes occurs when using extboot.

The solution is to implement the read drive parameters function and return the
actual numbers of sectors.  This requires changing the QEMU <=> extboot
interface as this was not previously passed to extboot.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>

diff --git a/extboot/extboot.S b/extboot/extboot.S
index 584d36d..9eb9333 100644
--- a/extboot/extboot.S
+++ b/extboot/extboot.S
@@ -351,7 +351,7 @@ disk_reset:
sub $1, %ax
mov %ax, 4(%bp)
 
-   alloca $8
+   alloca $16
 
movw $0, 0(%bx) /* read c,h,s */
push %bx
@@ -426,7 +426,7 @@ read_disk_drive_parameters:
push %bx
 
/* allocate memory for packet, pointer gets returned in bx */
-   alloca $8
+   alloca $16
 
/* issue command */
movw $0, 0(%bx) /* cmd = 0, read c,h,s */
@@ -481,7 +481,7 @@ alternate_disk_reset:
 
 read_disk_drive_size:
push %bx
-   alloca $8
+   alloca $16
 
movw $0, 0(%bx) /* cmd = 0, read c,h,s */
push %bx
@@ -572,29 +572,20 @@ extended_write_sectors:
extended_read_write_sectors $0x02
 
 get_extended_drive_parameters:
-   mov $1, %ah
-   stc
-   ret
-#if 0
-   /* this function is seriously borked */
-1:
push %ax
push %bp
push %cx
push %dx
 
-   allocbpa $8
+   allocbpa $16
 
movw $0, 0(%bp) /* read c,h,s */
push %bp
call send_command
add $2, %sp
 
-   /* check the size of the passed in data */
-   cmpw $26, 0(%si)
-   mov 0(%si), %ax
-   dump %ax
-   jle 0b
+   /* write size */
+   movw $26, 0(%si)
 
/* set flags to 2 */
movw $2, 2(%si)
@@ -617,46 +608,19 @@ get_extended_drive_parameters:
xor %ax, %ax
mov %ax, 14(%si)
 
-   /* calculate total sectors */
-
-   /* cx:dx = cylinders */
-   mov 2(%bp), %dx
-   xor %cx, %cx
-
-   /* *= heads */
-   push 4(%bp)
-   push $0
-   push %dx
-   push %cx
-   call mul32
-   add $8, %sp
-
-   /* *= sectors */
-   push 6(%bp)
-   push $0
-   push %dx
-   push %cx
-   call mul32
-   add $8, %sp
-
-   /* total number of sectors */
-   mov %dx, 16(%si)
-   mov %cx, 18(%si)
-   xor %ax, %ax
+   /* set total number of sectors */
+   mov 8(%bp), %ax
+   mov %ax, 16(%si)
+   mov 10(%bp), %ax
+   mov %ax, 18(%si)
+   mov 12(%bp), %ax
mov %ax, 20(%si)
+   mov 14(%bp), %ax
mov %ax, 22(%si)
 
/* number of bytes per sector */
movw $512, 24(%si)
 
-   /* optional segmention:offset to EDD config */
-   cmpw $30, 0(%si)
-   jl 1f
-
-   movw $0x, 26(%si)
-   movw $0x, 28(%si)
-
-1:
freebpa
 
pop %dx
@@ -667,7 +631,6 @@ get_extended_drive_parameters:
mov $0, %ah
clc
ret
-#endif
 
 terminate_disk_emulation:
mov $1, %ah
diff --git a/qemu/hw/extboot.c b/qemu/hw/extboot.c
index 8759895..056fb59 100644
--- a/qemu/hw/extboot.c
+++ b/qemu/hw/extboot.c
@@ -26,6 +26,7 @@ union extboot_cmd
uint16_t cylinders;
uint16_t heads;
uint16_t sectors;
+   uint64_t nb_sectors;
 } query_geometry;
 struct {
uint16_t type;
@@ -75,6 +76,7 @@ static void extboot_write_cmd(void *opaque, uint32_t addr, 
uint32_t value)
 union extboot_cmd *cmd = (void *)(phys_ram_base + ((value & 0x) << 4));
 BlockDriverState *bs = opaque;
 int cylinders, heads, sectors, err;
+int64_t nb_sectors;
 
 get_translated_chs(bs, &cylinders, &heads, &sectors);
 
@@ -88,9 +90,11 @@ static void extboot_write_cmd(void *opaque, uint32_t addr, 
uint32_t value)
 
 switch (cmd->type) {
 case 0x00:
+   bdrv_get_geometry(bs, &nb_sectors);
cmd->query_geometry.cylinders = cylinders;
cmd->query_geometry.heads = heads;
cmd->query_geometry.sectors = sectors;
+   cmd->query_geometry.nb_sectors = nb_sectors;
cpu_physical_memory_set_dirty((value & 0x) << 4);
break;
 case 0x01:

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] loop in copy_user_generic_string

2008-03-04 Thread Andi Kleen
Avi Kivity <[EMAIL PROTECTED]> writes:
>
> Most likely movs emulation is broken for long counts.  Please post a 
> disassembly of copy_user_generic_string to make sure we're looking at 
> the same code.

Be careful -- this code is patched at runtime and what you 
see in the vmlinux is not necessarily the same that is executed

Incidentally, that might cause problems.

-Andi

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Joerg Roedel
On Tue, Mar 04, 2008 at 01:21:53PM -0300, Glauber Costa wrote:
> Hi guys,
> 
> Here's a first series of patch aiming at vcpu pinning support in qemu.
> Ideally, as vcpu as just normal threads, the usual userspace tools can be used
> to set cpu affinities mask.
> 
> However, It makes it very difficult to _start_ a vm with vcpus pinned, since
> we don't know the thread ids from qemu in advance, nor do we know when are the
> vcpus created.
> 
> The patches introduce a -cpu-map option, that, if specified, starts the 
> virtual cpus
> with the specified affinities.
> 
> Comments? Welcome. Random rants? Not welcome, but... how can I stop you? So 
> go ahead!

Cool, this goes into the same direction as I planned for KVM-NUMA
support. Do you plan to extend vcpu pinning into that direction?

Joerg

-- 
   |   AMD Saxony Limited Liability Company & Co. KG
 Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
 System|  Register Court Dresden: HRA 4896
 Research  |  General Partner authorized to represent:
 Center| AMD Saxony LLC (Wilmington, Delaware, US)
   | General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] loop in copy_user_generic_string

2008-03-04 Thread Avi Kivity
Zdenek Kabelac wrote:
> Is it emulated ? I've thought it's running natively with vmx?
>
>   

In some cases (memory mapped I/O, writes to page tables) some 
instructions are emulated.  Usually they run natively.

Please post the output of 'kvm_stat -1' to ensure the problem is with 
the emulator.

-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] loop in copy_user_generic_string

2008-03-04 Thread Zdenek Kabelac
2008/3/4, Avi Kivity <[EMAIL PROTECTED]>:
> Zdenek Kabelac wrote:
>  > Hello
>  >
>  >
>  > I'm having weird problem and being a bit puzzled about where to look
>  > for this bug.
>  >
>  > I'm using T61 - C2D  2GB
>  >
>  > So I'll describe symptoms:
>  >
>  > When I run inside my 0.5G smp  qemu-kvm guest with Debian these two
>  > loops in parallel:
>  >
>  > 'while : ; do dmsetup status  ; done'
>  >
>  > and
>  >
>  > 'while : ; do cat /dev/zero >/dev/mapper/any_free_to_use_lvm_partition ; 
> done'
>  >
>  > after a while dmsetup start to loop in this place:
>  >
>
> > [  356.257323]  [] ? copy_user_generic_string+0x17/0x40
>  >
>  >
>
> > I'm using preemptible kernel and the code will stay in the
>  > copy_user_generic_string call forever eating 100%cpu - without
>  > preemption the kernel gets dead.
>  >
>  > With preemption when I run at this moment second dmsetup status in
>  > parallel the busy-looped dmsetup gets finished and while loop starts
>  > to continue again until next dmsetup busy-loop.
>  >
>  > I've noticed that if I change inside  drivers/md/dm-ioctl.c
>  > copy_params  the parameter tmp.data_size in the copy_from_user call to
>  > just page size (4kB) - or when I replace vmalloc to kmalloc - the busy
>  > loop will not happen.
>  >
>  > So it seems to be related to page jump somehow
>  >
>  > Anyway might have any idea - what is going on here ?
>  >
>
>
> Most likely movs emulation is broken for long counts.  Please post a
>  disassembly of copy_user_generic_string to make sure we're looking at
>  the same code.
>

Is it emulated ? I've thought it's running natively with vmx?

Anyway here is  objdump of copy_user_64.o
(loops on 0x1d7)

01c0 :
 1c0:   41 89 c8mov%ecx,%r8d
 1c3:   89 d1   mov%edx,%ecx
 1c5:   c1 e9 03shr$0x3,%ecx
 1c8:   83 e2 07and$0x7,%edx
 1cb:   74 0a   je 1d7 
 1cd:   f3 48 a5rep movsq %ds:(%rsi),%es:(%rdi)
 1d0:   89 d1   mov%edx,%ecx
 1d2:   f3 a4   rep movsb %ds:(%rsi),%es:(%rdi)
 1d4:   89 c8   mov%ecx,%eax
 1d6:   c3  retq
 1d7:   f3 48 a5rep movsq %ds:(%rsi),%es:(%rdi)
 1da:   31 c0   xor%eax,%eax
 1dc:   c3  retq
 1dd:   48 8d 04 ca lea(%rdx,%rcx,8),%rax
 1e1:   eb 02   jmp1e5 
 1e3:   89 c8   mov%ecx,%eax
 1e5:   45 85 c0test   %r8d,%r8d
 1e8:   74 08   je 1f2 
 1ea:   89 c1   mov%eax,%ecx
 1ec:   50  push   %rax
 1ed:   31 c0   xor%eax,%eax
 1ef:   f3 aa   rep stos %al,%es:(%rdi)
 1f1:   58  pop%rax
 1f2:   c3  retq

Zdenek

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 8/8] provide -cpu-map option

2008-03-04 Thread Glauber Costa
This patch introduces a -cpu-map option.
It has the form C:x,y,z..., and has the effect
of setting the affinity mask of vcpu C to processors
x,y,z...

Signed-off-by: Glauber Costa <[EMAIL PROTECTED]>
---
 qemu/vl.c |   13 +
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/qemu/vl.c b/qemu/vl.c
index 4715594..fa830dd 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -8059,6 +8059,7 @@ static void help(int exitcode)
   "-no-kvm disable KVM hardware virtualization\n"
 #endif
   "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n"
+   "-cpu-map C:x,y,z... set cpu 'C' affinity to processors x,y,z...\n"
 #endif
 #ifdef TARGET_I386
"-std-vgasimulate a standard VGA card with VESA Bochs 
Extensions\n"
@@ -8177,6 +8178,7 @@ enum {
 QEMU_OPTION_no_acpi,
 QEMU_OPTION_no_kvm,
 QEMU_OPTION_no_kvm_irqchip,
+QEMU_OPTION_cpu_map,
 QEMU_OPTION_no_reboot,
 QEMU_OPTION_show_cursor,
 QEMU_OPTION_daemonize,
@@ -8263,6 +8265,7 @@ const QEMUOption qemu_options[] = {
 { "no-kvm", 0, QEMU_OPTION_no_kvm },
 #endif
 { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip },
+{ "cpu-map", HAS_ARG, QEMU_OPTION_cpu_map },
 #endif
 #if defined(TARGET_PPC) || defined(TARGET_SPARC)
 { "g", 1, QEMU_OPTION_g },
@@ -9211,6 +9214,16 @@ int main(int argc, char **argv)
kvm_irqchip = 0;
break;
}
+case QEMU_OPTION_cpu_map: {
+int c = atoi(optarg);
+char *ptr = strchr(optarg, ':');
+cpu_set_t set;
+if (!ptr)
+fprintf(stderr, "invalid cpu mapping %s\n", ptr);
+process_cpu_set(++ptr, &set);
+kvm_store_cpu_affinity(c, &set);
+break;
+}
 #endif
 case QEMU_OPTION_usb:
 usb_enabled = 1;
-- 
1.5.0.6


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 7/8] process a cpu affinity mask

2008-03-04 Thread Glauber Costa
This patch provides a function that processes a cpu affinity list
in the form x,y,z... into a cpu_set_t variable.

Signed-off-by: Glauber Costa <[EMAIL PROTECTED]>
---
 qemu/vl.c |   17 +
 1 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/qemu/vl.c b/qemu/vl.c
index 6a94724..4715594 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -8646,6 +8646,23 @@ void *qemu_alloc_physram(unsigned long memory)
 
 return area;
 }
+#ifdef USE_KVM
+#include 
+void process_cpu_set(const char *map, cpu_set_t *set)
+{
+char *ptr = map;
+int c;
+CPU_ZERO(set);
+do {
+c = atoi(ptr);
+CPU_SET(c, set);
+ptr = strchr(ptr, ',');
+if (!ptr)
+break;
+ptr++;
+} while (*ptr);
+}
+#endif
 
 int main(int argc, char **argv)
 {
-- 
1.5.0.6


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 5/8] initialize affinities

2008-03-04 Thread Glauber Costa
store default process affinities before we get the chance to process
any options

Signed-off-by: Glauber Costa <[EMAIL PROTECTED]>
---
 qemu/vl.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/qemu/vl.c b/qemu/vl.c
index f10fbd8..6a94724 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -8717,6 +8717,9 @@ int main(int argc, char **argv)
 }
 #endif
 
+#ifdef USE_KVM
+kvm_register_default_affinities();
+#endif
 register_machines();
 machine = first_machine;
 cpu_model = NULL;
-- 
1.5.0.6


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 1/8] add thread id to vcpu structure

2008-03-04 Thread Glauber Costa
This allows us to track which thread is currently running the cpu.
Signed-off-by: Glauber Costa <[EMAIL PROTECTED]>
---
 qemu/qemu-kvm.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 051946e..43e0f2e 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -47,6 +47,7 @@ struct vcpu_info {
 int signalled;
 int stop;
 int stopped;
+int thread_id;
 } vcpu_info[256];
 
 CPUState *qemu_kvm_cpu_env(int index)
-- 
1.5.0.6


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 6/8] stabilish default affinity for newly created cpus

2008-03-04 Thread Glauber Costa
Signed-off-by: Glauber Costa <[EMAIL PROTECTED]>
---
 qemu/qemu-kvm.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index a36fbf6..80fe8e5 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -68,6 +68,12 @@ void kvm_register_default_affinities(void)
 }
 }
 
+static inline void kvm_default_affinity(int cpu)
+{
+sched_setaffinity(vcpu_info[cpu].thread_id, sizeof(cpu_set_t),
+  &vcpu_info[cpu].cpu_set);
+}
+
 static inline unsigned long kvm_get_thread_id(void)
 {
 return syscall(SYS_gettid);
@@ -353,6 +359,7 @@ static void *ap_main_loop(void *_env)
 vcpu = &vcpu_info[env->cpu_index];
 vcpu->env = env;
 vcpu->thread_id = kvm_get_thread_id();
+kvm_default_affinity(env->cpu_index);
 sigfillset(&signals);
 //sigdelset(&signals, SIG_IPI);
 sigprocmask(SIG_BLOCK, &signals, NULL);
@@ -400,6 +407,7 @@ int kvm_init_ap(void)
 vcpu = &vcpu_info[0];
 vcpu->env = first_cpu;
 vcpu->thread_id = kvm_get_thread_id();
+kvm_default_affinity(0);
 signal(SIG_IPI, sig_ipi_handler);
 for (i = 1; i < smp_cpus; ++i) {
 kvm_init_new_ap(i, env);
-- 
1.5.0.6


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 4/8] store and set cpu affinities

2008-03-04 Thread Glauber Costa
This patch provides a cpu_set variable to vcpu_info structure.
It stores the current cpu mask for a thread.

We also provide a wrapper for storing a provided affinity, and a
function to set default affinities: the default affinities are those of
the current process.

Signed-off-by: Glauber Costa <[EMAIL PROTECTED]>
---
 qemu/qemu-kvm.c |   17 +
 1 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 1aeb97b..a36fbf6 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -49,8 +49,25 @@ struct vcpu_info {
 int stop;
 int stopped;
 int thread_id;
+cpu_set_t cpu_set;
 } vcpu_info[256];
 
+void kvm_store_cpu_affinity(int cpu, cpu_set_t *map)
+{
+   memcpy(&vcpu_info[cpu].cpu_set, map, sizeof(*map));
+}
+
+void kvm_register_default_affinities(void)
+{
+int i;
+cpu_set_t dfl;
+sched_getaffinity(0, sizeof(dfl), &dfl);
+
+for  (i = 0; i < 256; i++) {
+memcpy(&vcpu_info[i].cpu_set, &dfl, sizeof(dfl));
+}
+}
+
 static inline unsigned long kvm_get_thread_id(void)
 {
 return syscall(SYS_gettid);
-- 
1.5.0.6


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 2/8] provide a gettid function

2008-03-04 Thread Glauber Costa
Since glibc does not provide a gettid call directly, only through
syscall, we wrap one for kvm

Signed-off-by: Glauber Costa <[EMAIL PROTECTED]>
---
 qemu/qemu-kvm.c |6 ++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 43e0f2e..8ee3bf8 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -19,6 +19,7 @@ int kvm_irqchip = 1;
 #include 
 #include 
 #include 
+#include 
 
 extern void perror(const char *s);
 
@@ -50,6 +51,11 @@ struct vcpu_info {
 int thread_id;
 } vcpu_info[256];
 
+static inline unsigned long kvm_get_thread_id(void)
+{
+return syscall(SYS_gettid);
+}
+
 CPUState *qemu_kvm_cpu_env(int index)
 {
 return vcpu_info[index].env;
-- 
1.5.0.6


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 0/8] RFC: vcpu pinning at qemu start

2008-03-04 Thread Glauber Costa
Hi guys,

Here's a first series of patches aiming at vcpu pinning support in qemu.
Ideally, as vcpus are just normal threads, the usual userspace tools can be used
to set the cpu affinity masks.

However, that makes it very difficult to _start_ a vm with vcpus pinned, since
we don't know the thread ids from qemu in advance, nor do we know when the
vcpus are created.

The patches introduce a -cpu-map option, that, if specified, starts the virtual 
cpus
with the specified affinities.

Comments? Welcome. Random rants? Not welcome, but... how can I stop you? So go 
ahead!



-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 3/8] get thread id at thread's creation

2008-03-04 Thread Glauber Costa
store the thread id through our new call as soon as
the process is created.

Signed-off-by: Glauber Costa <[EMAIL PROTECTED]>
---
 qemu/qemu-kvm.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 8ee3bf8..1aeb97b 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -335,6 +335,7 @@ static void *ap_main_loop(void *_env)
 
 vcpu = &vcpu_info[env->cpu_index];
 vcpu->env = env;
+vcpu->thread_id = kvm_get_thread_id();
 sigfillset(&signals);
 //sigdelset(&signals, SIG_IPI);
 sigprocmask(SIG_BLOCK, &signals, NULL);
@@ -381,6 +382,7 @@ int kvm_init_ap(void)
 
 vcpu = &vcpu_info[0];
 vcpu->env = first_cpu;
+vcpu->thread_id = kvm_get_thread_id();
 signal(SIG_IPI, sig_ipi_handler);
 for (i = 1; i < smp_cpus; ++i) {
 kvm_init_new_ap(i, env);
-- 
1.5.0.6


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] loop in copy_user_generic_string

2008-03-04 Thread Avi Kivity
Zdenek Kabelac wrote:
> Hello
>
>
> I'm having a weird problem and am a bit puzzled about where to look
> for this bug.
>
> I'm using T61 - C2D  2GB
>
> So I'll describe symptoms:
>
> When I run inside my 0.5G smp  qemu-kvm guest with Debian these two
> loops in parallel:
>
> 'while : ; do dmsetup status  ; done'
>
> and
>
> 'while : ; do cat /dev/zero >/dev/mapper/any_free_to_use_lvm_partition ; done'
>
> after a while dmsetup start to loop in this place:
>
> [  356.257323]  [] ? copy_user_generic_string+0x17/0x40
>
>
> I'm using preemptible kernel and the code will stay in the
> copy_user_generic_string call forever eating 100%cpu - without
> preemption the kernel gets dead.
>
> With preemption, when I run a second dmsetup status in parallel at this
> moment, the busy-looped dmsetup finishes and the while loop continues
> again until the next dmsetup busy-loop.
>
> I've noticed that if I change inside  drivers/md/dm-ioctl.c
> copy_params  the parameter tmp.data_size in the copy_from_user call to
> just page size (4kB) - or when I replace vmalloc to kmalloc - the busy
> loop will not happen.
>
> So it seems to be related to page jump somehow
>
> Anyway, might anyone have any idea what is going on here?
>   

Most likely movs emulation is broken for long counts.  Please post a 
disassembly of copy_user_generic_string to make sure we're looking at 
the same code.

-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [GIT PULL] KVM fixes for 2.6.25-rc3

2008-03-04 Thread Avi Kivity
Linus, please pull the kvm fixes in the repo and branch

  git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git for-linus

comprising an ABI fix, a few host crash fixes, AMD specific fixes,
a Kbuild fix for the randconfig addicts, fallout from the scaling
work, and other miscellany.

Avi Kivity (5):
  KVM: Make the supported cpuid list a host property rather than a 
vm property
  KVM: Avoid infinite-frequency local apic timer
  KVM: Route irq 0 to vcpu 0 exclusively
  KVM: MMU: Fix race when instantiating a shadow pte
  KVM: VMX: Avoid rearranging switched guest msrs while they are loaded

Izik Eidus (1):
  KVM: remove the usage of the mmap_sem for the protection of the 
memory slots.

Joerg Roedel (4):
  KVM: SVM: Fix lazy FPU switching
  KVM: SVM: set NM intercept when enabling CR0.TS in the guest
  KVM: emulate access to MSR_IA32_MCG_CTL
  KVM: SVM: fix Windows XP 64 bit installation crash

Marcelo Tosatti (2):
  KVM: move alloc_apic_access_page() outside of non-preemptable region
  KVM: make MMU_DEBUG compile again

Paul Knowles (1):
  KVM: Fix kvm_arch_vcpu_ioctl_set_sregs so that set_cr0 works properly

Randy Dunlap (1):
  x86: disable KVM for Voyager and friends

 arch/x86/Kconfig   |2 +-
 arch/x86/kvm/lapic.c   |4 ++
 arch/x86/kvm/mmu.c |   38 ++-
 arch/x86/kvm/paging_tmpl.h |   20 +---
 arch/x86/kvm/svm.c |   26 ++
 arch/x86/kvm/vmx.c |   14 --
 arch/x86/kvm/x86.c |  114 
+---
 include/linux/kvm.h|4 +-
 include/linux/kvm_host.h   |1 +
 virt/kvm/ioapic.c  |8 +++
 virt/kvm/kvm_main.c|5 +-
 11 files changed, 156 insertions(+), 80 deletions(-)

-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] loop in copy_user_generic_string

2008-03-04 Thread Zdenek Kabelac
Hello


I'm having a weird problem and am a bit puzzled about where to look
for this bug.

I'm using T61 - C2D  2GB

So I'll describe symptoms:

When I run inside my 0.5G smp  qemu-kvm guest with Debian these two
loops in parallel:

'while : ; do dmsetup status  ; done'

and

'while : ; do cat /dev/zero >/dev/mapper/any_free_to_use_lvm_partition ; done'

after a while dmsetup start to loop in this place:

[  356.257323] dmsetup   R  running task0  3385   2342
[  356.257323]  81001c9fbc58 812ee85e 
0001
[  356.257323]  00070617 0001 81001be94000
81001f864000
[  356.257323]  81001be94360 00018100cddc 81001be94000
812f13b1
[  356.257323] Call Trace:
[  356.257323]  [] ? thread_return+0x99/0x51b
[  356.257323]  [] ? trace_hardirqs_on_thunk+0x35/0x3a
[  356.257323]  [] ? trace_hardirqs_on+0x139/0x1a0
[  356.257323]  [] ? trace_hardirqs_on_thunk+0x35/0x3a
[  356.257323]  [] ? restore_args+0x0/0x30
[  356.257323]  [] ? copy_user_generic_string+0x17/0x40
[  356.257323]  [] ? :dm_mod:copy_params+0x87/0xb0
[  356.257323]  [] ? __capable+0x11/0x30
[  356.257323]  [] ? :dm_mod:ctl_ioctl+0x169/0x260
[  356.257323]  [] ? :dm_mod:dm_compat_ctl_ioctl+0xd/0x20
[  356.257323]  [] ? compat_sys_ioctl+0x182/0x3d0
[  356.257323]  [] ? vfs_write+0x130/0x170
[  356.257323]  [] ? trace_hardirqs_off_thunk+0x35/0x37
[  356.257323]  [] ? sysenter_do_call+0x1b/0x70
[  356.257323]  [] ? trace_hardirqs_on_thunk+0x35/0x3a


I'm using preemptible kernel and the code will stay in the
copy_user_generic_string call forever eating 100%cpu - without
preemption the kernel gets dead.

With preemption, when I run a second dmsetup status in parallel at this
moment, the busy-looped dmsetup finishes and the while loop continues
again until the next dmsetup busy-loop.

I've noticed that if I change inside  drivers/md/dm-ioctl.c
copy_params  the parameter tmp.data_size in the copy_from_user call to
just page size (4kB) - or when I replace vmalloc to kmalloc - the busy
loop will not happen.

So it seems to be related to page jump somehow

Anyway, might anyone have any idea what is going on here?

Thanks

Zdenek

Cc me please, I'm not a list member.

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] Stop the clock!

2008-03-04 Thread Joerg Roedel
On Tue, Mar 04, 2008 at 12:47:58PM +0200, Avi Kivity wrote:
> With paravirt clocksource, reboot and kexec are broken: the clock keeps 
> updating after the reboot, and the new kernel will have a random memory 
> location trampled occasionally.
> 
> So we need to stop the clock on kexec (in the guest) and reboot (in the 
> host).  On the host side, this can be done either in the kernel, or in 
> userspace via new ioctls.
> 
> Joerg, I think you mentioned you were working on a vm-wide reset ioctl()?  If 
> so, that would be the place to stop the clock on reboot.
> 
> Glauber, can you extend the interface to support stopping the clock?  It 
> needs to be done even outside kexec, for example if the user decides to 
> stop using your clock.

Yes, I am working on that ioctl(), but don't expect a patch this week
because I am temporarily busy with another task.

Joerg

-- 
   |   AMD Saxony Limited Liability Company & Co. KG
 Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
 System|  Register Court Dresden: HRA 4896
 Research  |  General Partner authorized to represent:
 Center| AMD Saxony LLC (Wilmington, Delaware, US)
   | General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/6] In kernel PIT patch

2008-03-04 Thread Anthony Liguori
Yang, Sheng wrote:
> Hi
>
> Here is the last in-kernel PIT patch for KVM. The main change from the last 
> version is support for save/restore. I also tested live migration.
>
> The other modifications include some data structure changes to better 
> support save/restore. I moved the PIT timer outside of the channel 
> structure, which explicitly means only one channel (channel 0) would trigger 
> it.
>
> After fixing the TSC problem on SMP PAE RHEL5/5.1 guests, the patch now works 
> well without any modification of kernel parameters.
>   

How are you measuring the improvements from an in-kernel PIT?  From your 
mails, you're claiming it increases the timer accuracy.  How are you 
measuring it and how much does it improve it?

Do you expect an overall performance improvement from this or is it 
simply about improving timer accuracy?

Regards,

Anthony Liguori



-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] KVM-61/62 build fails on SLES 10

2008-03-04 Thread Tomas Rusnak
Alexey Eremenko napsal(a):
>  >and ran with your patch, and is rather faster than KVM-37 for
> networking...
> 
> Hi Michael !
> 
> If you're looking for blazing fast networking, then KVM (61+) offers 
> VirtIO - PV networking, capable of breaking the 1 Gbps barrier...
> But to achieve such speeds you'll need to compile custom kernels 
> (2.6.25) on both Linux Host & Guest.
> 
> Maybe openSUSE 11/SLES 11 will have the right kernels out-of-the-box.
> 
> Alternatively, you can try the newest Intel e1000 Gigabit Ethernet 
> emulation (also part of newest KVMs).
> This is much easier to setup. This will work for any guest.
> 
> -Technologov, 4.3.2008.
> 
> 
> 
> 
> -
> This SF.net email is sponsored by: Microsoft
> Defy all challenges. Microsoft(R) Visual Studio 2008.
> http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
> 
> 
> 
> 
> ___
> kvm-devel mailing list
> kvm-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/kvm-devel

Hello

Sorry for intruding on your conversation, but are you sure that,
when you want to use virtio, you must have a 2.6.25 kernel on both the guest
and host systems?
I am testing this case right now, and I am running 2.6.25-rc3 only on the guest
system with kvm-62.

-- 
Tomas Rusnak, Korex Networks

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH] Use specific avail/used ring structures

2008-03-04 Thread Dor Laor
repository: /home/dor/src/merge/kvm
branch: trunk
commit bf1e6da21722a4699ea3739e587f282b9a5db418
Author: Dor Laor <[EMAIL PROTECTED]>
Date:   Tue Mar 4 08:57:52 2008 -0500

Use specific avail/used ring structures

This is more accurate than using page_size,
as suggested by Anthony Liguori.

Signed-off-by: Dor Laor <[EMAIL PROTECTED]>

diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index 1367af1..c1095fc 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -203,8 +203,8 @@ void virtio_dev_save(QEMUFile *f, VirtIODevice *vdev)
 
 /* Save the descriptor ring instead of constantly mark them dirty */
 qemu_put_buffer(f, (uint8_t*)vdev->vq[i].vring.desc, 
vdev->vq[i].vring.num * sizeof(VRingDesc));
-qemu_put_buffer(f, (uint8_t*)vdev->vq[i].vring.avail, 
TARGET_PAGE_SIZE);
-qemu_put_buffer(f, (uint8_t*)vdev->vq[i].vring.used, TARGET_PAGE_SIZE);
+qemu_put_buffer(f, (uint8_t*)vdev->vq[i].vring.avail, 
vdev->vq[i].vring.num * 2 + sizeof(VRingAvail));
+qemu_put_buffer(f, (uint8_t*)vdev->vq[i].vring.used, 
vdev->vq[i].vring.num * sizeof(VRingUsedElem) + sizeof(VRingUsed));
 }
 }
 
@@ -236,8 +236,8 @@ int virtio_dev_load(QEMUFile *f, VirtIODevice *vdev, int 
version_id)
  virtqueue_init(&vdev->vq[i], phys_ram_base +
   (vdev->vq[i].pfn << TARGET_PAGE_BITS));
  qemu_get_buffer(f, (uint8_t*)vdev->vq[i].vring.desc, 
vdev->vq[i].vring.num * sizeof(VRingDesc));
- qemu_get_buffer(f, (uint8_t*)vdev->vq[i].vring.avail, 
TARGET_PAGE_SIZE);
- qemu_get_buffer(f, (uint8_t*)vdev->vq[i].vring.used, 
TARGET_PAGE_SIZE);
+ qemu_get_buffer(f, (uint8_t*)vdev->vq[i].vring.avail, 
vdev->vq[i].vring.num * 2 + sizeof(VRingAvail));
+ qemu_get_buffer(f, (uint8_t*)vdev->vq[i].vring.used, 
vdev->vq[i].vring.num * sizeof(VRingUsedElem) + sizeof(VRingUsed));
 }
 
 return 0;


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


  1   2   >