[PATCH] kvm: Fix kvm startup script

2008-08-13 Thread Avi Kivity
From: Sheng Yang [EMAIL PROTECTED]

Signed-off-by: Sheng Yang [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/kvm b/kvm
index 2a7dc85..cb9ecf8 100755
--- a/kvm
+++ b/kvm
@@ -18,6 +18,14 @@ config = ShellConfigParser()
 config.read('config.mak')
 
 external_module = config.get('shell', 'want_module')
+
+arch = config.get('shell', 'arch')
+p = re.compile(^i\d86$)
+if len(p.findall(arch)):
+arch = 'x86_64'
+if arch != 'x86_64' and arch != 'ia64':
+raise Exception('unsupported architecture %s' % arch)
+
 privileged = os.getuid() == 0
 
 optparser = optparse.OptionParser()
@@ -153,8 +161,12 @@ def remove_module(module):
 raise Exception('failed to remove %s module' % (module,))
 
 def insert_module(module):
+if arch == 'x86_64':
+   archdir = 'x86'
+elif arch == 'ia64':
+   archdir = 'ia64'
 if os.spawnl(os.P_WAIT, '/sbin/insmod', 'insmod',
- 'kernel/%s.ko' % (module,)) != 0:
+ 'kernel/' + archdir + '/%s.ko' % (module,)) != 0:
 raise Exception('failed to load kvm module')
 
 def probe_module(module):
@@ -197,8 +209,6 @@ bootdisk = 'c'
 if options.install:
 bootdisk = 'd'
 
-arch = 'x86_64'
-
 if arch == 'x86_64':
 cmd = 'qemu-system-' + arch
 else:
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: qemu: Remove virtio_net tx ring-full heuristic

2008-08-13 Thread Avi Kivity
From: Mark McLoughlin [EMAIL PROTECTED]

virtio_net tries to guess when it has received a tx
notification from the guest whether it indicates that the
guest has no more room in the tx ring and it should
immediately flush the queued buffers.

The heuristic is based on the fact that there are 128
buffer entries in the ring and each packet uses 2 buffers
(i.e. the virtio_net_hdr and the packet's linear data).

Using GSO or increasing the size of the rings will break
that heuristic, so let's remove it and assume that any
notification from the guest after we've disabled
notifications indicates that we should flush our buffers.

Signed-off-by: Mark McLoughlin [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 3a39c8f..b001475 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -175,8 +175,7 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, 
VirtQueue *vq)
 {
 VirtIONet *n = to_virtio_net(vdev);
 
-if (n-tx_timer_active 
-   (vq-vring.avail-idx - vq-last_avail_idx) == 64) {
+if (n-tx_timer_active) {
vq-vring.used-flags = ~VRING_USED_F_NO_NOTIFY;
qemu_del_timer(n-tx_timer);
n-tx_timer_active = 0;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: qemu: Fix virtio_net tx timer

2008-08-13 Thread Avi Kivity
From: Mark McLoughlin [EMAIL PROTECTED]

The current virtio_net tx timer is 2ns, which doesn't make
any sense. Set it to a more reasonable 250us instead.

However, even though we were requesting a 2ns tx timer, it
was actually getting limited to MIN_TIMER_REARM_US which is
currently 250us.

So, even though the timer itself would only fire after
250us, expire_time was only set to +2ns, so we'd get the
timeout callback next time qemu_run_timers() was called from
the mainloop.

This probably accounted for a lot of the jitter in the
throughput numbers - the effective tx timer length was
anywhere between 2ns and 250us depending on e.g. whether
there was rx data available on the tap fd.

Signed-off-by: Mark McLoughlin [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 2e57e5a..3a39c8f 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -26,7 +26,7 @@
 #define VIRTIO_NET_F_MAC   5
 #define VIRTIO_NET_F_GS0   6
 
-#define TX_TIMER_INTERVAL (1000 / 500)
+#define TX_TIMER_INTERVAL 25 /* 250 us */
 
 /* The config defining mac address (6 bytes) */
 struct virtio_net_config
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: qemu: Add VIRTIO_F_NOTIFY_ON_EMPTY

2008-08-13 Thread Avi Kivity
From: Mark McLoughlin [EMAIL PROTECTED]

Set the VIRTIO_F_NOTIFY_ON_EMPTY feature bit so the
guest can rely on us notifying them when the queue
is empty.

Also, only notify when the available queue is empty
*and* when we've finished with all the buffers we
had detached. Right now, when the queue is empty,
we notify the guest for every used buffer.

Signed-off-by: Mark McLoughlin [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index 3429ac8..e035e4e 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -138,6 +138,7 @@ void virtqueue_push(VirtQueue *vq, const VirtQueueElement 
*elem,
 /* Make sure buffer is written before we update index. */
 wmb();
 vq-vring.used-idx++;
+vq-inuse--;
 }
 
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
@@ -187,6 +188,8 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 
 elem-index = head;
 
+vq-inuse++;
+
 return elem-in_num + elem-out_num;
 }
 
@@ -275,6 +278,7 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t 
addr)
 switch (addr) {
 case VIRTIO_PCI_HOST_FEATURES:
ret = vdev-get_features(vdev);
+   ret |= (1  VIRTIO_F_NOTIFY_ON_EMPTY);
break;
 case VIRTIO_PCI_GUEST_FEATURES:
ret = vdev-features;
@@ -431,7 +435,7 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int 
queue_size,
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
 /* Always notify when queue is empty */
-if (vq-vring.avail-idx != vq-last_avail_idx 
+if ((vq-inuse || vq-vring.avail-idx != vq-last_avail_idx) 
(vq-vring.avail-flags  VRING_AVAIL_F_NO_INTERRUPT))
return;
 
diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h
index 61f5038..1adaed3 100644
--- a/qemu/hw/virtio.h
+++ b/qemu/hw/virtio.h
@@ -30,6 +30,10 @@
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED 0x80
 
+/* We notify when the ring is completely used, even if the guest is supressing
+ * callbacks */
+#define VIRTIO_F_NOTIFY_ON_EMPTY24
+
 /* from Linux's linux/virtio_ring.h */
 
 /* This marks a buffer as continuing via the next field. */
@@ -86,6 +90,7 @@ struct VirtQueue
 VRing vring;
 uint32_t pfn;
 uint16_t last_avail_idx;
+int inuse;
 void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
 };
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: qemu: Disable recv notifications until avail buffers exhausted

2008-08-13 Thread Avi Kivity
From: Mark McLoughlin [EMAIL PROTECTED]

Once we know we have buffers available on the receive ring, we can
safely disable notifications.

Signed-off-by: Mark McLoughlin [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index b001475..47349ce 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -106,9 +106,12 @@ static int virtio_net_can_receive(void *opaque)
!(n-vdev.status  VIRTIO_CONFIG_S_DRIVER_OK))
return 0;
 
-if (n-rx_vq-vring.avail-idx == n-rx_vq-last_avail_idx)
+if (n-rx_vq-vring.avail-idx == n-rx_vq-last_avail_idx) {
+   n-rx_vq-vring.used-flags = ~VRING_USED_F_NO_NOTIFY;
return 0;
+}
 
+n-rx_vq-vring.used-flags |= VRING_USED_F_NO_NOTIFY;
 return 1;
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: qemu: Move some code around for the next commit

2008-08-13 Thread Avi Kivity
From: Mark McLoughlin [EMAIL PROTECTED]

Signed-off-by: Mark McLoughlin [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/qemu/vl.c b/qemu/vl.c
index 126944d..f5aacf0 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -4369,19 +4369,6 @@ typedef struct TAPState {
 unsigned int has_vnet_hdr : 1;
 } TAPState;
 
-static void tap_receive(void *opaque, const uint8_t *buf, int size)
-{
-TAPState *s = opaque;
-int ret;
-for(;;) {
-ret = write(s-fd, buf, size);
-if (ret  0  (errno == EINTR || errno == EAGAIN)) {
-} else {
-break;
-}
-}
-}
-
 static ssize_t tap_receive_iov(void *opaque, const struct iovec *iov,
   int iovcnt)
 {
@@ -4395,6 +4382,19 @@ static ssize_t tap_receive_iov(void *opaque, const 
struct iovec *iov,
 return len;
 }
 
+static void tap_receive(void *opaque, const uint8_t *buf, int size)
+{
+TAPState *s = opaque;
+int ret;
+for(;;) {
+ret = write(s-fd, buf, size);
+if (ret  0  (errno == EINTR || errno == EAGAIN)) {
+} else {
+break;
+}
+}
+}
+
 static int tap_can_send(void *opaque)
 {
 TAPState *s = opaque;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: VMX: Clean up magic number 0x66 in init_rmode_tss

2008-08-13 Thread Avi Kivity
From: Sheng Yang [EMAIL PROTECTED]

Signed-off-by: Sheng Yang [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c4510fe..337670b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1732,7 +1732,8 @@ static int init_rmode_tss(struct kvm *kvm)
if (r  0)
goto out;
data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
-   r = kvm_write_guest_page(kvm, fn++, data, 0x66, sizeof(u16));
+   r = kvm_write_guest_page(kvm, fn++, data,
+   TSS_IOPB_BASE_OFFSET, sizeof(u16));
if (r  0)
goto out;
r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: remove unused field from the assigned dev struct

2008-08-13 Thread Avi Kivity
From: Ben-Ami Yassour [EMAIL PROTECTED]

Remove unused field: struct kvm_assigned_pci_dev assigned_dev
from struct: struct kvm_assigned_dev_kernel

Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 24805dc..5dd2f35 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -341,7 +341,6 @@ struct kvm_assigned_dev_kernel {
struct kvm_irq_ack_notifier ack_notifier;
struct work_struct interrupt_work;
struct list_head list;
-   struct kvm_assigned_pci_dev assigned_dev;
int assigned_dev_id;
int host_busnr;
int host_devfn;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: set debug registers after schedulable section

2008-08-13 Thread Avi Kivity
From: Marcelo Tosatti [EMAIL PROTECTED]

The vcpu thread can be preempted after the guest_debug_pre() callback,
resulting in invalid debug registers on the new vcpu.

Move it inside the non-preemptable section.

Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a6299e6..ee005a6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3113,10 +3113,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
down_read(vcpu-kvm-slots_lock);
vapic_enter(vcpu);
 
-preempted:
-   if (vcpu-guest_debug.enabled)
-   kvm_x86_ops-guest_debug_pre(vcpu);
-
 again:
if (vcpu-requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, vcpu-requests))
@@ -3170,6 +3166,9 @@ again:
goto out;
}
 
+   if (vcpu-guest_debug.enabled)
+   kvm_x86_ops-guest_debug_pre(vcpu);
+
vcpu-guest_mode = 1;
/*
 * Make sure that guest_mode assignment won't happen after
@@ -3244,7 +3243,7 @@ out:
if (r  0) {
kvm_resched(vcpu);
down_read(vcpu-kvm-slots_lock);
-   goto preempted;
+   goto again;
}
 
post_kvm_run_save(vcpu, kvm_run);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: configure: fix qemu options with multiple arguments

2008-08-13 Thread Avi Kivity
From: Avi Kivity [EMAIL PROTECTED]

Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/configure b/configure
index 72337c9..3bb10ce 100755
--- a/configure
+++ b/configure
@@ -8,7 +8,7 @@ objcopy=objcopy
 want_module=1
 qemu_cflags=
 qemu_ldflags=
-qemu_opts=
+qemu_opts=()
 cross_prefix=
 arch=`uname -m`
 target_exec=
@@ -40,9 +40,11 @@ EOF
 while [[ $1 = -* ]]; do
 opt=$1; shift
 arg=
+hasarg=
 if [[ $opt = *=* ]]; then
arg=${opt#*=}
opt=${opt%%=*}
+   hasarg=1
 fi
 case $opt in
--prefix)
@@ -70,7 +72,7 @@ while [[ $1 = -* ]]; do
usage
;;
*)
-   qemu_opts=$qemu_opts $opt
+   qemu_opts=([EMAIL PROTECTED] $opt${hasarg:+=$arg})
;;
 esac
 done
@@ -114,7 +116,7 @@ fi
 --kernel-path=$libkvm_kerneldir \
 --prefix=$prefix \
 ${cross_prefix:+--cross-prefix=$cross_prefix} \
-${cross_prefix:+--cpu=$arch} $qemu_opts
+${cross_prefix:+--cpu=$arch} [EMAIL PROTECTED]
 ) || usage
 
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: qemu: use proper open call for init file

2008-08-13 Thread Avi Kivity
From: Philippe Gerum [EMAIL PROTECTED]

This patch fixes misspelled calls to qemu_fopen_file().

Signed-off-by: Philippe Gerum [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/qemu/hw/ds1225y.c b/qemu/hw/ds1225y.c
index 3b91b4f..b1d0284 100644
--- a/qemu/hw/ds1225y.c
+++ b/qemu/hw/ds1225y.c
@@ -171,13 +171,13 @@ void *ds1225y_init(target_phys_addr_t mem_base, const 
char *filename)
 s-protection = 7;
 
 /* Read current file */
-file = qemu_fopen(filename, rb);
+file = qemu_fopen_file(filename, rb);
 if (file) {
 /* Read nvram contents */
 qemu_get_buffer(file, s-contents, s-chip_size);
 qemu_fclose(file);
 }
-s-file = qemu_fopen(filename, wb);
+s-file = qemu_fopen_file(filename, wb);
 if (s-file) {
 /* Write back contents, as 'wb' mode cleaned the file */
 qemu_put_buffer(s-file, s-contents, s-chip_size);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2042889 ] guest: device offline, then kernel panic

2008-08-13 Thread SourceForge.net
Bugs item #2042889, was opened at 2008-08-08 13:16
Message generated for change (Comment added) made by ravpl
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2042889&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Rafal Wijata (ravpl)
Assigned to: Nobody/Anonymous (nobody)
Summary: guest: device offline, then kernel panic

Initial Comment:
host: kvm71, 64bit 2.6.18-92.1.6.el5, 16Gram, 2*X5450(8cores)
guest: 64bit 2.6.18-92.1.6.el5, 3.5Gram, 2cpus, 5hdds on raw partitions(!).

In the guest, I quite often get messages like
kernel: sd 0:0:0:0: ABORT operation started.
kernel: sd 0:0:0:0: ABORT operation timed-out.
[many times like that]
[there were more messages about the device being offline, but I lost them; 
will update if it happens again]
then the filesystem gets remounted read-only, and then the kernel panics with this message (only part 
of the message; that's all I got on the screen)
FS:  () GS:8039f000() knlGS:
CS:  0010 DS:  ES:  CR0: 8005003b
CR2: 00040013 CR3: 00201000 CR4: 06e0
Process sshd (pid: 23911, threadinfo 81006f53a000, task 8100dc2ca0c0)
Stack:
800075dc
8100dc1ba960
8100dc1ba688
810096b52300
8100dd15acc0
8100dc1ba758
8100dc1ba758
810003f2a680
8000d11c
0008
0008
8100dd15acc0
Call Trace:
[800075dc] kmem_cache_free+0x13c/0x1dd
[8000d11c] dput+0xf6/0x114
[800125f3] __fput+0x16c/0x198
[8001a6a7] remove_vma+0x3d/0x64
[80039c60] exit_mmap+0xcf/0xf3
[8003bd73] mmput+0x30/0x83
[800151b6] do_exit+0x28b/0x8d0
[80048a1c] cpuset_exit+0x0/0x6c
[8005d28d] tracesys+0xd5/0xe0
  Code: f0 ff 0f 0f 88 6c 01 00 00 c3 f0 81 2f 00 00 00 01 
74 05 e8
RIP  [80064a2d] _spin_lock+0x0/0xa
RSP 81006f53be10
CR2: 00040013
0Kernel panic - not syncing: Fatal exception 

Even though the kernel panicked, the kvm process was still taking 100% CPU. gdb 
shows the following info - no clue, though, whether it's helpful in any way.

Thread 4 (Thread 1938626880 (LWP 17006)):
#0  0x00368bec6fa7 in ioctl () from /lib64/libc.so.6
#1  0x0050f726 in kvm_run (kvm=0x11b15010, vcpu=0) at libkvm.c:903
#2  0x004e9426 in kvm_cpu_exec (env=value optimized out) at 
/usr/src/kvm-71/qemu/qemu-kvm.c:218
#3  0x004e9700 in ap_main_loop (_env=value optimized out) at 
/usr/src/kvm-71/qemu/qemu-kvm.c:407
#4  0x00368ca062e7 in start_thread () from /lib64/libpthread.so.0
#5  0x00368bece3bd in clone () from /lib64/libc.so.6

Thread 3 (Thread 1087498560 (LWP 17007)):
#0  0x00368bec6fa7 in ioctl () from /lib64/libc.so.6
#1  0x0050f726 in kvm_run (kvm=0x11b15010, vcpu=1) at libkvm.c:903
#2  0x004e9426 in kvm_cpu_exec (env=value optimized out) at 
/usr/src/kvm-71/qemu/qemu-kvm.c:218
#3  0x004e9700 in ap_main_loop (_env=value optimized out) at 
/usr/src/kvm-71/qemu/qemu-kvm.c:407
#4  0x00368ca062e7 in start_thread () from /lib64/libpthread.so.0
#5  0x00368bece3bd in clone () from /lib64/libc.so.6

Thread 2 (Thread 1949133120 (LWP 17014)):
#0  0x00368ca0a687 in pthread_cond_timedwait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0
#1  0x003692202ee5 in handle_fildes_io () from /lib64/librt.so.1
#2  0x00368ca062e7 in start_thread () from /lib64/libpthread.so.0
#3  0x00368bece3bd in clone () from /lib64/libc.so.6

Thread 1 (Thread 47523282295136 (LWP 16990)):
#0  0x00368bec7922 in select () from /lib64/libc.so.6
#1  0x004094b2 in main_loop_wait (timeout=value optimized out) at 
/usr/src/kvm-71/qemu/vl.c:7545
#2  0x004e9342 in kvm_main_loop () at 
/usr/src/kvm-71/qemu/qemu-kvm.c:587
#3  0x00411662 in main (argc=20, argv=0x7fffca7a9b38) at 
/usr/src/kvm-71/qemu/vl.c:7705
#0  0x00368bec7922 in select () from /lib64/libc.so.6


--

Comment By: Rafal Wijata (ravpl)
Date: 2008-08-13 08:40

Message:
Logged In: YES 
user_id=996150
Originator: YES

[guest] And finally the device gets offline
[guest] sd 0:0:0:0: rejecting I/O to offline device

Is it possible that these problems come from the fact that I have
configured raw devices as kvm disks? E.g.:
-drive media=disk,if=scsi,boot=on,file=/dev/sdb2 -drive
media=disk,if=scsi,boot=off,file=/dev/sdc2 ...

--

Comment By: Rafal Wijata (ravpl)
Date: 2008-08-11 15:26

Message:
Logged In: YES 
user_id=996150
Originator: YES

Update_1:
while guest was panicking, I was able to 

[PATCH] kvm: Fix kvm startup script

2008-08-13 Thread Yang, Sheng
From 4993375c0d67fd5eb47891dee48300880aceff1e Mon Sep 17 00:00:00 2001
From: Sheng Yang [EMAIL PROTECTED]
Date: Wed, 13 Aug 2008 15:39:29 +0800
Subject: [PATCH] kvm: Fix kvm startup script


Signed-off-by: Sheng Yang [EMAIL PROTECTED]
---
 kvm |   16 +---
 1 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/kvm b/kvm
index 2a7dc85..cb9ecf8 100755
--- a/kvm
+++ b/kvm
@@ -18,6 +18,14 @@ config = ShellConfigParser()
 config.read('config.mak')

 external_module = config.get('shell', 'want_module')
+
+arch = config.get('shell', 'arch')
+p = re.compile(^i\d86$)
+if len(p.findall(arch)):
+arch = 'x86_64'
+if arch != 'x86_64' and arch != 'ia64':
+raise Exception('unsupported architecture %s' % arch)
+
 privileged = os.getuid() == 0

 optparser = optparse.OptionParser()
@@ -153,8 +161,12 @@ def remove_module(module):
 raise Exception('failed to remove %s module' % 
(module,))

 def insert_module(module):
+if arch == 'x86_64':
+   archdir = 'x86'
+elif arch == 'ia64':
+   archdir = 'ia64'
 if os.spawnl(os.P_WAIT, '/sbin/insmod', 'insmod',
- 'kernel/%s.ko' % (module,)) != 0:
+ 'kernel/' + archdir + '/%s.ko' % (module,)) != 0:
 raise Exception('failed to load kvm module')

 def probe_module(module):
@@ -197,8 +209,6 @@ bootdisk = 'c'
 if options.install:
 bootdisk = 'd'

-arch = 'x86_64'
-
 if arch == 'x86_64':
 cmd = 'qemu-system-' + arch
 else:
--
1.5.6

From 4993375c0d67fd5eb47891dee48300880aceff1e Mon Sep 17 00:00:00 2001
From: Sheng Yang [EMAIL PROTECTED]
Date: Wed, 13 Aug 2008 15:39:29 +0800
Subject: [PATCH] kvm: Fix kvm startup script


Signed-off-by: Sheng Yang [EMAIL PROTECTED]
---
 kvm |   16 +---
 1 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/kvm b/kvm
index 2a7dc85..cb9ecf8 100755
--- a/kvm
+++ b/kvm
@@ -18,6 +18,14 @@ config = ShellConfigParser()
 config.read('config.mak')
 
 external_module = config.get('shell', 'want_module')
+
+arch = config.get('shell', 'arch')
+p = re.compile(^i\d86$)
+if len(p.findall(arch)):
+arch = 'x86_64'
+if arch != 'x86_64' and arch != 'ia64':
+raise Exception('unsupported architecture %s' % arch)
+
 privileged = os.getuid() == 0
 
 optparser = optparse.OptionParser()
@@ -153,8 +161,12 @@ def remove_module(module):
 raise Exception('failed to remove %s module' % (module,))
 
 def insert_module(module):
+if arch == 'x86_64':
+	archdir = 'x86'
+elif arch == 'ia64':
+	archdir = 'ia64'
 if os.spawnl(os.P_WAIT, '/sbin/insmod', 'insmod',
- 'kernel/%s.ko' % (module,)) != 0:
+ 'kernel/' + archdir + '/%s.ko' % (module,)) != 0:
 raise Exception('failed to load kvm module')
 
 def probe_module(module):
@@ -197,8 +209,6 @@ bootdisk = 'c'
 if options.install:
 bootdisk = 'd'
 
-arch = 'x86_64'
-
 if arch == 'x86_64':
 cmd = 'qemu-system-' + arch
 else:
-- 
1.5.6



[PATCH 1/2] KVM: trace: Add event mask support

2008-08-13 Thread Yang, Sheng
From bb7cc3fd1e549d38d6bda10a06feda9339de9d3e Mon Sep 17 00:00:00 2001
From: Sheng Yang [EMAIL PROTECTED]
Date: Wed, 13 Aug 2008 14:29:43 +0800
Subject: [PATCH] KVM: trace: Add event mask support

Allow user space applications to specify one or more filter masks to 
limit the events being captured.

Signed-off-by: Feng (Eric) Liu [EMAIL PROTECTED]
Signed-off-by: Sheng Yang [EMAIL PROTECTED]
---
 include/linux/kvm.h  |7 +++
 virt/kvm/kvm_trace.c |   30 +++---
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index d9ef7d3..05713f2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -18,6 +18,9 @@
 struct kvm_user_trace_setup {
__u32 buf_size; /* sub_buffer size of each per-cpu */
__u32 buf_nr; /* the number of sub_buffers of each per-cpu */
+   __u16 cat_mask; /* the tracing categories are enabled */
+   __u16 pad1[3];
+   __u64 act_bitmap[16]; /* the actions are enabled for each category 
*/
 };

 /* for KVM_CREATE_MEMORY_REGION */
@@ -292,6 +295,7 @@ struct kvm_s390_interrupt {
 };

 #define KVM_TRC_SHIFT   16
+#define KVM_TRC_CAT_NR_BITS 12
 /*
  * kvm trace categories
  */
@@ -305,6 +309,9 @@ struct kvm_s390_interrupt {
 #define KVM_TRC_VMEXIT  (KVM_TRC_ENTRYEXIT + 0x02)
 #define KVM_TRC_PAGE_FAULT  (KVM_TRC_HANDLER + 0x01)

+#define KVM_TRC_CAT(evt)(((evt)  KVM_TRC_SHIFT)  0x0fff)
+#define KVM_TRC_ACT(evt)((evt)  (~0u  KVM_TRC_SHIFT))
+
 #define KVM_TRC_HEAD_SIZE   12
 #define KVM_TRC_CYCLE_SIZE  8
 #define KVM_TRC_EXTRA_MAX   7
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 41dcc84..bb4046c 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -27,6 +27,8 @@

 struct kvm_trace {
int trace_state;
+   u16 cat_mask;
+   u64 act_bitmap[16];
struct rchan *rchan;
struct dentry *lost_file;
atomic_t lost_records;
@@ -40,6 +42,23 @@ struct kvm_trace_probe {
marker_probe_func *probe_func;
 };

+static inline int check_event_mask(struct kvm_trace *kt, u32 event)
+{
+   unsigned long category;
+   int i;
+
+   category = KVM_TRC_CAT(event);
+   if (!(category  kt-cat_mask))
+   return 1;
+
+   i = find_first_bit(category, KVM_TRC_CAT_NR_BITS);
+
+   if (!test_bit(KVM_TRC_ACT(event), kt-act_bitmap[i]))
+   return 1;
+
+   return 0;
+}
+
 static inline int calc_rec_size(int timestamp, int extra)
 {
int rec_size = KVM_TRC_HEAD_SIZE;
@@ -56,12 +75,15 @@ static void kvm_add_trace(void *probe_private, 
void *call_data,
struct kvm_trace_rec rec;
struct kvm_vcpu *vcpu;
inti, size;
-   u32extra;
+   u32extra, event;

if (unlikely(kt-trace_state != KVM_TRACE_STATE_RUNNING))
return;

-   rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32));
+   event   = TRACE_REC_EVENT_ID(va_arg(*args, u32));
+   if (check_event_mask(kt, event))
+   return;
+
vcpu= va_arg(*args, struct kvm_vcpu *);
rec.pid = current-tgid;
rec.vcpu_id = vcpu-vcpu_id;
@@ -70,7 +92,7 @@ static void kvm_add_trace(void *probe_private, void 
*call_data,
WARN_ON(!(extra = KVM_TRC_EXTRA_MAX));
extra   = min_t(u32, extra, KVM_TRC_EXTRA_MAX);

-   rec.rec_val |= TRACE_REC_TCS(p-timestamp_in)
+   rec.rec_val = event | TRACE_REC_TCS(p-timestamp_in)
| TRACE_REC_NUM_DATA_ARGS(extra);

if (p-timestamp_in) {
@@ -177,6 +199,8 @@ static int do_kvm_trace_enable(struct 
kvm_user_trace_setup *kuts)
if (!kt-rchan)
goto err;

+   kt-cat_mask = kuts-cat_mask;
+   memcpy(kt-act_bitmap, kuts-act_bitmap, sizeof(kuts-act_bitmap));
kvm_trace = kt;

for (i = 0; i  ARRAY_SIZE(kvm_trace_probes); i++) {
--
1.5.6

From bb7cc3fd1e549d38d6bda10a06feda9339de9d3e Mon Sep 17 00:00:00 2001
From: Sheng Yang [EMAIL PROTECTED]
Date: Wed, 13 Aug 2008 14:29:43 +0800
Subject: [PATCH] KVM: trace: Add event mask support.

Allow user space applications to specify one or more filter masks to limit the
events being captured.

Signed-off-by: Feng (Eric) Liu [EMAIL PROTECTED]
Signed-off-by: Sheng Yang [EMAIL PROTECTED]
---
 include/linux/kvm.h  |7 +++
 virt/kvm/kvm_trace.c |   30 +++---
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index d9ef7d3..05713f2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -18,6 +18,9 @@
 struct kvm_user_trace_setup {
 	__u32 buf_size; /* sub_buffer size of each per-cpu */
 	__u32 buf_nr; /* the number of sub_buffers of each per-cpu */
+	__u16 cat_mask; /* the tracing categories are enabled */
+	__u16 pad1[3];
+	__u64 act_bitmap[16]; /* the actions are enabled for each category */
 };
 
 /* for 

Re: [PATCH 1/2] KVM: trace: Add event mask support

2008-08-13 Thread Avi Kivity

Yang, Sheng wrote:

From bb7cc3fd1e549d38d6bda10a06feda9339de9d3e Mon Sep 17 00:00:00 2001
From: Sheng Yang [EMAIL PROTECTED]
Date: Wed, 13 Aug 2008 14:29:43 +0800
Subject: [PATCH] KVM: trace: Add event mask support

Allow user space applications to specify one or more filter masks to 
limit the events being captured.


  


Sorry about not responding to this earlier.  In light of the ltt-ng 
discussion, it looks like almost all of kvmtrace can be moved to common 
code, with only the trace markers themselves remaining.


So I'd like to wait with this until we've made the transition (and 
afterwards, we'll either have the functionality automatically, or we'll 
need to make the changes to core code).



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: Fix kvm startup script

2008-08-13 Thread Avi Kivity

Yang, Sheng wrote:

From 4993375c0d67fd5eb47891dee48300880aceff1e Mon Sep 17 00:00:00 2001
From: Sheng Yang [EMAIL PROTECTED]
Date: Wed, 13 Aug 2008 15:39:29 +0800
Subject: [PATCH] kvm: Fix kvm startup script

  


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] KVM: trace: Add event mask support

2008-08-13 Thread Yang, Sheng
On Wednesday 13 August 2008 16:04:59 Avi Kivity wrote:
 Yang, Sheng wrote:
  From bb7cc3fd1e549d38d6bda10a06feda9339de9d3e Mon Sep 17 00:00:00
  2001 From: Sheng Yang [EMAIL PROTECTED]
  Date: Wed, 13 Aug 2008 14:29:43 +0800
  Subject: [PATCH] KVM: trace: Add event mask support
 
  Allow user space applications to specify one or more filter masks
  to limit the events being captured.

 Sorry about not responding to this earlier.  In light of the ltt-ng
 discussion, it looks like almost all of kvmtrace can be moved to
 common code, with only the trace markers themselves remaining.

 So I'd like to wait with this until we've made the transition (and
 afterwards, we'll either have the functionality automatically, or
 we'll need to make the changes to core code).

OK, that's fine. Looking forward to it. :)

-- 
regards
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] Fix dependencies to run dyngen early enough

2008-08-13 Thread Avi Kivity

Philippe Gerum wrote:

Add dependency to get dyngen-opc.h generated prior to compiling qemu/exec.c.

  


qemu-devel material as well.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] Fix --disable-kvm build

2008-08-13 Thread Avi Kivity

Philippe Gerum wrote:

 static void vga_map(PCIDevice *pci_dev, int region_num,
 uint32_t addr, uint32_t size, int type)
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index 7e28428..9ba81a3 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -114,6 +114,10 @@ extern kvm_context_t kvm_context;
 #define kvm_enabled() (0)
 #define qemu_kvm_irqchip_in_kernel() (0)
 #define qemu_kvm_pit_in_kernel() (0)
+#define qemu_kvm_cpu_env(cpu)  ({ (void)cpu; NULL; })
+#define kvm_save_registers(cpu)do { (void)cpu; } while(0)
+#define kvm_load_registers(env)do { (void)env; } while(0)
+#define kvm_init_new_ap(cpu, env)  do { (void)cpu; (void)env; } while(0)
 #endif
 
  


Please use static inline functions rather than #defines, especially with 
those statement expressions.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [REGRESSION?] -no-kvm-irqchip broken with kvm.git

2008-08-13 Thread Jan Kiszka
Avi Kivity wrote:
 Jan Kiszka wrote:
 I thought I tested out -no-kvm-irqchip, but apparently not well
 enough. I'll try to see what went wrong.
 

 Should this issue have been fixed meanwhile? I just gave latest git a
 try and - as far as I recall my tests before holiday correctly - things
 look the same. At least some Linux 2.6.23 kernel still hangs here during
 early boot with -no-kvm-irqchip.

   
 
 
 No, I was on vacation, and haven't recovered completely since I've
 returned.
 

No problem, same here. :)

Jan



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 2/4] Provide dummy cpu_vendor_string

2008-08-13 Thread Avi Kivity

Philippe Gerum wrote:

This patch defines cpu_vendor_string for linux-user builds.

  


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/1] Fix --disable-kvm build

2008-08-13 Thread Philippe Gerum
Avi Kivity wrote:
 Philippe Gerum wrote:
  static void vga_map(PCIDevice *pci_dev, int region_num,
  uint32_t addr, uint32_t size, int type)
 diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
 index 7e28428..9ba81a3 100644
 --- a/qemu/qemu-kvm.h
 +++ b/qemu/qemu-kvm.h
 @@ -114,6 +114,10 @@ extern kvm_context_t kvm_context;
  #define kvm_enabled() (0)
  #define qemu_kvm_irqchip_in_kernel() (0)
  #define qemu_kvm_pit_in_kernel() (0)
 +#define qemu_kvm_cpu_env(cpu)({ (void)cpu; NULL; })
 +#define kvm_save_registers(cpu)do { (void)cpu; } while(0)
 +#define kvm_load_registers(env)do { (void)env; } while(0)
 +#define kvm_init_new_ap(cpu, env)do { (void)cpu; (void)env; }
 while(0)
  #endif
  
   
 
 Please use static inline functions rather than #defines, especially with
 those statement expressions.
 

Here is a better patch. Hopefully.

Signed-off-by: Philippe Gerum [EMAIL PROTECTED]
---
 qemu/gdbstub.c  |2 +-
 qemu/hw/acpi.c  |6 +-
 qemu/hw/vga.c   |2 ++
 qemu/qemu-kvm.h |   33 ++---
 4 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/qemu/gdbstub.c b/qemu/gdbstub.c
index d828844..2af7830 100644
--- a/qemu/gdbstub.c
+++ b/qemu/gdbstub.c
@@ -33,8 +33,8 @@
 #include qemu-char.h
 #include sysemu.h
 #include gdbstub.h
-#include qemu-kvm.h
 #endif
+#include qemu-kvm.h
 
 #include qemu_socket.h
 #ifdef _WIN32
diff --git a/qemu/hw/acpi.c b/qemu/hw/acpi.c
index e3cd8d7..35bac86 100644
--- a/qemu/hw/acpi.c
+++ b/qemu/hw/acpi.c
@@ -23,10 +23,8 @@
 #include sysemu.h
 #include i2c.h
 #include smbus.h
-#ifdef USE_KVM
-#include qemu-kvm.h
-#endif
 #include string.h
+#include qemu-kvm.h
 
 //#define DEBUG
 
@@ -723,9 +721,7 @@ void qemu_system_cpu_hot_add(int cpu, int state)
 fprintf(stderr, cpu %d creation failed\n, cpu);
 return;
 }
-#ifdef USE_KVM
 kvm_init_new_ap(cpu, env);
-#endif
 }
 
 qemu_set_irq(pm_state-irq, 1);
diff --git a/qemu/hw/vga.c b/qemu/hw/vga.c
index 95d6033..f5c472c 100644
--- a/qemu/hw/vga.c
+++ b/qemu/hw/vga.c
@@ -1981,6 +1981,7 @@ typedef struct PCIVGAState {
 VGAState vga_state;
 } PCIVGAState;
 
+#ifdef USE_KVM
 void vga_update_vram_mapping(VGAState *s, unsigned long vga_ram_begin,
 unsigned long vga_ram_end)
 {
@@ -2010,6 +2011,7 @@ void vga_update_vram_mapping(VGAState *s, unsigned long 
vga_ram_begin,
s-map_end  = vga_ram_end;
 }
 }
+#endif
 
 static void vga_map(PCIDevice *pci_dev, int region_num,
 uint32_t addr, uint32_t size, int type)
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index 7e28428..e4b5e16 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -15,11 +15,8 @@
 int kvm_main_loop(void);
 int kvm_qemu_init(void);
 int kvm_qemu_create_context(void);
-void kvm_init_new_ap(int cpu, CPUState *env);
 int kvm_init_ap(void);
 void kvm_qemu_destroy(void);
-void kvm_load_registers(CPUState *env);
-void kvm_save_registers(CPUState *env);
 void kvm_load_mpstate(CPUState *env);
 void kvm_save_mpstate(CPUState *env);
 int kvm_cpu_exec(CPUState *env);
@@ -59,8 +56,6 @@ int kvm_arch_try_push_interrupts(void *opaque);
 void kvm_arch_update_regs_for_sipi(CPUState *env);
 void kvm_arch_cpu_reset(CPUState *env);
 
-CPUState *qemu_kvm_cpu_env(int index);
-
 void qemu_kvm_aio_wait_start(void);
 void qemu_kvm_aio_wait(void);
 void qemu_kvm_aio_wait_end(void);
@@ -110,11 +105,35 @@ extern kvm_context_t kvm_context;
 #define kvm_enabled() (kvm_allowed)
 #define qemu_kvm_irqchip_in_kernel() kvm_irqchip_in_kernel(kvm_context)
 #define qemu_kvm_pit_in_kernel() kvm_pit_in_kernel(kvm_context)
-#else
+
+CPUState *qemu_kvm_cpu_env(int index);
+void kvm_load_registers(CPUState *env);
+void kvm_save_registers(CPUState *env);
+void kvm_init_new_ap(int cpu, CPUState *env);
+
+#else  /* !USE_KVM */
+
 #define kvm_enabled() (0)
 #define qemu_kvm_irqchip_in_kernel() (0)
 #define qemu_kvm_pit_in_kernel() (0)
-#endif
+
+static inline CPUState *qemu_kvm_cpu_env(int index)
+{
+   return NULL;
+}
+
+static inline void kvm_save_registers(CPUState *env)
+{
+}
+
+static inline void kvm_load_registers(CPUState *env)
+{
+}
+
+static inline void kvm_init_new_ap(int cpu, CPUState *env)
+{
+}
+#endif /* USE_KVM */
 
 void kvm_mutex_unlock(void);
 void kvm_mutex_lock(void);
-- 
1.5.4.3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2042889 ] guest: device offline, then kernel panic

2008-08-13 Thread SourceForge.net
Bugs item #2042889, was opened at 2008-08-08 13:16
Message generated for change (Comment added) made by ravpl
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2042889&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Rafal Wijata (ravpl)
Assigned to: Nobody/Anonymous (nobody)
Summary: guest: device offline, then kernel panic

Initial Comment:
host: kvm71, 64bit 2.6.18-92.1.6.el5, 16Gram, 2*X5450(8cores)
guest: 64bit 2.6.18-92.1.6.el5, 3.5Gram, 2cpus, 5hdds on raw partitions(!).

In the guest, i'm getting quite often messages like
kernel: sd 0:0:0:0: ABORT operation started.
kernel: sd 0:0:0:0: ABORT operation timed-out.
[many times like that]
[there was more messages concerning the device is offline, but I lost them, 
will update if it happens again]
then filesystem gets remounted read-only, then kernel panics with message(part 
of the message only, that's what i got on the screen)
FS:  () GS:8039f000() knlGS:
CS:  0010 DS:  ES:  CR0: 8005003b
CR2: 00040013 CR3: 00201000 CR4: 06e0
Process sshd (pid: 23911, threadinfo 81006f53a000, task 8100dc2ca0c0)
Stack:
800075dc
8100dc1ba960
8100dc1ba688
810096b52300
8100dd15acc0
8100dc1ba758
8100dc1ba758
810003f2a680
8000d11c
0008
0008
8100dd15acc0
Call Trace:
[800075dc] kmem_cache_free+0x13c/0x1dd
[8000d11c] dput+0xf6/0x114
[800125f3] __fput+0x16c/0x198
[8001a6a7] remove_vma+0x3d/0x64
[80039c60] exit_mmap+0xcf/0xf3
[8003bd73] mmput+0x30/0x83
[800151b6] do_exit+0x28b/0x8d0
[80048a1c] cpuset_exit+0x0/0x6c
[8005d28d] tracesys+0xd5/0xe0
  Code: f0 ff 0f 0f 88 6c 01 00 00 c3 f0 81 2f 00 00 00 01 
74 05 e8
RIP  [80064a2d] _spin_lock+0x0/0xa
RSP 81006f53be10
CR2: 00040013
0Kernel panic - not syncing: Fatal exception 

Even though the kernel panic, the kvm process was still taking 100% CPU. gdb 
shows following info - no clue though if it's helpful in any way.

Thread 4 (Thread 1938626880 (LWP 17006)):
#0  0x00368bec6fa7 in ioctl () from /lib64/libc.so.6
#1  0x0050f726 in kvm_run (kvm=0x11b15010, vcpu=0) at libkvm.c:903
#2  0x004e9426 in kvm_cpu_exec (env=value optimized out) at 
/usr/src/kvm-71/qemu/qemu-kvm.c:218
#3  0x004e9700 in ap_main_loop (_env=value optimized out) at 
/usr/src/kvm-71/qemu/qemu-kvm.c:407
#4  0x00368ca062e7 in start_thread () from /lib64/libpthread.so.0
#5  0x00368bece3bd in clone () from /lib64/libc.so.6

Thread 3 (Thread 1087498560 (LWP 17007)):
#0  0x00368bec6fa7 in ioctl () from /lib64/libc.so.6
#1  0x0050f726 in kvm_run (kvm=0x11b15010, vcpu=1) at libkvm.c:903
#2  0x004e9426 in kvm_cpu_exec (env=value optimized out) at 
/usr/src/kvm-71/qemu/qemu-kvm.c:218
#3  0x004e9700 in ap_main_loop (_env=value optimized out) at 
/usr/src/kvm-71/qemu/qemu-kvm.c:407
#4  0x00368ca062e7 in start_thread () from /lib64/libpthread.so.0
#5  0x00368bece3bd in clone () from /lib64/libc.so.6

Thread 2 (Thread 1949133120 (LWP 17014)):
#0  0x00368ca0a687 in pthread_cond_timedwait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0
#1  0x003692202ee5 in handle_fildes_io () from /lib64/librt.so.1
#2  0x00368ca062e7 in start_thread () from /lib64/libpthread.so.0
#3  0x00368bece3bd in clone () from /lib64/libc.so.6

Thread 1 (Thread 47523282295136 (LWP 16990)):
#0  0x00368bec7922 in select () from /lib64/libc.so.6
#1  0x004094b2 in main_loop_wait (timeout=value optimized out) at 
/usr/src/kvm-71/qemu/vl.c:7545
#2  0x004e9342 in kvm_main_loop () at 
/usr/src/kvm-71/qemu/qemu-kvm.c:587
#3  0x00411662 in main (argc=20, argv=0x7fffca7a9b38) at 
/usr/src/kvm-71/qemu/vl.c:7705
#0  0x00368bec7922 in select () from /lib64/libc.so.6


--

Comment By: Rafal Wijata (ravpl)
Date: 2008-08-13 10:53

Message:
Logged In: YES 
user_id=996150
Originator: YES

Another crash with guest bt, please advise how to debug?

R13: 8100dd107000 R14: 80077090 R15: 80418e80
FS:  () GS:8039f000()
knlGS:
CS:  0010 DS: 0018 ES: 0018 CR0: 8005003b
CR2: 2aec1f42e000 CR3: d49e4000 CR4: 06e0

Call Trace:
IRQ  [8003eadd] dev_watchdog+0x98/0xc0
[800953c2] run_timer_softirq+0x133/0x1af
[80011ed2] __do_softirq+0x5e/0xd6
[8005e2fc] call_softirq+0x1c/0x28
[8006c6e4] 

Re: [PATCH 1/1] KVM: remove unused field from the assigned dev struct

2008-08-13 Thread Avi Kivity

Ben-Ami Yassour wrote:

Remove unused field: struct kvm_assigned_pci_dev assigned_dev
from struct: struct kvm_assigned_dev_kernel
  


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] VT-d: changes to support KVM

2008-08-13 Thread Yang, Sheng
On Thursday 07 August 2008 22:14:46 Ben-Ami Yassour wrote:
 From: Kay, Allen M [EMAIL PROTECTED]

 This patch extends the VT-d driver to support KVM

Seems OK to me.

Avi, is this patch good enough to send to PCI guys to have a review?

-- 
regards
Yang, Sheng

 [Ben: fixed memory pinning]

 Signed-off-by: Kay, Allen M [EMAIL PROTECTED]
 Signed-off-by: Weidong Han [EMAIL PROTECTED]
 Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
 ---
  drivers/pci/dmar.c   |4 +-
  drivers/pci/intel-iommu.c|  117
 +- drivers/pci/iova.c  
 |2 +-
  {drivers/pci = include/linux}/intel-iommu.h |   11 +++
  {drivers/pci = include/linux}/iova.h|0
  5 files changed, 127 insertions(+), 7 deletions(-)
  rename {drivers/pci = include/linux}/intel-iommu.h (94%)
  rename {drivers/pci = include/linux}/iova.h (100%)

 diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
 index 8bf86ae..1df28ea 100644
 --- a/drivers/pci/dmar.c
 +++ b/drivers/pci/dmar.c
 @@ -26,8 +26,8 @@

  #include linux/pci.h
  #include linux/dmar.h
 -#include iova.h
 -#include intel-iommu.h
 +#include linux/iova.h
 +#include linux/intel-iommu.h

  #undef PREFIX
  #define PREFIX DMAR:
 diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
 index 8d0e60a..1eefc60 100644
 --- a/drivers/pci/intel-iommu.c
 +++ b/drivers/pci/intel-iommu.c
 @@ -20,6 +20,7 @@
   * Author: Anil S Keshavamurthy [EMAIL PROTECTED]
   */

 +#undef DEBUG
  #include linux/init.h
  #include linux/bitmap.h
  #include linux/debugfs.h
 @@ -33,8 +34,8 @@
  #include linux/dma-mapping.h
  #include linux/mempool.h
  #include linux/timer.h
 -#include iova.h
 -#include intel-iommu.h
 +#include linux/iova.h
 +#include linux/intel-iommu.h
  #include asm/proto.h /* force_iommu in this header in x86-64*/
  #include asm/cacheflush.h
  #include asm/iommu.h
 @@ -160,7 +161,7 @@ static inline void *alloc_domain_mem(void)
   return iommu_kmem_cache_alloc(iommu_domain_cache);
  }

 -static inline void free_domain_mem(void *vaddr)
 +static void free_domain_mem(void *vaddr)
  {
   kmem_cache_free(iommu_domain_cache, vaddr);
  }
 @@ -1414,7 +1415,7 @@ static void domain_remove_dev_info(struct
 dmar_domain *domain) * find_domain
   * Note: we use struct pci_dev-dev.archdata.iommu stores the info
   */
 -struct dmar_domain *
 +static struct dmar_domain *
  find_domain(struct pci_dev *pdev)
  {
   struct device_domain_info *info;
 @@ -2430,3 +2431,111 @@ int __init intel_iommu_init(void)
   return 0;
  }

 +void intel_iommu_domain_exit(struct dmar_domain *domain)
 +{
 + u64 end;
 +
 + /* Domain 0 is reserved, so dont process it */
 + if (!domain)
 + return;
 +
 + end = DOMAIN_MAX_ADDR(domain-gaw);
 + end = end  (~PAGE_MASK_4K);
 +
 + /* clear ptes */
 + dma_pte_clear_range(domain, 0, end);
 +
 + /* free page tables */
 + dma_pte_free_pagetable(domain, 0, end);
 +
 + iommu_free_domain(domain);
 + free_domain_mem(domain);
 +}
 +EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
 +
 +struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
 +{
 + struct dmar_drhd_unit *drhd;
 + struct dmar_domain *domain;
 + struct intel_iommu *iommu;
 +
 + drhd = dmar_find_matched_drhd_unit(pdev);
 + if (!drhd) {
 + printk(KERN_ERR intel_iommu_domain_alloc: drhd == NULL\n);
 + return NULL;
 + }
 +
 + iommu = drhd-iommu;
 + if (!iommu) {
 + printk(KERN_ERR
 + intel_iommu_domain_alloc: iommu == NULL\n);
 + return NULL;
 + }
 + domain = iommu_alloc_domain(iommu);
 + if (!domain) {
 + printk(KERN_ERR
 + intel_iommu_domain_alloc: domain == NULL\n);
 + return NULL;
 + }
 + if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
 + printk(KERN_ERR
 + intel_iommu_domain_alloc: domain_init() failed\n);
 + intel_iommu_domain_exit(domain);
 + return NULL;
 + }
 + return domain;
 +}
 +EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
 +
 +int intel_iommu_context_mapping(
 + struct dmar_domain *domain, struct pci_dev *pdev)
 +{
 + int rc;
 + rc = domain_context_mapping(domain, pdev);
 + return rc;
 +}
 +EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
 +
 +int intel_iommu_page_mapping(
 + struct dmar_domain *domain, dma_addr_t iova,
 + u64 hpa, size_t size, int prot)
 +{
 + int rc;
 + rc = domain_page_mapping(domain, iova, hpa, size, prot);
 + return rc;
 +}
 +EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
 +
 +void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8
 devfn) +{
 + detach_domain_for_dev(domain, bus, devfn);
 +}
 +EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
 +
 +struct dmar_domain *
 +intel_iommu_find_domain(struct pci_dev *pdev)
 +{
 + return find_domain(pdev);
 +}
 

Re: [PATCH 1/2] VT-d: changes to support KVM

2008-08-13 Thread Avi Kivity

Yang, Sheng wrote:

On Thursday 07 August 2008 22:14:46 Ben-Ami Yassour wrote:
  

From: Kay, Allen M [EMAIL PROTECTED]

This patch extends the VT-d driver to support KVM



Seems OK to me.

Avi, is this patch good enough to send to PCI guys to have a review?

  


I think so.  There are some trivial coding style issues, but nothing 
serious.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] KVM: Device assignemnt with VT-d

2008-08-13 Thread Yang, Sheng
On Thursday 07 August 2008 22:14:47 Ben-Ami Yassour wrote:
 Based on a patch by: Kay, Allen M [EMAIL PROTECTED]

 This patch enables pci device assignment based on VT-d support.
 When a device is assigned to the guest, the guest memory is pinned
 and the mapping is updated in the VT-d IOMMU.


I am afraid there still some compatible problem...

 Signed-off-by: Kay, Allen M [EMAIL PROTECTED]
 Signed-off-by: Weidong Han [EMAIL PROTECTED]
 Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
 ---
  arch/x86/kvm/Makefile  |3 +
  arch/x86/kvm/vtd.c |  203
[snip]
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index a97157c..5cfc21a 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -35,6 +35,7 @@
  #include linux/module.h
  #include linux/mman.h
  #include linux/highmem.h
 +#include linux/intel-iommu.h

This broken external kernel modules before 2.6.27... If we wrapped it 
with CONFIG_DMAR, it would also broken the commit before the patch 
checked in and after DMAR enabled in kernel... Need a version number 
judgement?

 diff --git a/include/asm-x86/kvm_host.h
 b/include/asm-x86/kvm_host.h index ef019b5..b141949 100644
 --- a/include/asm-x86/kvm_host.h
 +++ b/include/asm-x86/kvm_host.h
 @@ -354,6 +354,7 @@ struct kvm_arch{
*/
   struct list_head active_mmu_pages;
   struct list_head assigned_dev_head;
 + struct dmar_domain *intel_iommu_domain;

Need wrapped by CONFIG_DMAR?

-- 
regards
Yang, Sheng


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] KVM: Device assignemnt with VT-d

2008-08-13 Thread Avi Kivity

Yang, Sheng wrote:

On Thursday 07 August 2008 22:14:47 Ben-Ami Yassour wrote:
  

Based on a patch by: Kay, Allen M [EMAIL PROTECTED]

This patch enables pci device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned
and the mapping is updated in the VT-d IOMMU.




I am afraid there still some compatible problem...

  

Signed-off-by: Kay, Allen M [EMAIL PROTECTED]
Signed-off-by: Weidong Han [EMAIL PROTECTED]
Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
---
 arch/x86/kvm/Makefile  |3 +
 arch/x86/kvm/vtd.c |  203


[snip]
  

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a97157c..5cfc21a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include linux/module.h
 #include linux/mman.h
 #include linux/highmem.h
+#include linux/intel-iommu.h



This broken external kernel modules before 2.6.27... If we wrapped it 
with CONFIG_DMAR, it would also broken the commit before the patch 
checked in and after DMAR enabled in kernel... Need a version number 
judgement?


  


kernel patches should not consider external module issues.  That keeps 
the code clean (at the expense of making the external module's 
maintainer's life more difficult, but that's their problem).



diff --git a/include/asm-x86/kvm_host.h
b/include/asm-x86/kvm_host.h index ef019b5..b141949 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -354,6 +354,7 @@ struct kvm_arch{
 */
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
+   struct dmar_domain *intel_iommu_domain;



Need wrapped by CONFIG_DMAR?

  


I guess we can keep this, one pointer is not that expensive.  But we 
should make sure all the iommu functions are available when iommu is 
unconfigured.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Xen-devel] Re: Announcing: Open OVF project source code availibility

2008-08-13 Thread Ian Jackson
Daniel P. Berrange writes ([Xen-devel] Re: Announcing: Open OVF project source 
code availibility):
  Building an XML-RPC
 interface just to call into functions for manipulating OVF files is
 rather overkill. 

Also it is far from clear whether merely changing the concrete
representation of the interface (eg from in-program function call to
XML-RPC, but with identical semantics) can be used to evade the GPL's
`work as a whole' provisions in this way.

This question is disputed and as far as I know has never been tested
in court.

Ian.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] KVM: Device assignemnt with VT-d

2008-08-13 Thread Yang, Sheng
On Wednesday 13 August 2008 17:46:03 Avi Kivity wrote:
 Yang, Sheng wrote:
  On Thursday 07 August 2008 22:14:47 Ben-Ami Yassour wrote:
  Based on a patch by: Kay, Allen M [EMAIL PROTECTED]
 
  This patch enables pci device assignment based on VT-d support.
  When a device is assigned to the guest, the guest memory is
  pinned and the mapping is updated in the VT-d IOMMU.
 
  I am afraid there still some compatible problem...
 
  Signed-off-by: Kay, Allen M [EMAIL PROTECTED]
  Signed-off-by: Weidong Han [EMAIL PROTECTED]
  Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
  ---
   arch/x86/kvm/Makefile  |3 +
   arch/x86/kvm/vtd.c |  203
 
  [snip]
 
  diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
  index a97157c..5cfc21a 100644
  --- a/arch/x86/kvm/x86.c
  +++ b/arch/x86/kvm/x86.c
  @@ -35,6 +35,7 @@
   #include linux/module.h
   #include linux/mman.h
   #include linux/highmem.h
  +#include linux/intel-iommu.h
 
  This broken external kernel modules before 2.6.27... If we
  wrapped it with CONFIG_DMAR, it would also broken the commit
  before the patch checked in and after DMAR enabled in kernel...
  Need a version number judgement?

 kernel patches should not consider external module issues.  That
 keeps the code clean (at the expense of making the external
 module's maintainer's life more difficult, but that's their
 problem).

Yeah, thanks for pointing it out. It's indeed complicated to consider this 
kind of issue... :)

And I think now our attitude towards external modules is not 
encouraging? For after this patch, we can discard 
external-modules-compat.h as well. :)

-- 
regards
Yang, Sheng

  diff --git a/include/asm-x86/kvm_host.h
  b/include/asm-x86/kvm_host.h index ef019b5..b141949 100644
  --- a/include/asm-x86/kvm_host.h
  +++ b/include/asm-x86/kvm_host.h
  @@ -354,6 +354,7 @@ struct kvm_arch{
  */
 struct list_head active_mmu_pages;
 struct list_head assigned_dev_head;
  +  struct dmar_domain *intel_iommu_domain;
 
  Need wrapped by CONFIG_DMAR?

 I guess we can keep this, one pointer is not that expensive.  But
 we should make sure all the iommu functions are available when
 iommu is unconfigured.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 0/2] do not run halted vcpu's

2008-08-13 Thread Avi Kivity

Marcelo Tosatti wrote:

Avi Kivity wrote:

  

Any reason this is not in __vcpu_run()?

Our main loop could look like

while (no reason to stop)
  if (runnable)
   enter guest
  else
   block
  deal with aftermath

kvm_emulate_halt would then simply modify the mp state.



Like this?

- I don't think it is necessary to test for pending signals inside irq
safe section, so move that to exit processing.

  



It is.  We may have received a signal after ioctl processing started but 
before entry.  If we don't check before entry, nothing ensures 
we'll ever exit (or we may exit due to some other reason, but the exit 
will be delayed).



- Same for need_resched().

  


Incorrect for the same reason.  There's no guarantee we will ever exit 
if we ignore the rescheduling IPI.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] KVM: x86: set debug registers after schedulable section

2008-08-13 Thread Avi Kivity

Marcelo Tosatti wrote:

The vcpu thread can be preempted after the guest_debug_pre() callback,
resulting in invalid debug registers on the new vcpu.

Move it inside the non-preemptable section.
  


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/2] KVM: x86: do not execute halted vcpus (v2)

2008-08-13 Thread Avi Kivity

Avi Kivity wrote:

Marcelo Tosatti wrote:

Offline or uninitialized vcpu's can be executed if requested to perform
userspace work.
Follow Avi's suggestion to handle halted vcpu's in the main loop,
simplifying kvm_emulate_halt(). Introduce a new vcpu-requests bit to
indicate events that promote state from halted to running.

Also standardize vcpu wake sites.
  


Apart from moving the entry checks to the exit, this looks fine (if 
scary... this code is sensitive).




btw, this is a step forward for big real mode.  We can later split the 
guest entry to have an emulation path if the guest state is not hardware 
friendly.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/9] substitute is_allocated_mem with more general is_containing_region

2008-08-13 Thread Avi Kivity

Glauber Costa wrote:

Signed-off-by: Glauber Costa [EMAIL PROTECTED]
---
 libkvm/libkvm.c |   34 +-
 libkvm/libkvm.h |2 +-
 qemu/qemu-kvm.c |2 +-
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index 33f00b7..c885dee 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -140,6 +140,27 @@ int get_intersecting_slot(unsigned long phys_addr)
return -1;
 }
 
+/* Returns -1 if this slot is not totally contained on any other,

+ * and the number of the slot otherwise */
+int get_container_slot(uint64_t phys_addr, unsigned long size)
+{
+   int i;
+
+   for (i = 0; i  KVM_MAX_NUM_MEM_REGIONS ; ++i)
+   if (slots[i].len  slots[i].phys_addr = phys_addr 
+   (slots[i].phys_addr + slots[i].len) = phys_addr + size)
+   return i;
+   return -1;
+}
+
  


What about partially containing (or: overlapping) slots?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC 0/9] Memory registration rework

2008-08-13 Thread Avi Kivity

Glauber Costa wrote:

Hi folks,

The following series contain a proposal for our memory registration
framework. This is by no means complete, and rather, a first step only.

This first step, btw, has the goal of taking the kvm-specific memory 
registration
functions from all over the code, so we can make the merging with qemu easier.

Note that I'm putting kvm_cpu_register_phys_memory() _inside_ 
cpu_register_phys_memory().
To do that, we need to be resilient against the same region being registered 
multiple times,
and should be able to interpret the flags embedded in phys_offset in a 
meaningful way.
Although arguably with some bugs yet unknown, this series does exactly that.

For that to work, we have to be sure that we'll never reach a situation in 
which we
register a piece of memory, and later on, register another region that contains 
it. Current
code does that, so we're fine. The opposite situation, namely, registering a 
large piece of memory
and then re-registering pieces of it, is perfectly valid.

In the to-be-merged version, if it ever exists, I intend to comment all those 
issues very well,
to get an as predictable interface as possible.

There's another option of doing this, as anthony pointed out in earlier private 
comments to me,
which is scanning the already registered regions right before starting 
execution, and building our
maps. While this is valid, we can't run away from doing what I'm doing, because 
some areas are
manipulated _after_ the machine has started. For example, the pci region, for 
the hotplug case.

Note that this is not tested in anything but x86. 

  


Looks good.  The current duplication of memory registration is very 
annoying.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/9] substitute is_allocated_mem with more general is_containing_region

2008-08-13 Thread Glauber Costa
On Wed, Aug 13, 2008 at 8:41 AM, Avi Kivity [EMAIL PROTECTED] wrote:
 Glauber Costa wrote:

 Signed-off-by: Glauber Costa [EMAIL PROTECTED]
 ---
  libkvm/libkvm.c |   34 +-
  libkvm/libkvm.h |2 +-
  qemu/qemu-kvm.c |2 +-
  3 files changed, 23 insertions(+), 15 deletions(-)

 diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
 index 33f00b7..c885dee 100644
 --- a/libkvm/libkvm.c
 +++ b/libkvm/libkvm.c
 @@ -140,6 +140,27 @@ int get_intersecting_slot(unsigned long phys_addr)
return -1;
  }
  +/* Returns -1 if this slot is not totally contained on any other,
 + * and the number of the slot otherwise */
 +int get_container_slot(uint64_t phys_addr, unsigned long size)
 +{
 +   int i;
 +
 +   for (i = 0; i  KVM_MAX_NUM_MEM_REGIONS ; ++i)
 +   if (slots[i].len  slots[i].phys_addr = phys_addr 
 +   (slots[i].phys_addr + slots[i].len) = phys_addr +
 size)
 +   return i;
 +   return -1;
 +}
 +


 What about partially containing (or: overlapping) slots?

That would be handled by kvm_is_intersecting_memory. Ideally, I would
like to have a single function for both.
Now that the default action for intersecting memory is to do nothing
but return, that would be possible.

I'll make sure I address that in a later version

 --
 error compiling committee.c: too many arguments to function

 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html




-- 
Glauber Costa.
Free as in Freedom
http://glommer.net

The less confident you are, the more serious you have to act.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: Device assignment: Check for privileges before assigning irq

2008-08-13 Thread Amit Shah
Even though we don't share irqs at the moment, we should ensure
regular user processes don't try to allocate system resources.

We check for capability to access IO devices (CAP_SYS_RAWIO) before
we request_irq on behalf of the guest.

Noticed by Avi.

Signed-off-by: Amit Shah [EMAIL PROTECTED]
---
 arch/x86/kvm/x86.c |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ee005a6..fb32c3d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -191,6 +191,11 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
  kvm_assigned_dev_interrupt_work_handler);
 
if (irqchip_in_kernel(kvm)) {
+   if (!capable(CAP_SYS_RAWIO)) {
+   return -EPERM;
+   goto out;
+   }
+
if (assigned_irq-host_irq)
match-host_irq = assigned_irq-host_irq;
else
-- 
1.5.6.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/9] move kvm_cpu_register_memory_area into qemu's

2008-08-13 Thread Anthony Liguori

Glauber Costa wrote:

Turn the explicit calls to kvm_cpu_register_memory_area()
an empty function. Provide a __kvm_cpu_register_memory_area()
that is called from within cpu_register_memory_area().
To avoid registering mmio regions to the hypervisor, since we depend on
them faulting, we keep track of what regions are mmio regions too.

This is to be bisection friendly. Direct calls are to be removed
in a later commit.

diff --git a/qemu/exec.c b/qemu/exec.c
index 7a68062..14c3852 100644
--- a/qemu/exec.c
+++ b/qemu/exec.c
@@ -2196,6 +2196,9 @@ void cpu_register_physical_memory(target_phys_addr_t 
start_addr,
 kqemu_set_phys_mem(start_addr, size, phys_offset);
 }
 #endif
+
+__kvm_cpu_register_physical_memory(start_addr, size, phys_offset);

  


This is a great place to add a callback of some sort (like QEMUAccel).  
Then it can be shared by both kqemu and kvm.


Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/4] Use proper open call for init file

2008-08-13 Thread Anthony Liguori

Avi Kivity wrote:

Philippe Gerum wrote:

This patch fixes misspelled calls to qemu_fopen_file().

  


Please send to qemu-devel as this is a generic qemu issue, not kvm 
specific.


qemu_fopen_file() only exists in KVM (it's one of the live migration 
changes).


Regards,

Anthony Liguori


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] kvm: qemu: Handle tap fds with IFF_VNET_HDR

2008-08-13 Thread Avi Kivity

Mark McLoughlin wrote:

Okay, I've just sent a patch to add TUNGETIFF:

  http://marc.info/?l=linux-netdev&m=121863813904363

See below patch for how we'd use it. Don't apply this until the kernel
patch is accepted, obviously.

  


Please repost once that's in.


The -help string is what libvirt would use to detect whether qemu has
support for this; clearly libvirt would also need to detect the
availability of TUNGETIFF in the running kernel.

  


Neat trick.  It would be nice to have a formal qemu capabilities query, 
like we have for kvm (and a proper forwards and backwards compatible 
qemu monitor protocol, and...)


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] kvm: qemu: Handle tap fds with IFF_VNET_HDR

2008-08-13 Thread Daniel P. Berrange
On Wed, Aug 13, 2008 at 07:24:30PM +0300, Avi Kivity wrote:
 Mark McLoughlin wrote:
 Okay, I've just sent a patch to add TUNGETIFF:
 
   http://marc.info/?l=linux-netdev&m=121863813904363
 
 See below patch for how we'd use it. Don't apply this until the kernel
 patch is accepted, obviously.
 
   
 
 Please repost once that's in.
 
 The -help string is what libvirt would use to detect whether qemu has
 support for this; clearly libvirt would also need to detect the
 availability of TUNGETIFF in the running kernel.
 
   
 
 Neat trick.  It would be nice to have a formal qemu capabilities query, 
 like we have for kvm (and a proper forwards and backwards compatible 
 qemu monitor protocol, and...)

s/neat trick/sick hack/ but it has been working reasonably well so far. We
have maintained compat with QEMU from 0.8.0 onwards.

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


KVM Guest detection

2008-08-13 Thread jd
Hi

Is there a way to detect if a particular machine is a VM running on KVM 
hypervisor or not. i.e from within a guest can one figure out if it is running 
on top of a hypervisor and not physical machine ? [For both Linux guests and 
Windows guests ]

/Jd



  
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM Guest detection

2008-08-13 Thread Anthony Liguori

jd wrote:

Hi

Is there a way to detect if a particular machine is a VM running on KVM 
hypervisor or not. i.e from within a guest can one figure out if it is running 
on top of a hypervisor and not physical machine ? [For both Linux guests and 
Windows guests ]
  


The preferred way of doing this is checking CPUID leaf 0x40000000 for 
the signature KVMKVMKVM.


Most hypervisors (KVM, Xen, VMware, and Hyper-V) are adopting this as 
the preferred detection mechanism.


Regards,

Anthony Liguori


/Jd



  
--

To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: SR-IOV: patches are available for Linux kernel [4/4]

2008-08-13 Thread Randy Dunlap
On Tue, 12 Aug 2008 16:46:39 +0800 Zhao, Yu wrote:

 [PATCH 4/4] PCI: document SR-IOV
 
 SR-IOV Documentation.
 
 Signed-off-by: Yu Zhao [EMAIL PROTECTED]
 Signed-off-by:  Eddie Dong [EMAIL PROTECTED]
 
 ---
  Documentation/ABI/testing/sysfs-bus-pci |   13 ++
  Documentation/PCI/00-INDEX  |2 
  Documentation/PCI/pci-iov-howto.txt |  170 
 +++
  3 files changed, 185 insertions(+), 0 deletions(-)
 
 diff --git a/Documentation/ABI/testing/sysfs-bus-pci 
 b/Documentation/ABI/testing/sysfs-bus-pci
 index ceddcff..9ada27b 100644
 --- a/Documentation/ABI/testing/sysfs-bus-pci
 +++ b/Documentation/ABI/testing/sysfs-bus-pci
 @@ -9,3 +9,16 @@ Description:
   that some devices may have malformatted data.  If the
   underlying VPD has a writable section then the
   corresponding section of this file will be writable.
 +
 +What:/sys/bus/pci/devices/.../iov
 +Date:August 2008
 +Contact: Yu Zhao [EMAIL PROTECTED]
 +Description:
 + This file will appear when SR-IOV capability is enabled
 + by the device driver if supported. It holds number of
 + available Virtual Functions and Bus, Device, Function
 + number and status of these Virtual Functions that belong
 + to this device (Physical Function). This file can be

This one file contains available VFs, Bus:dev:Func number, and status?
Sounds like a misuse (abuse) of sysfs one value per file mantra, but I'll
read below to see how it's done.

[added GregKH to cc: list]


 + written using same format as what can be read out, to
 + change the number of available Virtual Functions and to
 + enable or disable a Virtual Functions.

 diff --git a/Documentation/PCI/pci-iov-howto.txt 
 b/Documentation/PCI/pci-iov-howto.txt
 new file mode 100644
 index 000..2d7ae64
 --- /dev/null
 +++ b/Documentation/PCI/pci-iov-howto.txt
 @@ -0,0 +1,170 @@
 + PCI Express Single Root I/O Virtualization HOWTO
 + Copyright (C) 2008 Intel Corporation
 + Yu Zhao [EMAIL PROTECTED]
 +
 +
 +1. Overview
 +
 +1.1 What is SR-IOV
 +
 +SR-IOV is PCI Express Extended Capability, which makes one physical device

   SR-IOV is a PCI Express Extended Capability which makes one physical device

 +becomes multiple virtual devices. The physical device is referred as Physical

   become | appear as | function as multiple virtual devices.
 The physical device is referred to as the 
Physical

 +Function while the virtual devices are refereed as Virtual Functions.

  referred to as Virtual Functions.

 +Allocation of Virtual Functions can be dynamically controlled by Physical
 +Function via registers encapsulated in the capability. By default, this
 +feature is not enabled and the Physical Function behaves as traditional PCIe
 +device. Once it's turned on, each Virtual Function's PCI configuration space
 +can be accessed by its own Bus, Device and Function Number (Routing ID). And
 +each Virtual Function also has PCI Memory Space, which is used to map its
 +register set. Virtual Function device driver operates on the register set so
 +it can be functional and appear as a real existing PCI device.
 +
 +1.2 What is ARI
 +
 +Alternative Routing-ID Interpretation allows a PCI Express Endpoint to use
 +its device number field as part of function number. Traditionally, an
 +Endpoint can only have 8 functions, and the device number of all Endpoints
 +is zero. With ARI enabled, an Endpoint can have up to 256 functions. ARI is
 +managed via a ARI Forwarding bit in the Device Capabilities 2 register of

   managed via the ARI Forwarding bit

 +the PCI Express Capability on the Root Port or the Downstream Port and a new
 +ARI Capability on the Endpoint.
 +
 +
 +2. User Guide
 +
 +2.1 How can I manage SR-IOV
 +
 +SR-IOV can be managed by reading or writing /sys/bus/pci/devices/.../iov.
 +Legal operations on this file include:
 + - Read: will get number of available VFs and a list of them.
 + - Write: bb:dd.f={1|0} will enable or disable a VF.
 + - Write: NumVFs=N will change number of available VFs.
 +
 +2.2 How can I use Virtual Functions
 +
 +Virtual Functions can be treated as hot-plugged PCI devices in the kernel,
 +so they should be able to work in the same way as real PCI devices.
 +NOTE: Virtual Function device driver must be loaded to make it work.
 +
 +
 +3. Developer Guide
 +
 +3.1 SR-IOV APIs
 +
 +To enable SR-IOV, Physical Function device driver needs to call:
 + int pci_iov_enable(struct pci_dev *dev, int nvfs,
 + int (*cb)(struct pci_dev *, int, int))
 +NOTE: this function sleeps 2 seconds waiting on hardware transaction
 +completion according to SR-IOV specification.
 +
 +To disable SR-IOV, Physical Function device driver needs to 

Re: KVM Guest detection

2008-08-13 Thread jd
Thanks. Good to know that most hypervisors are using this mechanism.

/Jd

--- On Wed, 8/13/08, Anthony Liguori [EMAIL PROTECTED] wrote:

 From: Anthony Liguori [EMAIL PROTECTED]
 Subject: Re: KVM  Guest detection
 To: [EMAIL PROTECTED]
 Cc: KVM List kvm@vger.kernel.org
 Date: Wednesday, August 13, 2008, 11:59 AM
 jd wrote:
  Hi
 
  Is there a way to detect if a particular machine is a
 VM running on KVM hypervisor or not. i.e from within a guest
 can one figure out if it is running on top of a hypervisor
 and not physical machine ? [For both Linux guests and
 Windows guests ]

 
 The preferred way of doing this is checking CPUID leaf
 0x40000000 for 
 the signature KVMKVMKVM.
 
 Most hypervisors (KVM, Xen, VMware, and Hyper-V) are
 adopting this as 
 the preferred detection mechanism.
 
 Regards,
 
 Anthony Liguori
 
  /Jd
 
 
 

  --
  To unsubscribe from this list: send the line
 unsubscribe kvm in
  the body of a message to [EMAIL PROTECTED]
  More majordomo info at 
 http://vger.kernel.org/majordomo-info.html

 
 --
 To unsubscribe from this list: send the line
 unsubscribe kvm in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at 
 http://vger.kernel.org/majordomo-info.html


  
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: bios: Put AP boot up code to 0x1000

2008-08-13 Thread Sebastian Herbszt

Avi Kivity wrote:


The AP enters HALT state with IF=0. What can make it resume at the
following jmp 1b? NMI? SMI?



Yes, but also INIT.


Are you sure the AP will continue after the hlt instruction on INIT?
I think it will start to execute code from the reset vector (0xfff0).


We might ignore IF=0 for hlt (which would be a bug).


I think i read something about a need for 4kb alignment for the
startup vector in the mp specs. AP_BOOT_ADDR is currently
at 0x10000 (16 x 4kb).



Yes, the SIPI address is 4K x sipi vector.



- Sebastian

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: SR-IOV: patches are available for Linux kernel [4/4]

2008-08-13 Thread Greg KH
On Wed, Aug 13, 2008 at 01:09:16PM -0700, Randy Dunlap wrote:
 On Tue, 12 Aug 2008 16:46:39 +0800 Zhao, Yu wrote:
 
  [PATCH 4/4] PCI: document SR-IOV
  
  SR-IOV Documentation.
  
  Signed-off-by: Yu Zhao [EMAIL PROTECTED]
  Signed-off-by:  Eddie Dong [EMAIL PROTECTED]
  
  ---
   Documentation/ABI/testing/sysfs-bus-pci |   13 ++
   Documentation/PCI/00-INDEX  |2 
   Documentation/PCI/pci-iov-howto.txt |  170 
  +++
   3 files changed, 185 insertions(+), 0 deletions(-)
  
  diff --git a/Documentation/ABI/testing/sysfs-bus-pci 
  b/Documentation/ABI/testing/sysfs-bus-pci
  index ceddcff..9ada27b 100644
  --- a/Documentation/ABI/testing/sysfs-bus-pci
  +++ b/Documentation/ABI/testing/sysfs-bus-pci
  @@ -9,3 +9,16 @@ Description:
  that some devices may have malformatted data.  If the
  underlying VPD has a writable section then the
  corresponding section of this file will be writable.
  +
  +What:  /sys/bus/pci/devices/.../iov
  +Date:  August 2008
  +Contact:   Yu Zhao [EMAIL PROTECTED]
  +Description:
  +   This file will appear when SR-IOV capability is enabled
  +   by the device driver if supported. It holds number of
  +   available Virtual Functions and Bus, Device, Function
  +   number and status of these Virtual Functions that belong
  +   to this device (Physical Function). This file can be
 
 This one file contains available VFs, Bus:dev:Func number, and status?
 Sounds like a misuse (abuse) of sysfs one value per file mantra, but I'll
 read below to see how it's done.
 
 [added GregKH to cc: list]

I agree, why not just display the device tree of available devices like
all other busses do?

thanks,

greg k-h
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC][PATCH v2] VMX: Invalid guest state emulation

2008-08-13 Thread Mohammed Gamal
This patch aims to allow emulation whenever guest state is not valid for VMX 
operation. 
This usually happens in mode switches with guests such as older versions of 
gfxboot and FreeDOS with HIMEM.

The patch aims to address this issue, it introduces the following:

- A function that invokes the x86 emulator when the guest state is not valid 
(borrowed from Guillaume Thouvenin's real mode patches)
- A function that checks that guest register state is VMX compliant
- A module parameter that enables these operations. It is disabled by default, 
in order not to intervene with KVM's normal operation

This version adds the following:

- An emulation required flag, which is set on mode switches
- Improved guest state checking functions
- Emulation is done on guest entry, rather than directly on mode switching 
utilising the emulation flag.

Signed-off-by: Laurent Vivier [EMAIL PROTECTED]
Signed-off-by: Guillaume Thouvenin [EMAIL PROTECTED]
Signed-off-by: Mohammed Gamal [EMAIL PROTECTED]
---
 arch/x86/kvm/vmx.c |  511 +++-
 1 files changed, 381 insertions(+), 130 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c4510fe..bc23db4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -49,6 +49,9 @@ module_param(flexpriority_enabled, bool, 0);
 static int enable_ept = 1;
 module_param(enable_ept, bool, 0);
 
+static int emulate_invalid_guest_state = 0;
+module_param(emulate_invalid_guest_state, bool, 0);
+
 struct vmcs {
u32 revision_id;
u32 abort;
@@ -86,6 +89,7 @@ struct vcpu_vmx {
} irq;
} rmode;
int vpid;
+   bool emulation_required;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -1294,7 +1298,9 @@ static void fix_pmode_dataseg(int seg, struct 
kvm_save_segment *save)
 static void enter_pmode(struct kvm_vcpu *vcpu)
 {
unsigned long flags;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+   vmx-emulation_required = 1;
vcpu-arch.rmode.active = 0;
 
vmcs_writel(GUEST_TR_BASE, vcpu-arch.rmode.tr.base);
@@ -1311,17 +1317,19 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
update_exception_bitmap(vcpu);
 
-   fix_pmode_dataseg(VCPU_SREG_ES, vcpu-arch.rmode.es);
-   fix_pmode_dataseg(VCPU_SREG_DS, vcpu-arch.rmode.ds);
-   fix_pmode_dataseg(VCPU_SREG_GS, vcpu-arch.rmode.gs);
-   fix_pmode_dataseg(VCPU_SREG_FS, vcpu-arch.rmode.fs);
+   if(!emulate_invalid_guest_state) {
+   fix_pmode_dataseg(VCPU_SREG_ES, vcpu-arch.rmode.es);
+   fix_pmode_dataseg(VCPU_SREG_DS, vcpu-arch.rmode.ds);
+   fix_pmode_dataseg(VCPU_SREG_GS, vcpu-arch.rmode.gs);
+   fix_pmode_dataseg(VCPU_SREG_FS, vcpu-arch.rmode.fs);
 
-   vmcs_write16(GUEST_SS_SELECTOR, 0);
-   vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
+   vmcs_write16(GUEST_SS_SELECTOR, 0);
+   vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
 
-   vmcs_write16(GUEST_CS_SELECTOR,
-vmcs_read16(GUEST_CS_SELECTOR)  ~SELECTOR_RPL_MASK);
-   vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+   vmcs_write16(GUEST_CS_SELECTOR,
+vmcs_read16(GUEST_CS_SELECTOR)  
~SELECTOR_RPL_MASK);
+   vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+   }
 }
 
 static gva_t rmode_tss_base(struct kvm *kvm)
@@ -1351,7 +1359,9 @@ static void fix_rmode_seg(int seg, struct 
kvm_save_segment *save)
 static void enter_rmode(struct kvm_vcpu *vcpu)
 {
unsigned long flags;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+   vmx-emulation_required = 1;
vcpu-arch.rmode.active = 1;
 
vcpu-arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
@@ -1373,20 +1383,22 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
update_exception_bitmap(vcpu);
 
-   vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE)  4);
-   vmcs_write32(GUEST_SS_LIMIT, 0x);
-   vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
+   if(!emulate_invalid_guest_state) {
+   vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE)  4);
+   vmcs_write32(GUEST_SS_LIMIT, 0x);
+   vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
 
-   vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
-   vmcs_write32(GUEST_CS_LIMIT, 0x);
-   if (vmcs_readl(GUEST_CS_BASE) == 0x)
-   vmcs_writel(GUEST_CS_BASE, 0xf);
-   vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE)  4);
+   vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
+   vmcs_write32(GUEST_CS_LIMIT, 0x);
+   if (vmcs_readl(GUEST_CS_BASE) == 0x)
+   vmcs_writel(GUEST_CS_BASE, 0xf);
+   vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE)  4);
 
-   fix_rmode_seg(VCPU_SREG_ES, vcpu-arch.rmode.es);
-   fix_rmode_seg(VCPU_SREG_DS, 

[PATCH 4/5] uhci: rewrite UHCI emulator, fully async operation with multiple outstanding transactions

2008-08-13 Thread Max Krasnyansky
This is essentially a re-write of the QEMU UHCI layer. My initial goal
was to support fully async operation with multiple outstanding async
transactions. Along the way I realized that I can greatly simplify
and cleanup the overall logic. There was a lot of duplicate and confusing
code in the UHCI data structure parsing and other places.
We were actually violating UHCI spec in handling async ISOC transaction
(host controller is not supposed to write into the frame pointer).

The reason I wanted to support fully async operation is because current
synchronous version is unusable with most devices exported from host
(via usb-linux.c). Transactions take a long time and the whole VM becomes
slow as hell.

Current async support is very rudimentary and for the most part
non-functional. Single transaction at a time is simply not enough. I have
a device for which XP driver submits both IN and OUT packets at the same
time. IN packet always times out unless OUT packet makes it to the device.
Hence we must be able to process both in order for that device to work.

The new code is backwards compatible and was first tested against original
synchronous usb-linux.c and builtin usb devices like tablet which is also
synchronous. Rewrite of the usb-linux.c is coming up next.

Async support was tested against various XP versions (ie XP, SP2, SP3) and
a bunch of different USB devices: serial port controllers, mice, keyboard,
JTAG dongles (from Xilinx and Altera).

ISOC support was only lightly tested and needs more work. It's not any worse
than current code though.

UHCI parser changes are probably somewhat hard to review without the
understanding of the UHCI spec.
The async design should be fairly easy to follow. Basically we have a list
of async objects for each pending transfer. Async objects are tagged with
the original TD (transfer descriptor) address and token. We now support
unlimited number of outstanding isoc and one outstanding bulk/intr/ctrl
transfer per QH (queue head). UHCI spec does not have a clear protocol for
the cancelation of the transfer requests. Driver can yank out TDs on any
frame boundary. In order to handle that I added somewhat fancy TD validation
logic to avoid unnecessary cancelations.

Signed-off-by: Max Krasnyansky [EMAIL PROTECTED]
---
 hw/usb-uhci.c |  897 ++---
 1 files changed, 533 insertions(+), 364 deletions(-)

diff --git a/hw/usb-uhci.c b/hw/usb-uhci.c
index b55fd84..c6e7751 100644
--- a/hw/usb-uhci.c
+++ b/hw/usb-uhci.c
@@ -3,6 +3,10 @@
  *
  * Copyright (c) 2005 Fabrice Bellard
  *
+ * Copyright (c) 2008 Max Krasnyansky
+ * Magor rewrite of the UHCI data structures parser and frame processor
+ * Support for fully async operation and multiple outstanding transactions
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the Software), to 
deal
  * in the Software without restriction, including without limitation the rights
@@ -27,8 +31,7 @@
 #include qemu-timer.h
 
 //#define DEBUG
-//#define DEBUG_PACKET
-//#define DEBUG_ISOCH
+//#define DEBUG_DUMP_DATA
 
 #define UHCI_CMD_FGR  (1  4)
 #define UHCI_CMD_EGSM (1  3)
@@ -66,6 +69,52 @@
 
 #define NB_PORTS 2
 
+#ifdef DEBUG
+#define dprintf printf
+
+const char *pid2str(int pid)
+{
+switch (pid) {
+case USB_TOKEN_SETUP: return SETUP;
+case USB_TOKEN_IN:return IN;
+case USB_TOKEN_OUT:   return OUT;
+}
+return ?;
+}
+
+#else
+#define dprintf(...)
+#endif
+
+#ifdef DEBUG_DUMP_DATA
+static void dump_data(const uint8_t *data, int len)
+{
+int i;
+
+printf(uhci: data: );
+for(i = 0; i  len; i++)
+printf( %02x, data[i]);
+printf(\n);
+}
+#else
+static void dump_data(const uint8_t *data, int len) {}
+#endif
+
+/* 
+ * Pending async transaction.
+ * 'packet' must be the first field because completion
+ * handler does (UHCIAsync *) pkt cast.
+ */
+typedef struct UHCIAsync {
+USBPacket packet;
+struct UHCIAsync *next;
+uint32_t  td;
+uint32_t  token;
+int8_tvalid;
+uint8_t   done;
+uint8_t   buffer[2048];
+} UHCIAsync;
+
 typedef struct UHCIPort {
 USBPort port;
 uint16_t ctrl;
@@ -85,16 +134,10 @@ typedef struct UHCIState {
 
 /* Interrupts that should be raised at the end of the current frame.  */
 uint32_t pending_int_mask;
-/* For simplicity of implementation we only allow a single pending USB
-   request.  This means all usb traffic on this controller is effectively
-   suspended until that transfer completes.  When the transfer completes
-   the next transfer from that queue will be processed.  However
-   other queues will not be processed until the next frame.  The solution
-   is to allow multiple pending requests.  */
-uint32_t async_qh;
-uint32_t async_frame_addr;
-USBPacket usb_packet;
-uint8_t usb_buf[2048];
+
+/* Active packets */
+

[PATCH 5/5] husb: rewrite Linux host USB layer, fully async operation

2008-08-13 Thread Max Krasnyansky
This is a follow up to the async UHCI patch. Both BULK and ISOC transactions
are now fully asynchronous. I left CONTROL synchronous for now, ideally
we want it to be async too and it should not be that hard to do now.

This patch obviously requires UHCI patch. The combo has been tested with
various devices. See the UHCI patch description for list of the devices.
Most of the testing was done with the KVM flavor of QEMU.

Signed-off-by: Max Krasnyansky [EMAIL PROTECTED]
---
 usb-linux.c |  460 +++
 1 files changed, 182 insertions(+), 278 deletions(-)

diff --git a/usb-linux.c b/usb-linux.c
index 622255c..786ef1b 100644
--- a/usb-linux.c
+++ b/usb-linux.c
@@ -3,8 +3,9 @@
  *
  * Copyright (c) 2005 Fabrice Bellard
  *
- * Support for host device auto connect  disconnect
- *   Copyright (c) 2008 Max Krasnyansky
+ * Copyright (c) 2008 Max Krasnyansky
+ *  Support for host device auto connect  disconnect
+ *  Magor rewrite to support fully async operation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the Software), to 
deal
@@ -55,12 +56,15 @@ static int usb_host_find_device(int *pbus_num, int *paddr,
 const char *devname);
 
 //#define DEBUG
-//#define DEBUG_ISOCH
-//#define USE_ASYNCIO
+
+#ifdef DEBUG
+#define dprintf printf
+#else
+#define dprintf(...)
+#endif
 
 #define USBDEVFS_PATH /proc/bus/usb
 #define PRODUCT_NAME_SZ 32
-#define SIG_ISOCOMPLETE (SIGRTMIN+7)
 #define MAX_ENDPOINTS 16
 
 struct sigaction sigact;
@@ -68,21 +72,18 @@ struct sigaction sigact;
 /* endpoint association data */
 struct endp_data {
 uint8_t type;
+uint8_t halted;
 };
 
-
-
-/* FIXME: move USBPacket to PendingURB */
 typedef struct USBHostDevice {
 USBDevice dev;
-int fd;
-int pipe_fds[2];
-USBPacket *packet;
+int   fd;
+
+uint8_t   descr[1024];
+int   descr_len;
+int   configuration;
+
 struct endp_data endp_table[MAX_ENDPOINTS];
-int configuration;
-uint8_t descr[1024];
-int descr_len;
-int urbs_ready;
 
 QEMUTimer *timer;
 
@@ -93,6 +94,26 @@ typedef struct USBHostDevice {
 struct USBHostDevice *next;
 } USBHostDevice;
 
+static int is_isoc(USBHostDevice *s, int ep)
+{
+return s-endp_table[ep - 1].type == USBDEVFS_URB_TYPE_ISO;
+}
+
+static int is_halted(USBHostDevice *s, int ep)
+{
+return s-endp_table[ep - 1].halted;
+}
+
+static void clear_halt(USBHostDevice *s, int ep)
+{
+s-endp_table[ep - 1].halted = 0;
+}
+
+static void set_halt(USBHostDevice *s, int ep)
+{
+s-endp_table[ep - 1].halted = 1;
+}
+
 static USBHostDevice *hostdev_list;
 
 static void hostdev_link(USBHostDevice *dev)
@@ -128,64 +149,94 @@ static USBHostDevice *hostdev_find(int bus_num, int addr)
 return NULL;
 }
 
-typedef struct PendingURB {
-struct usbdevfs_urb *urb;
-int status;
-struct PendingURB *next;
-} PendingURB;
+/* 
+ * Async URB state.
+ * We always allocate one isoc descriptor even for bulk transfers
+ * to simplify allocation and casts. 
+ */
+typedef struct AsyncURB
+{
+struct usbdevfs_urb urb;
+struct usbdevfs_iso_packet_desc isocpd;
 
-static PendingURB *pending_urbs = NULL;
+USBPacket *packet;
+USBHostDevice *hdev;
+} AsyncURB;
 
-static int add_pending_urb(struct usbdevfs_urb *urb)
+static AsyncURB *async_alloc(void)
 {
-PendingURB *purb = qemu_mallocz(sizeof(PendingURB));
-if (purb) {
-purb-urb = urb;
-purb-status = 0;
-purb-next = pending_urbs;
-pending_urbs = purb;
-return 1;
-}
-return 0;
+return (AsyncURB *) qemu_mallocz(sizeof(AsyncURB));
 }
 
-static int del_pending_urb(struct usbdevfs_urb *urb)
+static void async_free(AsyncURB *aurb)
 {
-PendingURB *purb = pending_urbs;
-PendingURB *prev = NULL;
+qemu_free(aurb);
+}
 
-while (purb  purb-urb != urb) {
-prev = purb;
-purb = purb-next;
-}
+static void async_complete(void *opaque)
+{
+USBHostDevice *s = opaque;
+AsyncURB *aurb;
+
+while (1) {
+   USBPacket *p;
 
-if (purb  purb-urb == urb) {
-if (prev) {
-prev-next = purb-next;
-} else {
-pending_urbs = purb-next;
+   int r = ioctl(s-fd, USBDEVFS_REAPURBNDELAY, aurb);
+if (r  0) {
+if (errno == EAGAIN)
+return;
+
+if (errno == ENODEV) {
+printf(husb: device %d.%d disconnected\n, s-bus_num, 
s-addr);
+   usb_device_del_addr(0, s-dev.addr);
+return;
+}
+
+dprintf(husb: async. reap urb failed errno %d\n, errno);
+return;
 }
-qemu_free(purb);
-return 1;
+
+p = aurb-packet;
+
+   dprintf(husb: async completed. aurb %p status %d alen %d\n, 
+aurb, aurb-urb.status,