[Qemu-devel] [PATCH] Out off array access in usb-net

2010-11-08 Thread Gleb Natapov
Properly check array bounds before accessing array element.

Signed-off-by: Gleb Natapov 
diff --git a/hw/usb-net.c b/hw/usb-net.c
index 70f9263..84e2d79 100644
--- a/hw/usb-net.c
+++ b/hw/usb-net.c
@@ -1142,7 +1142,7 @@ static int usb_net_handle_control(USBDevice *dev, int 
request, int value,
 break;
 
 default:
-if (usb_net_stringtable[value & 0xff]) {
+if (ARRAY_SIZE(usb_net_stringtable) > (value & 0xff)) {
 ret = set_usb_string(data,
 usb_net_stringtable[value & 0xff]);
 break;
--
Gleb.



Re: [Qemu-devel] smp: linux guest only sees 1 CPU when not using KVM

2010-11-08 Thread Gleb Natapov
On Mon, Nov 08, 2010 at 11:30:52PM +0100, Lluís wrote:
> Lennart Sorensen writes:
> 
> > On Mon, Nov 08, 2010 at 10:57:19PM +0100, Lluís wrote:
> >> Well, thre's nothing more to add than the contents in the subject.
> >> 
> >> I tried booting up with -smp 2, and /proc/cpuinfo on the linux guest
> >> only return one CPU, while booting with "-smp 2 -enable-kvm" shows 2
> >> CPUs.
> 
> > I was under the impression qemu didn't emulate SMP.  KVM on the other
> > hand does support running on real SMP hardware.  It doesn't have to
> > emulate SMP after all.
> 
> Well, I thought it did when I saw the "-smp" option, and it does in fact
> create multiple CPUState objects, as if the "extra" cpus where
> unplugged.
> 
> In fact, cpus.c:cpu_exec_all does indeed loop through all the CPUState
> objects, calling qemu_cpu_exec on each.
> 
Yes. Qemu should emulate smp fine. What "info cpus" in monitor shows?
Anything interesting in dmesg?

--
Gleb.



[Qemu-devel] Re: [SeaBIOS] [PATCH] fix virtio-blk failure after reboot

2010-11-08 Thread Gleb Natapov
On Mon, Nov 08, 2010 at 06:59:37PM -0500, Kevin O'Connor wrote:
> On Wed, Sep 15, 2010 at 06:31:44PM +0200, Gleb Natapov wrote:
> > vring_virtqueue should be zeroed otherwise old values will be reused
> > after reboot.
> > 
> > Signed-off-by: Gleb Natapov 
> > diff --git a/src/virtio-blk.c b/src/virtio-blk.c
> > index 34d7863..7a25826 100644
> > --- a/src/virtio-blk.c
> > +++ b/src/virtio-blk.c
> > @@ -109,6 +109,7 @@ init_virtio_blk(u16 bdf)
> >  goto fail;
> >  }
> >  memset(vdrive_g, 0, sizeof(*vdrive_g));
> > +memset(vq, 0, sizeof(*vq));
> >  vdrive_g->drive.type = DTYPE_VIRTIO;
> >  vdrive_g->drive.cntl_id = bdf;
> >  vdrive_g->vq = vq;
> 
> This didn't make it into SeaBIOS v0.6.1.  Should we add this to the
> stable branch as v0.6.1.2?  Any other bugfixes that need to go in to
> the stable branch (maybe Isaku's pci overflow patches)?
> 
Yes. Please add it to stable branch.

--
Gleb.



Re: [Qemu-devel] [PATCH 3/3] Add helper functions to enable virtio-9p make use of the threadlets

2010-11-08 Thread Venkateswararao Jujjuri (JV)
On 11/8/2010 2:47 AM, Arun R Bharadwaj wrote:
> From: Gautham R Shenoy 
> 
> infrastructure for offloading blocking tasks such as making posix calls on
> to the helper threads and handle the post_posix_operations() from the
> context of the iothread. This frees the vcpu thread to process any other guest
> operations while the processing of v9fs_io is in progress.
> 
> Signed-off-by: Gautham R Shenoy 
> Signed-off-by: Sripathi Kodi 
> Signed-off-by: Arun R Bharadwaj 
> ---
>  hw/virtio-9p.c |  165 
> 
>  posix-aio-compat.c |   33 +++---
>  qemu-threadlets.c  |   21 +++
>  qemu-threadlets.h  |1 
>  vl.c   |3 +
>  5 files changed, 200 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
> index 7c59988..61b65fd 100644
> --- a/hw/virtio-9p.c
> +++ b/hw/virtio-9p.c
> @@ -18,6 +18,7 @@
>  #include "fsdev/qemu-fsdev.h"
>  #include "virtio-9p-debug.h"
>  #include "virtio-9p-xattr.h"
> +#include "qemu-threadlets.h"
> 
>  int debug_9p_pdu;
> 
> @@ -33,6 +34,146 @@ enum {
>  Oappend = 0x80,
>  };
> 
> +struct v9fs_post_op {
> +QTAILQ_ENTRY(v9fs_post_op) node;
> +void (*func)(void *arg);
> +void *arg;
> +};
> +
> +static struct {
> +int rfd;
> +int wfd;
> +QemuMutex lock;
> +QTAILQ_HEAD(, v9fs_post_op) post_op_list;
> +} v9fs_async_struct;
> +
> +static void die2(int err, const char *what)
> +{
> +fprintf(stderr, "%s failed: %s\n", what, strerror(err));
> +abort();
> +}
> +
> +static void die(const char *what)
> +{
> +die2(errno, what);
> +}
> +
> +#define ASYNC_MAX_PROCESS   5
> +
> +/**
> + * v9fs_process_post_ops: Process any pending v9fs_post_posix_operation
> + * @arg: Not used.
> + *
> + * This function serves as a callback to the iothread to be called into 
> whenever
> + * the v9fs_async_struct.wfd is written into. This thread goes through the 
> list
> + * of v9fs_post_posix_operations() and executes them. In the process, it 
> might
> + * queue more job on the asynchronous thread pool.
> + */
> +static void v9fs_process_post_ops(void *arg)
> +{
> +int count = 0;
> +struct v9fs_post_op *post_op;
> +int ret;
> +char byte;
> +
> +qemu_mutex_lock(&v9fs_async_struct.lock);
> +do {
> +ret = read(v9fs_async_struct.rfd, &byte, sizeof(byte));
> +} while (ret >= 0 && errno != EAGAIN);
> +
> +for (count = 0; count < ASYNC_MAX_PROCESS; count++) {
> +if (QTAILQ_EMPTY(&(v9fs_async_struct.post_op_list))) {
> +   break;
> +}
> +post_op = QTAILQ_FIRST(&(v9fs_async_struct.post_op_list));
> +QTAILQ_REMOVE(&(v9fs_async_struct.post_op_list), post_op, node);
> +
> +qemu_mutex_unlock(&v9fs_async_struct.lock);
> +post_op->func(post_op->arg);
> +qemu_free(post_op);
> +qemu_mutex_lock(&v9fs_async_struct.lock);
> +}
> +qemu_mutex_unlock(&v9fs_async_struct.lock);
> +}
> +
> +/**
> + * v9fs_async_signal: Inform the io-thread of completion of async job.
> + *
> + * This function is used to inform the iothread that a particular
> + * async-operation pertaining to v9fs has been completed and that the io 
> thread
> + * can handle the v9fs_post_posix_operation.
> + *
> + * This is based on the aio_signal_handler
> + */
> +static inline void v9fs_async_signal(void)
> +{
> +char byte = 0;
> +ssize_t ret;
> +int tries = 0;
> +
> +qemu_mutex_lock(&v9fs_async_struct.lock);
> +do {
> +assert(tries != 100);
> +   ret = write(v9fs_async_struct.wfd, &byte, sizeof(byte));
> +tries++;
> +} while (ret < 0 && errno == EAGAIN);
> +qemu_mutex_unlock(&v9fs_async_struct.lock);
> +
> +if (ret < 0 && errno != EAGAIN)
> +die("write() in v9fs");
> +
> +if (kill(getpid(), SIGUSR2)) die("kill failed");
> +}
> +
> +/**
> + * v9fs_async_helper_done: Marks the completion of the v9fs_async job
> + * @func: v9fs_post_posix_func() for post-processing invoked in the context 
> of
> + *the io-thread
> + * @arg: Argument to func.
> + *
> + * This function is called from the context of one of the asynchronous 
> threads
> + * in the thread pool. This is called when the asynchronous thread has 
> finished
> + * executing a v9fs_posix_operation. It's purpose is to initiate the process 
> of
> + * informing the io-thread that the v9fs_posix_operation has completed.
> + */
> +static void v9fs_async_helper_done(void (*func)(void *arg), void *arg)
> +{
> +struct v9fs_post_op *post_op;
> +
> +post_op = qemu_mallocz(sizeof(*post_op));
> +post_op->func = func;
> +post_op->arg = arg;
> +
> +qemu_mutex_lock(&v9fs_async_struct.lock);
> +QTAILQ_INSERT_TAIL(&(v9fs_async_struct.post_op_list), post_op, node);
> +qemu_mutex_unlock(&v9fs_async_struct.lock);
> +
> +v9fs_async_signal();
> +}
> +
> +/**
> + * v9fs_do_async_posix: Offload v9fs_posix_operation onto async thread.
> + * @vs: V9fsOPState variable for the OP o

[Qemu-devel] Re: [patch 0/3] block migration fixes

2010-11-08 Thread Yoshiaki Tamura
Marcelo Tosatti wrote:
> Following patchset fixes block migration corruption issues.

Hi Marcelo,

Thanks for looking into this issue.  Although we tried your patches, we're still
seeing the corruption.  If we execute block migration while copying a file
locally, md5sum of the copied file doesn't match with the original.  Sometimes,
the filesystem returns an I/O error.

Could you let us know how you tested and debugged?  Did you use blkverify?

Thanks,

Yoshi



Re: [Qemu-devel] Re: [PATCH] PCI: Bus number from the bridge, not the device

2010-11-08 Thread Isaku Yamahata
On Mon, Nov 08, 2010 at 06:26:33PM +0200, Michael S. Tsirkin wrote:
> Replace bus number with slot numbers of parent bridges up to the root.
> This works for root bridge in a compatible way because bus number there
> is hard-coded to 0.
> IMO nested bridges are broken anyway, no way to be compatible there.
> 
> 
> Gleb, Markus, I think the following should be sufficient for PCI.  What
> do you think?  Also - do we need to update QMP/monitor to teach them to
> work with these paths?
> 
> This is on top of Alex's patch, completely untested.
> 
> 
> pci: fix device path for devices behind nested bridges
> 
> We were using bus number in the device path, which is clearly
> broken as this number is guest-assigned for all devices
> except the root.
> 
> Fix by using hierarchical list of slots, walking the path
> from root down to device, instead. Add :00 as bus number
> so that if there are no nested bridges, this is compatible
> with what we have now.

This format, Domain:00:Slot:Slot:Slot.Function, doesn't work
because pci-to-pci bridge is pci function.
So the format should be
Domain:00:Slot.Function:Slot.Function:Slot.Function

thanks,

> 
> Signed-off-by: Michael S. Tsirkin 
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 7d12473..fa98d94 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -1826,13 +1826,45 @@ static void pcibus_dev_print(Monitor *mon, 
> DeviceState *dev, int indent)
>  
>  static char *pcibus_get_dev_path(DeviceState *dev)
>  {
> -PCIDevice *d = (PCIDevice *)dev;
> -char path[16];
> -
> -snprintf(path, sizeof(path), "%04x:%02x:%02x.%x",
> - pci_find_domain(d->bus), pci_bus_num(d->bus),
> - PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
> -
> -return strdup(path);
> +PCIDevice *d = container_of(dev, PCIDevice, qdev);
> +PCIDevice *t;
> +int slot_depth;
> +/* Path format: Domain:00:Slot:Slot:Slot.Function.
> + * 00 is added here to make this format compatible with
> + * domain:Bus:Slot.Func for systems without nested PCI bridges.
> + * Slot list specifies the slot numbers for all devices on the
> + * path from root to the specific device. */
> +int domain_len = strlen(":00");
> +int func_len = strlen(".F");
> +int slot_len = strlen(":SS");
> +int path_len;
> +char *path, *p;
> +
> +/* Calculate # of slots on path between device and root. */;
> +slot_depth = 0;
> +for (t = d; t; t = t->bus->parent_dev)
> +++slot_depth;
> +
> +path_len = domain_len + bus_len + slot_len * slot_depth + func_len;
> +
> +/* Allocate memory, fill in the terminating null byte. */
> +path = malloc(path_len + 1 /* For '\0' */);
> +path[path_len] = '\0';
> +
> +/* First field is the domain. */
> +snprintf(path, domain_len, "%04x", pci_find_domain(d->bus));
> +
> +/* Leave space for slot numbers and fill in function number. */
> +p = path + domain_len + slot_len * slot_depth;
> +snprintf(p, func_len, ".%02x", PCI_FUNC(d->devfn));
> +
> +/* Fill in slot numbers. We walk up from device to root, so need to print
> + * them in the reverse order, last to first. */
> +for (t = d; t; t = t->bus->parent_dev) {
> +p -= slot_len;
> +snprintf(p, slot_len, ":%x", PCI_SLOT(t->devfn));
> +}
> +
> +return path;
>  }
>  
> 

-- 
yamahata



[Qemu-devel] [PATCH 1/2] Fix Block Hotplug race with drive_del()

2010-11-08 Thread Ryan Harper
Block hot unplug is racy since the guest is required to acknowlege the ACPI
unplug event; this may not happen synchronously with the device removal command

This series aims to close a gap where by mgmt applications that assume the
block resource has been removed without confirming that the guest has
acknowledged the removal may re-assign the underlying device to a second guest
leading to data leakage.

This series introduces a new montor command to decouple asynchornous device
removal from restricting guest access to a block device.  We do this by creating
a new monitor command drive_del which maps to a bdrv_unplug() command which
does a qemu_aio_flush; bdrv_flush() and bdrv_close().  Once complete, subsequent
IO is rejected from the device and the guest will get IO errors but continue to
function.  In addition to preventing further IO, we clean up state pointers
between host (BlockDriverState) and guest (DeviceInfo).

A subsequent device removal command can be issued to remove the device, to which
the guest may or maynot respond, but as long as the unplugged bit is set, no IO
will be sumbitted.

Signed-off-by: Ryan Harper 
---
 block.c |7 +++
 block.h |1 +
 blockdev.c  |   36 
 blockdev.h  |1 +
 hmp-commands.hx |   18 ++
 5 files changed, 63 insertions(+), 0 deletions(-)

diff --git a/block.c b/block.c
index 6b505fb..c76a796 100644
--- a/block.c
+++ b/block.c
@@ -1328,6 +1328,13 @@ void bdrv_set_removable(BlockDriverState *bs, int 
removable)
 }
 }
 
+void bdrv_unplug(BlockDriverState *bs)
+{
+qemu_aio_flush();
+bdrv_flush(bs);
+bdrv_close(bs);
+}
+
 int bdrv_is_removable(BlockDriverState *bs)
 {
 return bs->removable;
diff --git a/block.h b/block.h
index 78ecfac..581414c 100644
--- a/block.h
+++ b/block.h
@@ -171,6 +171,7 @@ void bdrv_set_on_error(BlockDriverState *bs, 
BlockErrorAction on_read_error,
BlockErrorAction on_write_error);
 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read);
 void bdrv_set_removable(BlockDriverState *bs, int removable);
+void bdrv_unplug(BlockDriverState *bs);
 int bdrv_is_removable(BlockDriverState *bs);
 int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
diff --git a/blockdev.c b/blockdev.c
index 6cb179a..ee8c2ec 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -14,6 +14,8 @@
 #include "qemu-option.h"
 #include "qemu-config.h"
 #include "sysemu.h"
+#include "hw/qdev.h"
+#include "block_int.h"
 
 static QTAILQ_HEAD(drivelist, DriveInfo) drives = 
QTAILQ_HEAD_INITIALIZER(drives);
 
@@ -597,3 +599,37 @@ int do_change_block(Monitor *mon, const char *device,
 }
 return monitor_read_bdrv_key_start(mon, bs, NULL, NULL);
 }
+
+int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
+{
+const char *id = qdict_get_str(qdict, "id");
+BlockDriverState *bs;
+Property *prop;
+
+bs = bdrv_find(id);
+if (!bs) {
+qerror_report(QERR_DEVICE_NOT_FOUND, id);
+return -1;
+}
+
+/* quiesce block driver; prevent further io */
+bdrv_unplug(bs);
+
+/* clean up guest state from pointing to host resource by
+ * finding and removing DeviceState "drive" property */
+for (prop = bs->peer->info->props; prop && prop->name; prop++) {
+if ((prop->info->type == PROP_TYPE_DRIVE) && 
+(*(BlockDriverState **)qdev_get_prop_ptr(bs->peer, prop) == bs)) {
+if (prop->info->free) {
+prop->info->free(bs->peer, prop);
+}
+}
+}
+
+/* clean up host state pointing to guest resource by removing
+ * pointers to guest device in the BlockDriverState */
+bdrv_delete(bs);
+
+return 0;
+}
+ 
diff --git a/blockdev.h b/blockdev.h
index 653affc..2a0559e 100644
--- a/blockdev.h
+++ b/blockdev.h
@@ -51,5 +51,6 @@ int do_eject(Monitor *mon, const QDict *qdict, QObject 
**ret_data);
 int do_block_set_passwd(Monitor *mon, const QDict *qdict, QObject **ret_data);
 int do_change_block(Monitor *mon, const char *device,
 const char *filename, const char *fmt);
+int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data);
 
 #endif
diff --git a/hmp-commands.hx b/hmp-commands.hx
index e5585ba..d6dc18c 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -68,6 +68,24 @@ Eject a removable medium (use -f to force it).
 ETEXI
 
 {
+.name   = "drive_del",
+.args_type  = "id:s",
+.params = "device",
+.help   = "remove host block device",
+.user_print = monitor_user_noop,
+.mhandler.cmd_new = do_drive_del,
+},
+
+STEXI
+...@item delete @var{device}
+...@findex delete
+Remove host block device.  The result is that guest generated IO is no longer
+submitted against the host device underlying the disk.  Once a drive has
+been deleted, the QEMU Block layer returns -EIO which results in IO 
+errors in the gues

[Qemu-devel] [PATCH 0/2] v6 Decouple block device removal from device removal

2010-11-08 Thread Ryan Harper
After *much* discussion, here's version 6.

This patch series decouples the detachment of a block device from the removal
of the backing pci-device.  Removal of a hotplugged pci device requires the
guest to respond before qemu tears down the block device. In some cases, the
guest may not respond leaving the guest with continued access to the block
device.  

The new monitor command, drive_del, will revoke a guests access to the
block device independently of the removal of the pci device.

The first patch implements drive_del and bdrv_unplug, the second patch
implements the qmp version of the monitor command.

Changes since v5:
- Removed dangling pointers in guest and host state.  This ensures things like 
  info block no longer displays the deleted drive; though info pci will
  continue to display the pci device until the guest responds to the removal
  request.
- Renamed drive_unplug -> drive_del
Changes since v4:
- Droppped drive_get_by_id patch and use bdrv_find() instead
- Added additional details about drive_unplug to hmp/qmp interface

Changes since v3:
- Moved QMP command for drive_unplug() to separate patch

Changes since v2:
- Added QMP command for drive_unplug()

Changes since v1:
- CodingStyle fixes
- Added qemu_aio_flush() to bdrv_unplug()

Signed-off-by: Ryan Harper 



[Qemu-devel] [PATCH 2/2] Add qmp version of drive_del

2010-11-08 Thread Ryan Harper
Signed-off-by: Ryan Harper 
---
 qmp-commands.hx |   29 +
 1 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/qmp-commands.hx b/qmp-commands.hx
index 793cf1c..1e0d4e9 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -338,6 +338,35 @@ Example:
 EQMP
 
 {
+.name   = "drive_del",
+.args_type  = "id:s",
+.params = "device",
+.help   = "remove host block device",
+.user_print = monitor_user_noop,
+.mhandler.cmd_new = do_drive_del,
+},
+
+SQMP
+drive del
+--
+
+Remove host block device.  The result is that guest generated IO is no longer
+submitted against the host device underlying the disk.  Once a drive has
+been deleted, the QEMU Block layer returns -EIO which results in IO 
+errors in the guest for applications that are reading/writing to the device.
+
+Arguments:
+
+- "id": the device's ID (json-string)
+
+Example:
+
+-> { "execute": "drive_del", "arguments": { "id": "drive-virtio-blk1" } }
+<- { "return": {} }
+
+EQMP
+
+{
 .name   = "cpu",
 .args_type  = "index:i",
 .params = "index",
-- 
1.6.3.3




[Qemu-devel] [PATCH v2 2/2] rtl8139: add vlan tag extraction

2010-11-08 Thread Benjamin Poirier
Add support to the emulated hardware to remove vlan tags in packets
going from the network to the guest.

Signed-off-by: Benjamin Poirier 
Cc: Igor V. Kovalenko 

---
Changes since v1:
* moved the debug print statement inside the if block and reworded
  accordingly. (as suggested by Igor)

AFAIK, extraction is optional to get vlans working. The driver
requests rx detagging but should not assume that it was done. Under
Linux, the mac layer will catch the vlan ethertype. I only added this
part for completeness (to emulate the hardware more truthfully..?).

 hw/rtl8139.c |   40 +---
 1 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index b599945..7762dbb 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -1024,6 +1024,43 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, 
const uint8_t *buf, size_
 
 target_phys_addr_t rx_addr = rtl8139_addr64(rxbufLO, rxbufHI);
 
+if (s->CpCmd & CPlusRxVLAN && size >= ETHER_ADDR_LEN * 2 +
+VLAN_HDR_LEN && be16_to_cpup((uint16_t *) &buf[ETHER_ADDR_LEN *
+2]) == ETHERTYPE_VLAN)
+{
+size_t new_size = size - VLAN_HDR_LEN;
+
+rxdw1 &= ~CP_RX_VLAN_TAG_MASK;
+rxdw1 |= CP_RX_TAVA |
+le16_to_cpup((uint16_t *)&buf[ETHER_HDR_LEN]);
+
+if (buf == buf1 || new_size < MIN_BUF_SIZE)
+{
+/* move the end and pad */
+memmove((uint8_t *)buf + ETHER_ADDR_LEN * 2, buf +
+ETHER_ADDR_LEN * 2 + VLAN_HDR_LEN, new_size -
+ETHER_ADDR_LEN * 2);
+memset((uint8_t *)buf + new_size, 0, MIN_BUF_SIZE - new_size);
+size = MIN_BUF_SIZE;
+}
+else
+{
+/* move the beginning */
+memmove((uint8_t *)buf + VLAN_HDR_LEN, buf, ETHER_ADDR_LEN *
+2);
+buf += VLAN_HDR_LEN;
+size = new_size;
+}
+
+DEBUG_PRINT(("RTL8139: C+ Rx mode : extracted vlan tag with tci: "
+"%u\n", bswap16(rxdw1 & CP_RX_VLAN_TAG_MASK)));
+}
+else
+{
+/* reset VLAN tag flag */
+rxdw1 &= ~CP_RX_TAVA;
+}
+
 /* receive/copy to target memory */
 cpu_physical_memory_write( rx_addr, buf, size );
 
@@ -1082,9 +1119,6 @@ static ssize_t rtl8139_do_receive(VLANClientState *nc, 
const uint8_t *buf, size_
 rxdw0 &= ~CP_RX_BUFFER_SIZE_MASK;
 rxdw0 |= (size+4);
 
-/* reset VLAN tag flag */
-rxdw1 &= ~CP_RX_TAVA;
-
 /* update ring data */
 val = cpu_to_le32(rxdw0);
 cpu_physical_memory_write(cplus_rx_ring_desc,(uint8_t *)&val, 4);
-- 
1.7.2.3




[Qemu-devel] [PATCH v2 1/2] rtl8139: add vlan tag insertion

2010-11-08 Thread Benjamin Poirier
Add support to the emulated hardware to add vlan tags in packets going
from the guest to the network.

Signed-off-by: Benjamin Poirier 
Cc: Igor V. Kovalenko 
---
Changes since v1:
* moved the debug print statement inside the if block and reworded
  accordingly. (as suggested by Igor)

 hw/rtl8139.c |   45 ++---
 1 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index d92981d..b599945 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -47,6 +47,8 @@
  *  Darwin)
  */
 
+#include 
+
 #include "hw.h"
 #include "pci.h"
 #include "qemu-timer.h"
@@ -58,6 +60,10 @@
 
 #define PCI_FREQUENCY 3300L
 
+/* bytes in VLAN tag */
+#define VLAN_TCI_LEN 2
+#define VLAN_HDR_LEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
+
 /* debug RTL8139 card C+ mode only */
 //#define DEBUG_RTL8139CP 1
 
@@ -1913,7 +1919,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 
 cpu_physical_memory_read(cplus_tx_ring_desc,(uint8_t *)&val, 4);
 txdw0 = le32_to_cpu(val);
-/* TODO: implement VLAN tagging support, VLAN tag data is read to txdw1 */
 cpu_physical_memory_read(cplus_tx_ring_desc+4,  (uint8_t *)&val, 4);
 txdw1 = le32_to_cpu(val);
 cpu_physical_memory_read(cplus_tx_ring_desc+8,  (uint8_t *)&val, 4);
@@ -1925,9 +1930,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
descriptor,
txdw0, txdw1, txbufLO, txbufHI));
 
-/* TODO: the following discard cast should clean clang analyzer output */
-(void)txdw1;
-
 /* w0 ownership flag */
 #define CP_TX_OWN (1<<31)
 /* w0 end of ring flag */
@@ -1951,8 +1953,8 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 /* w0 bits 0...15 : buffer size */
 #define CP_TX_BUFFER_SIZE (1<<16)
 #define CP_TX_BUFFER_SIZE_MASK (CP_TX_BUFFER_SIZE - 1)
-/* w1 tag available flag */
-#define CP_RX_TAGC (1<<17)
+/* w1 add tag flag */
+#define CP_TX_TAGC (1<<17)
 /* w1 bits 0...15 : VLAN tag */
 #define CP_TX_VLAN_TAG_MASK ((1<<16) - 1)
 /* w2 low  32bit of Rx buffer ptr */
@@ -1978,12 +1980,21 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 
 DEBUG_PRINT(("RTL8139: +++ C+ Tx mode : transmitting from descriptor 
%d\n", descriptor));
 
+int vlan_extra_size = 0;
 if (txdw0 & CP_TX_FS)
 {
 DEBUG_PRINT(("RTL8139: +++ C+ Tx mode : descriptor %d is first segment 
descriptor\n", descriptor));
 
 /* reset internal buffer offset */
 s->cplus_txbuffer_offset = 0;
+
+if (txdw1 & CP_TX_TAGC)
+{
+vlan_extra_size = VLAN_HDR_LEN;
+
+DEBUG_PRINT(("RTL8139: +++ C+ Tx mode : inserting vlan tag with "
+"tci: %u\n", bswap16(txdw1 & CP_TX_VLAN_TAG_MASK)));
+}
 }
 
 int txsize = txdw0 & CP_TX_BUFFER_SIZE_MASK;
@@ -1992,14 +2003,15 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 /* make sure we have enough space to assemble the packet */
 if (!s->cplus_txbuffer)
 {
-s->cplus_txbuffer_len = CP_TX_BUFFER_SIZE;
+s->cplus_txbuffer_len = CP_TX_BUFFER_SIZE + VLAN_HDR_LEN;
 s->cplus_txbuffer = qemu_malloc(s->cplus_txbuffer_len);
 s->cplus_txbuffer_offset = 0;
 
 DEBUG_PRINT(("RTL8139: +++ C+ mode transmission buffer allocated space 
%d\n", s->cplus_txbuffer_len));
 }
 
-while (s->cplus_txbuffer && s->cplus_txbuffer_offset + txsize >= 
s->cplus_txbuffer_len)
+while (s->cplus_txbuffer && s->cplus_txbuffer_offset + txsize +
+vlan_extra_size >= s->cplus_txbuffer_len)
 {
 s->cplus_txbuffer_len += CP_TX_BUFFER_SIZE;
 s->cplus_txbuffer = qemu_realloc(s->cplus_txbuffer, 
s->cplus_txbuffer_len);
@@ -2025,6 +2037,20 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 DEBUG_PRINT(("RTL8139: +++ C+ mode transmit reading %d bytes from host 
memory at %016" PRIx64 " to offset %d\n",
  txsize, (uint64_t)tx_addr, s->cplus_txbuffer_offset));
 
+if (vlan_extra_size && txsize >= 2 * ETHER_ADDR_LEN)
+{
+/* copy addresses */
+cpu_physical_memory_read(tx_addr, s->cplus_txbuffer, 2 *
+ETHER_ADDR_LEN);
+tx_addr += 2 * ETHER_ADDR_LEN;
+txsize -= 2 * ETHER_ADDR_LEN;
+/* insert vlan tag */
+*(uint16_t *)(s->cplus_txbuffer + 2 * ETHER_ADDR_LEN) =
+cpu_to_be16(ETHERTYPE_VLAN);
+*(uint16_t *)(s->cplus_txbuffer + 2 * ETHER_ADDR_LEN + ETHER_TYPE_LEN)
+= cpu_to_le16(txdw1 & CP_TX_VLAN_TAG_MASK);
+s->cplus_txbuffer_offset += 2 * ETHER_ADDR_LEN + VLAN_HDR_LEN;
+}
 cpu_physical_memory_read(tx_addr, s->cplus_txbuffer + 
s->cplus_txbuffer_offset, txsize);
 s->cplus_txbuffer_offset += txsize;
 
@@ -2053,9 +2079,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 /* update ring data */
 val = cpu_to_le32(txdw0);
 cpu_physical_memory_write(cplus_tx_ring_desc,(uint8_t *)&val, 4);
-/* TODO: implement VLA

Re: [Qemu-devel] [PATCH 1/2] rtl8139: add vlan tag insertion

2010-11-08 Thread Benjamin Poirier
On 08/11/10 11:34 AM, Stefan Hajnoczi wrote:
> On Sun, Nov 7, 2010 at 9:25 PM, Benjamin Poirier
>  wrote:
>> Add support to the emulated hardware to add vlan tags in packets going
>> from the guest to the network.
>>
>> Signed-off-by: Benjamin Poirier 
>> Cc: Igor V. Kovalenko 
>> ---
>>  hw/rtl8139.c |   46 +++---
>>  1 files changed, 35 insertions(+), 11 deletions(-)
>>
>> diff --git a/hw/rtl8139.c b/hw/rtl8139.c
>> index d92981d..ac294da 100644
>> --- a/hw/rtl8139.c
>> +++ b/hw/rtl8139.c
>> @@ -47,6 +47,8 @@
>>  *  Darwin)
>>  */
>>
>> +#include 
>> +
>>  #include "hw.h"
>>  #include "pci.h"
>>  #include "qemu-timer.h"
>> @@ -58,6 +60,10 @@
>>
>>  #define PCI_FREQUENCY 3300L
>>
>> +/* bytes in VLAN tag */
>> +#define VLAN_TCI_LEN 2
>> +#define VLAN_HDR_LEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
>> +
>>  /* debug RTL8139 card C+ mode only */
>>  //#define DEBUG_RTL8139CP 1
>>
>> @@ -1913,7 +1919,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
>>
>> cpu_physical_memory_read(cplus_tx_ring_desc,(uint8_t *)&val, 4);
>> txdw0 = le32_to_cpu(val);
>> -/* TODO: implement VLAN tagging support, VLAN tag data is read to txdw1 
>> */
>> cpu_physical_memory_read(cplus_tx_ring_desc+4,  (uint8_t *)&val, 4);
>> txdw1 = le32_to_cpu(val);
>> cpu_physical_memory_read(cplus_tx_ring_desc+8,  (uint8_t *)&val, 4);
>> @@ -1925,9 +1930,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
>>descriptor,
>>txdw0, txdw1, txbufLO, txbufHI));
>>
>> -/* TODO: the following discard cast should clean clang analyzer output 
>> */
>> -(void)txdw1;
>> -
>>  /* w0 ownership flag */
>>  #define CP_TX_OWN (1<<31)
>>  /* w0 end of ring flag */
>> @@ -1951,8 +1953,8 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
>>  /* w0 bits 0...15 : buffer size */
>>  #define CP_TX_BUFFER_SIZE (1<<16)
>>  #define CP_TX_BUFFER_SIZE_MASK (CP_TX_BUFFER_SIZE - 1)
>> -/* w1 tag available flag */
>> -#define CP_RX_TAGC (1<<17)
>> +/* w1 add tag flag */
>> +#define CP_TX_TAGC (1<<17)
>>  /* w1 bits 0...15 : VLAN tag */
>>  #define CP_TX_VLAN_TAG_MASK ((1<<16) - 1)
>>  /* w2 low  32bit of Rx buffer ptr */
>> @@ -1978,12 +1980,22 @@ static int rtl8139_cplus_transmit_one(RTL8139State 
>> *s)
>>
>> DEBUG_PRINT(("RTL8139: +++ C+ Tx mode : transmitting from descriptor 
>> %d\n", descriptor));
>>
>> +int vlan_extra_size = 0;
>> if (txdw0 & CP_TX_FS)
>> {
>> DEBUG_PRINT(("RTL8139: +++ C+ Tx mode : descriptor %d is first 
>> segment descriptor\n", descriptor));
>>
>> +DEBUG_PRINT(("RTL8139: +++ C+ Tx mode : add vlan tag: %u tci: %u\n",
>> +!!(txdw1 & CP_TX_TAGC), bswap16(txdw1 &
>> +CP_TX_VLAN_TAG_MASK)));
> 
> Please use leXX_to_cpu() and cpu_to_leXX() instead of bswapXX() for
> little-endian conversion.  This is more explicit than unconditional
> byteswapping and work on both big- and little-endian QEMU host
> architectures.

I believe the use of flat out swapping is warranted here. The data has
been adjusted for host-endianness when the register was read:
txdw1 = le32_to_cpu(val);
We have to mask and swap to accomodate the format used by the card's
registers, which is always the same regardless of machine endianness.

txdw1 is extracted from a 32 bit register like such (when the descriptor
specifies tx tagging), in increasing address order:
tci8-15 tci0-7 0x01 0x00

-Ben

> 
> Stefan
> 




Re: [Qemu-devel] [Bug 671831] [NEW] Sparc guest assert error

2010-11-08 Thread Nigel Horne
Stefan,
> You can grab the code like this:
> git clone -b scsi_assert git://repo.or.cz/qemu/stefanha.git
>
> If the assert triggers in that world then data corruption was
> previously possible but hidden (a SCSI request has a one data buffer
> and concurrent reads are being issued to the same buffer!).  This
> would mean an existing bug that needs to be fixed has been exposed
>

> If the assert doesn't trigger, then the issue was introduced recently
> and we can dig more into that.
>

I built your tree - it does not trigger the assertion.  Linux/sparc/2.4 
boots and seems to run fine.

Thanks for your help.

-Nigel

-- 
Sparc guest assert error
https://bugs.launchpad.net/bugs/671831
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
The latest version in git (d33ea50a958b2e050d2b28e5f17e3b55e91c6d74) crashes 
with an assert error when booting a Sparc/Linux guest.

The last time I tried it (about a week ago) it worked fine.  Yesterdai, I did a 
git pull, make clean, reran configure and compiled.

Host OS: Debian Linux/x86_64 5.0
C Compiler: 4.4.5
Guest OS: Linux/Sparc (2.4)
Command Line: qemu-system-sparc -hda ~njh/qemu/sparc/debian.img -nographic -m 
128
Build Configure: ./configure --enable-linux-aio --enable-io-thread --enable-kvm
GIT commit: d33ea50a958b2e050d2b28e5f17e3b55e91c6d74

Output:

Adding Swap: 122532k swap-space (priority -1)
.
Will now check root file system:fsck 1.40-WIP (14-Nov-2006)
[/sbin/fsck.ext3 (1) -- /] fsck.ext3 -a -C0 /dev/sda2 
qemu-system-sparc: /home/njh/src/qemu/hw/scsi-disk.c:201: scsi_read_data: 
Assertion `r->req.aiocb == ((void *)0)' failed.


It crashes in the same place every time.

(gdb) thread apply all bt:

Thread 3 (Thread 17643):
#0  0x7f4db21bc8d3 in select () at ../sysdeps/unix/syscall-template.S:82
#1  0x004d02c4 in main_loop_wait (nonblocking=)
at /home/njh/src/qemu/vl.c:1246
#2  0x004d0e57 in main_loop (argc=, 
argv=, envp=)
at /home/njh/src/qemu/vl.c:1309
#3  main (argc=, argv=, 
envp=) at /home/njh/src/qemu/vl.c:2999

Thread 2 (Thread 17645):
#0  pthread_cond_timedwait@@GLIBC_2.3.2 ()
at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:211
#1  0x0042450b in cond_timedwait (unused=)
at posix-aio-compat.c:104
#2  aio_thread (unused=) at posix-aio-compat.c:325
#3  0x7f4db3b818ba in start_thread (arg=)
at pthread_create.c:300
#4  0x7f4db21c302d in clone ()
at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#5  0x in ?? ()
Current language:  auto
The current source language is "auto; currently asm".

Thread 1 (Thread 17644):
#0  0x7f4db2126165 in *__GI_raise (sig=)
at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
#1  0x7f4db2128f70 in *__GI_abort () at abort.c:92
#2  0x7f4db211f2b1 in *__GI___assert_fail (
assertion=0x52690a "r->req.aiocb == ((void *)0)", 
file=, line=201, function=0x527480 "scsi_read_data")
at assert.c:81
#3  0x0044f363 in scsi_read_data (d=, tag=0)
at /home/njh/src/qemu/hw/scsi-disk.c:201
#4  0x004ebd6c in esp_do_dma (s=0x20679d0)
at /home/njh/src/qemu/hw/esp.c:377
#5  0x004ec781 in handle_ti (opaque=0x20679d0, 
addr=, val=)
at /home/njh/src/qemu/hw/esp.c:443
#6  esp_mem_writeb (opaque=0x20679d0, addr=, 
val=) at /home/njh/src/qemu/hw/esp.c:595
#7  0x41b2d971 in ?? ()
#8  0x in ?? ()
#9  0x031ad000 in ?? ()
#10 0x000301adfa20 in ?? ()
#11 0x1007 in ?? ()
#12 0x7f4daf80e8a0 in ?? ()
#13 0x0001 in ?? ()
#14 0x in ?? ()





[Qemu-devel] Re: [SeaBIOS] [PATCH] fix virtio-blk failure after reboot

2010-11-08 Thread Kevin O'Connor
On Wed, Sep 15, 2010 at 06:31:44PM +0200, Gleb Natapov wrote:
> vring_virtqueue should be zeroed otherwise old values will be reused
> after reboot.
> 
> Signed-off-by: Gleb Natapov 
> diff --git a/src/virtio-blk.c b/src/virtio-blk.c
> index 34d7863..7a25826 100644
> --- a/src/virtio-blk.c
> +++ b/src/virtio-blk.c
> @@ -109,6 +109,7 @@ init_virtio_blk(u16 bdf)
>  goto fail;
>  }
>  memset(vdrive_g, 0, sizeof(*vdrive_g));
> +memset(vq, 0, sizeof(*vq));
>  vdrive_g->drive.type = DTYPE_VIRTIO;
>  vdrive_g->drive.cntl_id = bdf;
>  vdrive_g->vq = vq;

This didn't make it into SeaBIOS v0.6.1.  Should we add this to the
stable branch as v0.6.1.2?  Any other bugfixes that need to go in to
the stable branch (maybe Isaku's pci overflow patches)?

-Kevin



[Qemu-devel] Single 64bit memory transaction instead of two 32bit memory transaction.

2010-11-08 Thread Adnan Khaleel
In the file exec.c:

The memory Write/Read functions are declared as an array of 4 entries where the 
index values of 0,1,2 correspond to 8,16 and 32bit write and read functions 
respectively.

CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];

Is there any reason why we can't extend this to include 64bit writes and read 
by increasing the array size? This is because 64bit reads are currently handled 
as two separate 32bit reads for eg: sommu_template.h

static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
  target_ulong addr,
  void *retaddr)
{
:
res = io_mem_read[index][2](io_mem_opaque[index], physaddr);
res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) 
<< 32;
:
return res;
}

I'm sure this is happening in other places as well. Is there a reason for this 
or could we arbitrarily increase this (within limits ofcourse)?

Thanks

AK

[Qemu-devel] running Uboot under QEMU

2010-11-08 Thread William Estrada

Hi group,

  I'm trying to build and run Uboot on my Fedora 13 laptop. I'm lost.  
Any 'How To'?


--
William Estrada
Mt Umunhum, CA, USA
HTTP://64.124.13.3 ( Mt-Umunhum-Wireless.net )
Skype: MrUmunhum



[Qemu-devel] Re: [PATCH] intel-hda: add msi support

2010-11-08 Thread malc
On Mon, 8 Nov 2010, Gerd Hoffmann wrote:

> This patch adds MSI support to the intel hda audio driver.  It is
> enabled by default, use '-device intel-hda,msi=0' to disable it.
> 
> Signed-off-by: Gerd Hoffmann 
> ---
>  hw/intel-hda.c |   30 --
>  1 files changed, 28 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/intel-hda.c b/hw/intel-hda.c
> index e478e67..8990ebb 100644
> --- a/hw/intel-hda.c
> +++ b/hw/intel-hda.c
> @@ -19,6 +19,7 @@
>  
>  #include "hw.h"
>  #include "pci.h"
> +#include "msi.h"
>  #include "qemu-timer.h"
>  #include "audiodev.h"
>  #include "intel-hda.h"
> @@ -188,6 +189,7 @@ struct IntelHDAState {
>  
>  /* properties */
>  uint32_t debug;
> +uint32_t msi;
>  };
>  
>  struct IntelHDAReg {
> @@ -268,6 +270,7 @@ static void intel_hda_update_int_sts(IntelHDAState *d)
>  
>  static void intel_hda_update_irq(IntelHDAState *d)
>  {
> +int msi = d->msi && msi_enabled(&d->pci);
>  int level;
>  
>  intel_hda_update_int_sts(d);
> @@ -276,8 +279,15 @@ static void intel_hda_update_irq(IntelHDAState *d)
>  } else {
>  level = 0;
>  }
> -dprint(d, 2, "%s: level %d\n", __FUNCTION__, level);
> -qemu_set_irq(d->pci.irq[0], level);
> +dprint(d, 2, "%s: level %d [%s]\n", __FUNCTION__,
> +   level, msi ? "msi" : "intx");
> +if (msi) {
> +if (level) {
> +msi_notify(&d->pci, 0);
> +}
> +} else {
> +qemu_set_irq(d->pci.irq[0], level);
> +}
>  }
>  
>  static int intel_hda_send_command(IntelHDAState *d, uint32_t verb)
> @@ -1148,6 +1158,8 @@ static int intel_hda_init(PCIDevice *pci)
>intel_hda_mmio_write, d);
>  pci_register_bar(&d->pci, 0, 0x4000, PCI_BASE_ADDRESS_SPACE_MEMORY,
>   intel_hda_map);
> +if (d->msi)
> +msi_init(&d->pci, 0x50, 1, true, false);

Braces

>  
>  hda_codec_bus_init(&d->pci.qdev, &d->codecs,
> intel_hda_response, intel_hda_xfer);
> @@ -1159,10 +1171,22 @@ static int intel_hda_exit(PCIDevice *pci)
>  {
>  IntelHDAState *d = DO_UPCAST(IntelHDAState, pci, pci);
>  
> +if (d->msi)
> +msi_uninit(&d->pci);

Likewise

>  cpu_unregister_io_memory(d->mmio_addr);
>  return 0;
>  }
>  
> +static void intel_hda_write_config(PCIDevice *pci, uint32_t addr,
> +   uint32_t val, int len)
> +{
> +IntelHDAState *d = DO_UPCAST(IntelHDAState, pci, pci);
> +
> +pci_default_write_config(pci, addr, val, len);
> +if (d->msi)
> +msi_write_config(pci, addr, val, len);
> +}
> +
>  static int intel_hda_post_load(void *opaque, int version)
>  {
>  IntelHDAState* d = opaque;
> @@ -1246,8 +1270,10 @@ static PCIDeviceInfo intel_hda_info = {
>  .qdev.reset   = intel_hda_reset,
>  .init = intel_hda_init,
>  .exit = intel_hda_exit,
> +.config_write = intel_hda_write_config,
>  .qdev.props   = (Property[]) {
>  DEFINE_PROP_UINT32("debug", IntelHDAState, debug, 0),
> +DEFINE_PROP_UINT32("msi", IntelHDAState, msi, 1),
>  DEFINE_PROP_END_OF_LIST(),
>  }
>  };
> 

-- 
mailto:av1...@comtv.ru



[Qemu-devel] [PATCH 2/2] trace: enable all events by default

2010-11-08 Thread Lluís
Enable all trace events by default, assuming their frequency is relatively low,
so there will be no measurable performace impact.

Signed-off-by: Lluís Vilanova 
---
 trace-events |  251 +-
 1 files changed, 124 insertions(+), 127 deletions(-)

diff --git a/trace-events b/trace-events
index 947f8b0..c0eb28f 100644
--- a/trace-events
+++ b/trace-events
@@ -17,9 +17,6 @@
 # Example: qemu_malloc(size_t size) "size %zu"
 #
 # The "disable" keyword will build without the trace event.
-# In case of 'simple' trace backend, it will allow the trace event to be
-# compiled, but this would be turned off by default. It can be toggled on via
-# the monitor.
 #
 # The  must be a valid as a C function name.
 #
@@ -29,163 +26,163 @@
 # The  should be a sprintf()-compatible format string.
 
 # qemu-malloc.c
-disable qemu_malloc(size_t size, void *ptr) "size %zu ptr %p"
-disable qemu_realloc(void *ptr, size_t size, void *newptr) "ptr %p size %zu 
newptr %p"
-disable qemu_free(void *ptr) "ptr %p"
+qemu_malloc(size_t size, void *ptr) "size %zu ptr %p"
+qemu_realloc(void *ptr, size_t size, void *newptr) "ptr %p size %zu newptr %p"
+qemu_free(void *ptr) "ptr %p"
 
 # osdep.c
-disable qemu_memalign(size_t alignment, size_t size, void *ptr) "alignment %zu 
size %zu ptr %p"
-disable qemu_vmalloc(size_t size, void *ptr) "size %zu ptr %p"
-disable qemu_vfree(void *ptr) "ptr %p"
+qemu_memalign(size_t alignment, size_t size, void *ptr) "alignment %zu size 
%zu ptr %p"
+qemu_vmalloc(size_t size, void *ptr) "size %zu ptr %p"
+qemu_vfree(void *ptr) "ptr %p"
 
 # hw/virtio.c
-disable virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned 
int idx) "vq %p elem %p len %u idx %u"
-disable virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"
-disable virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int 
out_num) "vq %p elem %p in_num %u out_num %u"
-disable virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p"
-disable virtio_irq(void *vq) "vq %p"
-disable virtio_notify(void *vdev, void *vq) "vdev %p vq %p"
+virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) 
"vq %p elem %p len %u idx %u"
+virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"
+virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) 
"vq %p elem %p in_num %u out_num %u"
+virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p"
+virtio_irq(void *vq) "vq %p"
+virtio_notify(void *vdev, void *vq) "vdev %p vq %p"
 
 # block.c
-disable multiwrite_cb(void *mcb, int ret) "mcb %p ret %d"
-disable bdrv_aio_multiwrite(void *mcb, int num_callbacks, int num_reqs) "mcb 
%p num_callbacks %d num_reqs %d"
-disable bdrv_aio_multiwrite_earlyfail(void *mcb) "mcb %p"
-disable bdrv_aio_multiwrite_latefail(void *mcb, int i) "mcb %p i %d"
-disable bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void 
*opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
-disable bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void 
*opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
+multiwrite_cb(void *mcb, int ret) "mcb %p ret %d"
+bdrv_aio_multiwrite(void *mcb, int num_callbacks, int num_reqs) "mcb %p 
num_callbacks %d num_reqs %d"
+bdrv_aio_multiwrite_earlyfail(void *mcb) "mcb %p"
+bdrv_aio_multiwrite_latefail(void *mcb, int i) "mcb %p i %d"
+bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs 
%p sector_num %"PRId64" nb_sectors %d opaque %p"
+bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) 
"bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 
 # hw/virtio-blk.c
-disable virtio_blk_req_complete(void *req, int status) "req %p status %d"
-disable virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
-disable virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) 
"req %p sector %"PRIu64" nsectors %zu"
+virtio_blk_req_complete(void *req, int status) "req %p status %d"
+virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
+virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p 
sector %"PRIu64" nsectors %zu"
 
 # posix-aio-compat.c
-disable paio_submit(void *acb, void *opaque, int64_t sector_num, int 
nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type 
%d"
+paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int 
type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
 
 # ioport.c
-disable cpu_in(unsigned int addr, unsigned int val) "addr %#x value %u"
-disable cpu_out(unsigned int addr, unsigned int val) "addr %#x value %u"
+cpu_in(unsigned int addr, unsigned int val) "addr %#x value %u"
+cpu_out(unsigned int addr, unsigned int val) "addr %#x value %u"
 
 # balloon.c
 # Since requests are raised via monitor, not many tracepoints are needed.
-disable balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu"
+balloo

[Qemu-devel] [PATCH 1/2] trace: always use the "nop" backend on events with the "disable" keyword

2010-11-08 Thread Lluís
Any event with the keyword/property "disable" generates an empty trace event
using the "nop" backend, regardless of the current backend.

Generalize the "property" concept in the trace-events file, so tracetool now
has:

* get_name: Get only the event name
* get_property: Return if a propery event is set (a keyword before the event
  name)

Signed-off-by: Lluís Vilanova 
---
 tracetool |   66 +
 1 files changed, 31 insertions(+), 35 deletions(-)

diff --git a/tracetool b/tracetool
index 7010858..9b6f801 100755
--- a/tracetool
+++ b/tracetool
@@ -31,7 +31,31 @@ EOF
 # Get the name of a trace event
 get_name()
 {
-echo ${1%%\(*}
+local name
+name=${1%%\(*}
+echo ${name##* }
+}
+
+# Get the given property of a trace event
+# 1: trace-events line
+# 2: property name
+# -> "1" if property is present; "0" otherwise
+get_property()
+{
+local props prop
+props=${1%%\(*}
+props=${props% *}
+if [ -z "$props" ]; then
+echo "0"
+return
+fi
+for prop in $props; do
+if [ "$prop" = "$2" ]; then
+echo "1"
+return
+fi
+done
+echo "0"
 }
 
 # Get the argument list of a trace event, including types and names
@@ -88,20 +112,6 @@ get_fmt()
 echo "$fmt"
 }
 
-# Get the state of a trace event
-get_state()
-{
-local str disable state
-str=$(get_name "$1")
-disable=${str##disable }
-if [ "$disable" = "$str" ] ; then
-state=1
-else
-state=0
-fi
-echo "$state"
-}
-
 linetoh_begin_nop()
 {
 return
@@ -161,14 +171,10 @@ cast_args_to_uint64_t()
 
 linetoh_simple()
 {
-local name args argc trace_args state
+local name args argc trace_args
 name=$(get_name "$1")
 args=$(get_args "$1")
 argc=$(get_argc "$1")
-state=$(get_state "$1")
-if [ "$state" = "0" ]; then
-name=${name##disable }
-fi
 
 trace_args="$simple_event_num"
 if [ "$argc" -gt 0 ]
@@ -207,14 +213,10 @@ EOF
 
 linetoc_simple()
 {
-local name state
+local name
 name=$(get_name "$1")
-state=$(get_state "$1")
-if [ "$state" = "0" ] ; then
-name=${name##disable }
-fi
 cat <

Re: [Qemu-devel] smp: linux guest only sees 1 CPU when not using KVM

2010-11-08 Thread Lluís
Lennart Sorensen writes:

> On Mon, Nov 08, 2010 at 10:57:19PM +0100, Lluís wrote:
>> Well, thre's nothing more to add than the contents in the subject.
>> 
>> I tried booting up with -smp 2, and /proc/cpuinfo on the linux guest
>> only return one CPU, while booting with "-smp 2 -enable-kvm" shows 2
>> CPUs.

> I was under the impression qemu didn't emulate SMP.  KVM on the other
> hand does support running on real SMP hardware.  It doesn't have to
> emulate SMP after all.

Well, I thought it did when I saw the "-smp" option, and it does in fact
create multiple CPUState objects, as if the "extra" cpus where
unplugged.

In fact, cpus.c:cpu_exec_all does indeed loop through all the CPUState
objects, calling qemu_cpu_exec on each.

Lluis

-- 
 "And it's much the same thing with knowledge, for whenever you learn
 something new, the whole world becomes that much richer."
 -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
 Tollbooth



[Qemu-devel] [PATCH] intel-hda: add msi support

2010-11-08 Thread Gerd Hoffmann
This patch adds MSI support to the intel hda audio driver.  It is
enabled by default, use '-device intel-hda,msi=0' to disable it.

Signed-off-by: Gerd Hoffmann 
---
 hw/intel-hda.c |   30 --
 1 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index e478e67..8990ebb 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -19,6 +19,7 @@
 
 #include "hw.h"
 #include "pci.h"
+#include "msi.h"
 #include "qemu-timer.h"
 #include "audiodev.h"
 #include "intel-hda.h"
@@ -188,6 +189,7 @@ struct IntelHDAState {
 
 /* properties */
 uint32_t debug;
+uint32_t msi;
 };
 
 struct IntelHDAReg {
@@ -268,6 +270,7 @@ static void intel_hda_update_int_sts(IntelHDAState *d)
 
 static void intel_hda_update_irq(IntelHDAState *d)
 {
+int msi = d->msi && msi_enabled(&d->pci);
 int level;
 
 intel_hda_update_int_sts(d);
@@ -276,8 +279,15 @@ static void intel_hda_update_irq(IntelHDAState *d)
 } else {
 level = 0;
 }
-dprint(d, 2, "%s: level %d\n", __FUNCTION__, level);
-qemu_set_irq(d->pci.irq[0], level);
+dprint(d, 2, "%s: level %d [%s]\n", __FUNCTION__,
+   level, msi ? "msi" : "intx");
+if (msi) {
+if (level) {
+msi_notify(&d->pci, 0);
+}
+} else {
+qemu_set_irq(d->pci.irq[0], level);
+}
 }
 
 static int intel_hda_send_command(IntelHDAState *d, uint32_t verb)
@@ -1148,6 +1158,8 @@ static int intel_hda_init(PCIDevice *pci)
   intel_hda_mmio_write, d);
 pci_register_bar(&d->pci, 0, 0x4000, PCI_BASE_ADDRESS_SPACE_MEMORY,
  intel_hda_map);
+if (d->msi)
+msi_init(&d->pci, 0x50, 1, true, false);
 
 hda_codec_bus_init(&d->pci.qdev, &d->codecs,
intel_hda_response, intel_hda_xfer);
@@ -1159,10 +1171,22 @@ static int intel_hda_exit(PCIDevice *pci)
 {
 IntelHDAState *d = DO_UPCAST(IntelHDAState, pci, pci);
 
+if (d->msi)
+msi_uninit(&d->pci);
 cpu_unregister_io_memory(d->mmio_addr);
 return 0;
 }
 
+static void intel_hda_write_config(PCIDevice *pci, uint32_t addr,
+   uint32_t val, int len)
+{
+IntelHDAState *d = DO_UPCAST(IntelHDAState, pci, pci);
+
+pci_default_write_config(pci, addr, val, len);
+if (d->msi)
+msi_write_config(pci, addr, val, len);
+}
+
 static int intel_hda_post_load(void *opaque, int version)
 {
 IntelHDAState* d = opaque;
@@ -1246,8 +1270,10 @@ static PCIDeviceInfo intel_hda_info = {
 .qdev.reset   = intel_hda_reset,
 .init = intel_hda_init,
 .exit = intel_hda_exit,
+.config_write = intel_hda_write_config,
 .qdev.props   = (Property[]) {
 DEFINE_PROP_UINT32("debug", IntelHDAState, debug, 0),
+DEFINE_PROP_UINT32("msi", IntelHDAState, msi, 1),
 DEFINE_PROP_END_OF_LIST(),
 }
 };
-- 
1.7.1




Re: [Qemu-devel] smp: linux guest only sees 1 CPU when not using KVM

2010-11-08 Thread Lennart Sorensen
On Mon, Nov 08, 2010 at 10:57:19PM +0100, Lluís wrote:
> Well, thre's nothing more to add than the contents in the subject.
> 
> I tried booting up with -smp 2, and /proc/cpuinfo on the linux guest
> only return one CPU, while booting with "-smp 2 -enable-kvm" shows 2
> CPUs.

I was under the impression qemu didn't emulate SMP.  KVM on the other
hand does support running on real SMP hardware.  It doesn't have to
emulate SMP after all.

-- 
Len Sorensen



Re: [Qemu-devel] Access to specific isa card with Qemu???

2010-11-08 Thread François Revol
Hi,

> We have a software that runs on MS-DOS and must communicate with a specific 
> card installed on port isa.
> We  want to use this software in Qemu with a machine that runs XP.
> Is it possible to access to the ISA port with Qemu in this case?
> 
> Do we have to do a specific development?
> Can you help us in this development, how much would it cost?

I believe some people already implemented PCI pass-through, or ported it from 
other emulators (Bochs had one IIRC), you'd have to check the mailing list :
http://search.gmane.org/?query=pci+pass+through&group=gmane.comp.emulators.qemu

Essentially you'd have to write the same kind of thing but for ISA, except 
you'll have to specify the resources manually since the card isn't published as 
a specific entity on ISA busses (PnP ones excepted).

Passing through I/O ports should be easy, but IRQ forwarding or even DMA if you 
need it might require some more work.

François.


[Qemu-devel] smp: linux guest only sees 1 CPU when not using KVM

2010-11-08 Thread Lluís
Well, thre's nothing more to add than the contents in the subject.

I tried booting up with -smp 2, and /proc/cpuinfo on the linux guest
only return one CPU, while booting with "-smp 2 -enable-kvm" shows 2
CPUs.

Lluis

-- 
 "And it's much the same thing with knowledge, for whenever you learn
 something new, the whole world becomes that much richer."
 -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
 Tollbooth



[Qemu-devel] Re: [PATCHv2 7/8] Change pci bus get_dev_path callback to print only slot and func

2010-11-08 Thread Michael S. Tsirkin
On Mon, Nov 08, 2010 at 07:29:50PM +0200, Gleb Natapov wrote:
> > This will make it compatible with existing code and generally
> > what users expect.
> > 
> It will drop domain and bus number part anyway.

Yes but the proposal was to add :00: in front to
make it have the expected format for most users.

-- 
MST



Re: [Qemu-devel] [PATCH 0/3] v4 Decouple block device removal from device removal

2010-11-08 Thread Michael S. Tsirkin
On Fri, Nov 05, 2010 at 05:01:49PM +0100, Markus Armbruster wrote:
> "Michael S. Tsirkin"  writes:
> 
> > On Fri, Nov 05, 2010 at 02:27:49PM +0100, Markus Armbruster wrote:
> >> I'd be fine with any of these:
> >> 
> >> 1. A new command "device_disconnet ID" (or similar name) to disconnect
> >>device ID from any host parts.  Nice touch: you don't have to know
> >>about the device's host part(s) to disconnect it.  But it might be
> >>more work than the other two.
> >> 
> >> 2. New commands netdev_disconnect, drive_disconnect (or similar names)
> >>to disconnect a host part from a guest device.  Like (1), except you
> >>have to point to the other end of the connection to cut it.
> >
> > I think it's cleaner not to introduce a concept of a disconnected
> > backend.
> 
> Backends start disconnected, so the concept already exists.
> 
> > One thing that we must be careful to explicitly disallow, is
> > reconnecting guest to another host backend. The reason being
> > that guest might rely on backend features and changing these
> > would break this.
> >
> > Given that, disconnecting without delete isn't helpful.
> 
> What about disconnect, hot plug new device, connect?

Exactly. I don't think we want to support this.
New device might not support all features that old one has.
Or it may have more features.

> >> 3. A new command "drive_del ID" similar to existing netdev_del.  This is
> >>(2) fused with delete.  Conceptual wart: you can't disconnect and
> >>keep the host part around.  Moreover, delete is slightly dangerous,
> >>because it renders any guest device still using the host part
> >>useless.
> >
> > I don't see how it's more dangerous than disconnecting.
> > If guest can't access the backend it might not exist
> > as far as guest is concerned.
> 
> If we keep disconnect and delete separate operations, we can make delete
> fail when still connected.  Typo insurance.
> 
> >> Do you need anything else from me to make progress?
> >
> > Let's go for 3. Need for 1/2 seems dubious, and it's much harder
> > to support.



[Qemu-devel] Re: [PATCHv2 7/8] Change pci bus get_dev_path callback to print only slot and func

2010-11-08 Thread Gleb Natapov
On Mon, Nov 08, 2010 at 11:00:17PM +0200, Michael S. Tsirkin wrote:
> On Mon, Nov 08, 2010 at 07:29:50PM +0200, Gleb Natapov wrote:
> > > This will make it compatible with existing code and generally
> > > what users expect.
> > > 
> > It will drop domain and bus number part anyway.
> 
> Yes but the proposal was to add :00: in front to
> make it have the expected format for most users.
> 
I am not interesting in hacks. I do not know who are those most users 
you are talking about and why they expect :00: in front of
something. Anyway, as I said, I moved to use another callback so you can
do whatever you want with get_dev_path.

--
Gleb.



[Qemu-devel] Re: [PATCH 0/6] Save state error handling (kill off no_migrate)

2010-11-08 Thread Michael S. Tsirkin
On Mon, Nov 08, 2010 at 10:20:46AM -0700, Alex Williamson wrote:
> On Mon, 2010-11-08 at 18:54 +0200, Michael S. Tsirkin wrote:
> > On Mon, Nov 08, 2010 at 07:59:57AM -0700, Alex Williamson wrote:
> > > On Mon, 2010-11-08 at 13:40 +0200, Michael S. Tsirkin wrote:
> > > > On Wed, Oct 06, 2010 at 02:58:57PM -0600, Alex Williamson wrote:
> > > > > Our code paths for saving or migrating a VM are full of functions that
> > > > > return void, leaving no opportunity for a device to cancel a 
> > > > > migration,
> > > > > either from error or incompatibility.  The ivshmem driver attempted to
> > > > > solve this with a no_migrate flag on the save state entry.  I think 
> > > > > the
> > > > > more generic and flexible way to solve this is to allow driver save
> > > > > functions to fail.  This series implements that and converts ivshmem
> > > > > to uses a set_params function to NAK migration much earlier in the
> > > > > processes.  This touches a lot of files, but bulk of those changes are
> > > > > simply s/void/int/ and tacking a "return 0" to the end of functions.
> > > > > Thanks,
> > > > > 
> > > > > Alex
> > > > 
> > > > Well error handling is always tricky: it seems easier to
> > > > require save handlers to never fail.
> > > 
> > > Sure it's easier, but does that make it robust?
> > 
> > More robust in the face of wwhat kind of failure?
> 
> I really don't understand why we're having a discussion about whether
> providing a means to return an error is a good thing or not.  These
> patches touch a lot of files, but the change is dead simple.

I just don't see the motivation. Presumably your patches are
there to achieve some kind of goal, right? I am trying to
figure out what that goal is.

Currently savevm callbacks never fail. So they
return void. Why is returing 0 and adding a bunch of code to test the
condition that never happens a good idea?  It just seems to create more
ways for devices to shoot themselves in the foot.

> > > > So there's a bunch of code here but what exactly is the benefit?
> > > > Since save handlers have no idea what does the remote do,
> > > > what is the compatibility you mention?
> > > 
> > > There are two users I currently have in mind.  ivshmem currently makes
> > > use of the register_device_unmigratable() because it makes use of host
> > > specific resources and connections (aiui).  This sets the no_migrate
> > > flag, which is not dynamic and a bit of a band-aide.
> > >  The other is
> > > device assignment, which needs a way to NAK a migration since physical
> > > devices are never migratable.
> > 
> > Well since all these can't be migrated ever, a fixed property actually seems
> > a good match.  Sure it's not dynamic but all the easier to debug.
> > 
> > >  I imagine we could at some point have
> > > devices with state tied to other features that can't always be detached
> > > from the host, this tries to provide the infrastructure for that to
> > > happen.
> > > 
> > > Alex
> > 
> > Let guest control whether you can migrate?
> > Sounds like something that is more likely to be abused
> > than used constructively. 
> 
> s/guest/device/  So you would rather the migration failed on the
> incoming side where it may not be detected

And incoming migration handlers *must* validate the input, anyway.
We should not plaster over this with checks on outgoing side.

> or it may be detected too
> late to stop the migration?
> 
> Alex

So there's a bug and device is in an unexpected state.
What can we do? Assert, print an error, notify guest - all these
come to mind. But stop migration? Seems arbitrary.

-- 
MST



Re: [Qemu-devel] [PATCH 2/2] Add support for generating a systemtap tapset static probes

2010-11-08 Thread Stefan Hajnoczi
On Mon, Nov 8, 2010 at 11:33 AM, Daniel P. Berrange  wrote:
> @@ -390,6 +396,54 @@ linetod_end_dtrace()
>  EOF
>  }
>
> +linetos_begin_dtrace()
> +{
> +return
> +}
> +
> +linetos_dtrace()
> +{
> +local name args arglist state

Missing binary, i, and arg.

> +name=$(get_name "$1")
> +args=$(get_args "$1")
> +arglist=$(get_argnames "$1", "")
> +state=$(get_state "$1")
> +if [ "$state" = "0" ] ; then
> +name=${name##disable }
> +fi
> +
> +if [ "$target" = "i386" ]
> +then
> +  binary="qemu"
> +else
> +  binary="qemu-system-$target"
> +fi

Perhaps we should just pass in the binary name to avoid hardcoding "qemu" and
"qemu-system-$target" here.  If possible, let's make SystemTap also work for
userspace targets, not just for full-system softmmu targets.

> +
> +# Define prototype for probe arguments
> +cat < +probe qemu.system.$target.$name = process("$bindir/$binary").mark("$name")
> +{
> +EOF
> +
> +i=1
> +for arg in $arglist
> +do
> +cat < +  $arg = \$arg$i;
> +EOF
> +   i="$((i+1))"
> +done
> +
> +cat < +}
> +EOF
> +}
> +
> +linetos_end_dtrace()
> +{
> +return
> +}
> +
>  # Process stdin by calling begin, line, and end functions for the backend
>  convert()
>  {
> @@ -455,6 +509,24 @@ tracetod()
> convert d
>  }
>
> +tracetos()

How about using 'stap' instead of 's' so it's clear we're not generating
assembly?

Stefan



Re: [Qemu-devel] [PATCH 1/3] Make paio subsystem use threadlets infrastructure

2010-11-08 Thread Stefan Hajnoczi
On Mon, Nov 8, 2010 at 2:33 PM, Arun R Bharadwaj
 wrote:
> diff --git a/Makefile.objs b/Makefile.objs
> index cd5a24b..3b7ec27 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -9,6 +9,7 @@ qobject-obj-y += qerror.o
>
>  block-obj-y = cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o
>  block-obj-y += nbd.o block.o aio.o aes.o osdep.o qemu-config.o
> +block-obj-$(CONFIG_POSIX) += qemu-thread.o
>  block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
>  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>
> @@ -124,7 +125,6 @@ endif
>  common-obj-y += $(addprefix ui/, $(ui-obj-y))
>
>  common-obj-y += iov.o acl.o
> -common-obj-$(CONFIG_THREAD) += qemu-thread.o
>  common-obj-y += notify.o event_notifier.o
>  common-obj-y += qemu-timer.o

This change makes CONFIG_THREAD unused.  The ./configure code that
sets CONFIG_THREAD=y should be removed.

> diff --git a/posix-aio-compat.c b/posix-aio-compat.c
> index 7b862b5..00b2a4e 100644
> --- a/posix-aio-compat.c
> +++ b/posix-aio-compat.c
> @@ -29,7 +29,32 @@
>  #include "block_int.h"
>
>  #include "block/raw-posix-aio.h"
> +#include "qemu-thread.h"
>
> +#define MAX_GLOBAL_THREADS  64
> +#define MIN_GLOBAL_THREADS   8
> +
> +QemuMutex aiocb_mutex;

This variable should be static since it isn't used externally.

> +
> +typedef struct ThreadletQueue
> +{
> +QemuMutex lock;
> +QemuCond cond;
> +int max_threads;
> +int min_threads;
> +int cur_threads;
> +int idle_threads;
> +QTAILQ_HEAD(, ThreadletWork) request_list;
> +} ThreadletQueue;
> +
> +typedef struct ThreadletWork
> +{
> +QTAILQ_ENTRY(ThreadletWork) node;
> +void (*func)(struct ThreadletWork *work);
> +} ThreadletWork;
> +
> +static ThreadletQueue globalqueue;
> +static int globalqueue_init;
>
>  struct qemu_paiocb {
> BlockDriverAIOCB common;
> @@ -44,13 +69,13 @@ struct qemu_paiocb {
> int ev_signo;
> off_t aio_offset;
>
> -QTAILQ_ENTRY(qemu_paiocb) node;
> int aio_type;
> ssize_t ret;
> int active;
> struct qemu_paiocb *next;
>
> int async_context_id;
> +ThreadletWork work;
>  };
>
>  typedef struct PosixAioState {
> @@ -58,64 +83,169 @@ typedef struct PosixAioState {
> struct qemu_paiocb *first_aio;
>  } PosixAioState;
>
> +static void *threadlet_worker(void *data)
> +{
> +ThreadletQueue *queue = data;
>
> -static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
> -static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
> -static pthread_t thread_id;
> -static pthread_attr_t attr;
> -static int max_threads = 64;
> -static int cur_threads = 0;
> -static int idle_threads = 0;
> -static QTAILQ_HEAD(, qemu_paiocb) request_list;
> +qemu_mutex_lock(&queue->lock);
> +while (1) {
> +ThreadletWork *work;
> +int ret = 0;
> +
> +while (QTAILQ_EMPTY(&queue->request_list) &&
> +   (ret != ETIMEDOUT)) {
> +/* wait for cond to be signalled or broadcast for 1000s */
> +ret = qemu_cond_timedwait((&queue->cond),
> + &(queue->lock), 10*10);
> +}
>
> -#ifdef CONFIG_PREADV
> -static int preadv_present = 1;
> -#else
> -static int preadv_present = 0;
> -#endif
> +assert(queue->idle_threads != 0);
> +if (QTAILQ_EMPTY(&queue->request_list)) {
> +if (queue->cur_threads > queue->min_threads) {
> +/* We retain the minimum number of threads */
> +break;
> +}
> +} else {
> +work = QTAILQ_FIRST(&queue->request_list);
> +QTAILQ_REMOVE(&queue->request_list, work, node);
>
> -static void die2(int err, const char *what)
> -{
> -fprintf(stderr, "%s failed: %s\n", what, strerror(err));
> -abort();
> +queue->idle_threads--;
> +qemu_mutex_unlock(&queue->lock);
> +
> +/* execute the work function */
> +work->func(work);
> +
> +qemu_mutex_lock(&queue->lock);
> +queue->idle_threads++;
> +}
> +}
> +
> +queue->idle_threads--;
> +queue->cur_threads--;
> +qemu_mutex_unlock(&queue->lock);
> +
> +return NULL;
>  }
>
> -static void die(const char *what)
> +static void spawn_threadlet(ThreadletQueue *queue)
>  {
> -die2(errno, what);
> +QemuThread thread;
> +
> +queue->cur_threads++;
> +queue->idle_threads++;
> +
> +qemu_thread_create(&thread, threadlet_worker, queue);
>  }
>
> -static void mutex_lock(pthread_mutex_t *mutex)
> +/**
> + * submit_work_to_queue: Submit a new task to a private queue to be
> + *executed asynchronously.
> + * @queue: Per-subsystem private queue to which the new task needs
> + * to be submitted.
> + * @work: Contains information about the task that needs to be submitted.
> + */
> +static void submit_work_to_queue(ThreadletQueue *queue, ThreadletWork *work)
>  {
> -int ret = pthread_mutex_lock(mutex);
> -if (ret) die2(ret, "pthread_mutex_lock");
> +qe

Re: [Qemu-devel] [PATCH] trace: Trace SCSI request lifecycle

2010-11-08 Thread Stefan Hajnoczi
On Mon, Nov 8, 2010 at 7:18 PM, Blue Swirl  wrote:
> On Mon, Nov 8, 2010 at 9:44 AM, Stefan Hajnoczi
>  wrote:
>> This patch adds trace events for SCSI request allocation, freeing, CDB
>> parsing, read, and write.  These trace events can be used to instrument
>> the SCSI request lifecycle.
>
> How about converting also the DPRINTFs in hw/scsi-disk.c to tracepoints?

Good idea.  I don't understand the SCSI emulation well enough to trace
the rest, so I couldn't do much besides a mechanical conversion.

Stefan



[Qemu-devel] Re: [PATCH 0/6] Save state error handling (kill off no_migrate)

2010-11-08 Thread Alex Williamson
On Mon, 2010-11-08 at 22:59 +0200, Michael S. Tsirkin wrote:
> On Mon, Nov 08, 2010 at 10:20:46AM -0700, Alex Williamson wrote:
> > On Mon, 2010-11-08 at 18:54 +0200, Michael S. Tsirkin wrote:
> > > On Mon, Nov 08, 2010 at 07:59:57AM -0700, Alex Williamson wrote:
> > > > On Mon, 2010-11-08 at 13:40 +0200, Michael S. Tsirkin wrote:
> > > > > On Wed, Oct 06, 2010 at 02:58:57PM -0600, Alex Williamson wrote:
> > > > > > Our code paths for saving or migrating a VM are full of functions 
> > > > > > that
> > > > > > return void, leaving no opportunity for a device to cancel a 
> > > > > > migration,
> > > > > > either from error or incompatibility.  The ivshmem driver attempted 
> > > > > > to
> > > > > > solve this with a no_migrate flag on the save state entry.  I think 
> > > > > > the
> > > > > > more generic and flexible way to solve this is to allow driver save
> > > > > > functions to fail.  This series implements that and converts ivshmem
> > > > > > to uses a set_params function to NAK migration much earlier in the
> > > > > > processes.  This touches a lot of files, but bulk of those changes 
> > > > > > are
> > > > > > simply s/void/int/ and tacking a "return 0" to the end of functions.
> > > > > > Thanks,
> > > > > > 
> > > > > > Alex
> > > > > 
> > > > > Well error handling is always tricky: it seems easier to
> > > > > require save handlers to never fail.
> > > > 
> > > > Sure it's easier, but does that make it robust?
> > > 
> > > More robust in the face of wwhat kind of failure?
> > 
> > I really don't understand why we're having a discussion about whether
> > providing a means to return an error is a good thing or not.  These
> > patches touch a lot of files, but the change is dead simple.
> 
> I just don't see the motivation. Presumably your patches are
> there to achieve some kind of goal, right? I am trying to
> figure out what that goal is.

My goal is that I want to be able to NAK a migration when devices are
assigned, and I think we can do it more generically than the no_migrate
flag so that it supports this application and any other reason that
saves might fail in the future.

> Currently savevm callbacks never fail. So they
> return void. Why is returing 0 and adding a bunch of code to test the
> condition that never happens a good idea?  It just seems to create more
> ways for devices to shoot themselves in the foot.

And more ways to indicate something bad happened and keep running.  We
already have far too many abort() calls in the code.

> > > > > So there's a bunch of code here but what exactly is the benefit?
> > > > > Since save handlers have no idea what does the remote do,
> > > > > what is the compatibility you mention?
> > > > 
> > > > There are two users I currently have in mind.  ivshmem currently makes
> > > > use of the register_device_unmigratable() because it makes use of host
> > > > specific resources and connections (aiui).  This sets the no_migrate
> > > > flag, which is not dynamic and a bit of a band-aide.
> > > >  The other is
> > > > device assignment, which needs a way to NAK a migration since physical
> > > > devices are never migratable.
> > > 
> > > Well since all these can't be migrated ever, a fixed property actually 
> > > seems
> > > a good match.  Sure it's not dynamic but all the easier to debug.
> > > 
> > > >  I imagine we could at some point have
> > > > devices with state tied to other features that can't always be detached
> > > > from the host, this tries to provide the infrastructure for that to
> > > > happen.
> > > > 
> > > > Alex
> > > 
> > > Let guest control whether you can migrate?
> > > Sounds like something that is more likely to be abused
> > > than used constructively. 
> > 
> > s/guest/device/  So you would rather the migration failed on the
> > incoming side where it may not be detected
> 
> And incoming migration handlers *must* validate the input, anyway.
> We should not plaster over this with checks on outgoing side.

I'm not in any way suggesting incoming shouldn't do validation.

> > or it may be detected too
> > late to stop the migration?
> > 
> > Alex
> 
> So there's a bug and device is in an unexpected state.
> What can we do? Assert, print an error, notify guest - all these
> come to mind. But stop migration? Seems arbitrary.

Perhaps the problem is that either an assert or an fprintf are the first
things that come to mind.  We shouldn't have guests randomly blowing up
or telling users to go scan through their log files to find errors.
It's not very hard to allow simple error handling, so why shouldn't our
first plan of attack be to return an error so that the human/qmp monitor
can detect it and inform the user.  For the current candidates for this
interface, there's no point notifying the guest, it's the interface
attempting to do the migration that needs to know there's something
blocking it.

Alex






Re: [Qemu-devel] [Bug 671831] [NEW] Sparc guest assert error

2010-11-08 Thread Blue Swirl
On Mon, Nov 8, 2010 at 2:36 PM, Stefan Hajnoczi  wrote:
> I just gave the SPARC Linux 2.6 image from qemu.org a spin on sun4m but no 
> luck:
>
> sparc-softmmu/qemu-system-sparc -kernel
> '/tmp/sparc-test/vmlinux-2.6.11+tcx' -initrd
> '/tmp/sparc-test/linux.img' -append "root=/dev/ram" -drive
> if=scsi,file=test.raw,cache=none
>
> The kernel correctly notices the sda device and detects partitions (so
> it is doing disk reads).  There is no assertion error so this problem
> may be specific to the Linux 2.4 ESP driver.

At least Gentoo 2004.1 (Linux 2.4.25-sparc-r1) live CD works.



[Qemu-devel] [PATCH 0/2] Add support for SystemTAP and DTrace tracing backends (v5)

2010-11-08 Thread Daniel P. Berrange
A repost of the SystemTAP/DTrace patches from

  http://lists.gnu.org/archive/html/qemu-devel/2010-11/msg00496.html

The patch is now split into two pieces. 

The first patch contains the generic DTrace tracing backend support.

The second patch contains additional pieces for SystemTAP to generate
a tapset for each of the qemu-system-XXX binaries, to simplify life
for admins wanting to use the tracing backend. This addresses the 
problem in previous versions of the patch where the tapset only worked 
for /usr/bin/qemu and no other binaries. Unfortunately SystemTAP does 
not allow for use of wildcards, so it was neccessary to go for a separate 
tapset file per emulator binary.

Changes in v5:

 - The Makefile merge error is now fixed




[Qemu-devel] [PATCH 1/2] Add a DTrace tracing backend targetted for SystemTAP compatability

2010-11-08 Thread Daniel P. Berrange
This introduces a new tracing backend that targets the SystemTAP
implementation of DTrace userspace tracing. The core functionality
should be applicable and standard across any DTrace implementation
on Solaris, OS-X, *BSD, but the Makefile rules will likely need
some small additional changes to cope with OS specific build
requirements.

This backend builds a little differently from the other tracing
backends. Specifically there is no 'trace.c' file, because the
'dtrace' command line tool generates a '.o' file directly from
the dtrace probe definition file. The probe definition is usually
named with a '.d' extension but QEMU uses '.d' files for its
external makefile dependancy tracking, so this uses '.dtrace' as
the extension for the probe definition file.

The 'tracetool' program gains the ability to generate a trace.h
file for DTrace, and also to generate the trace.d file containing
the dtrace probe definition.

Example usage of a dtrace probe in systemtap looks like:

  probe process("qemu").mark("qemu_malloc") {
printf("Malloc %d %p\n", $arg1, $arg2);
  }

* .gitignore: Ignore trace-dtrace.*
* Makefile: Extra rules for generating DTrace files
* Makefile.obj: Don't build trace.o for DTrace, use
  trace-dtrace.o generated by 'dtrace' instead
* tracetool: Support for generating DTrace data files

Signed-off-by: Daniel P. Berrange 
---
 .gitignore|2 +
 Makefile  |   23 +++
 Makefile.objs |4 ++
 configure |   14 ++-
 tracetool |  116 -
 5 files changed, 148 insertions(+), 11 deletions(-)

diff --git a/.gitignore b/.gitignore
index a43e4d1..3efb4ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,8 @@ config-host.*
 config-target.*
 trace.h
 trace.c
+trace-dtrace.h
+trace-dtrace.dtrace
 *-timestamp
 *-softmmu
 *-darwin-user
diff --git a/Makefile b/Makefile
index 02698e9..554ad97 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,9 @@
 # Makefile for QEMU.
 
 GENERATED_HEADERS = config-host.h trace.h qemu-options.def
+ifeq ($(TRACE_BACKEND),dtrace)
+GENERATED_HEADERS += trace-dtrace.h
+endif
 
 ifneq ($(wildcard config-host.mak),)
 # Put the all: rule here so that config-host.mak can contain dependencies.
@@ -108,7 +111,11 @@ ui/vnc.o: QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
 
 bt-host.o: QEMU_CFLAGS += $(BLUEZ_CFLAGS)
 
+ifeq ($(TRACE_BACKEND),dtrace)
+trace.h: trace.h-timestamp trace-dtrace.h
+else
 trace.h: trace.h-timestamp
+endif
 trace.h-timestamp: $(SRC_PATH)/trace-events config-host.mak
$(call quiet-command,sh $(SRC_PATH)/tracetool --$(TRACE_BACKEND) -h < 
$< > $@,"  GEN   trace.h")
@cmp -s $@ trace.h || cp $@ trace.h
@@ -120,6 +127,20 @@ trace.c-timestamp: $(SRC_PATH)/trace-events config-host.mak
 
 trace.o: trace.c $(GENERATED_HEADERS)
 
+trace-dtrace.h: trace-dtrace.dtrace
+   $(call quiet-command,dtrace -o $@ -h -s $<, "  GEN   trace-dtrace.h")
+
+# Normal practice is to name DTrace probe file with a '.d' extension
+# but that gets picked up by QEMU's Makefile as an external dependancy
+# rule file. So we use '.dtrace' instead
+trace-dtrace.dtrace: trace-dtrace.dtrace-timestamp
+trace-dtrace.dtrace-timestamp: $(SRC_PATH)/trace-events config-host.mak
+   $(call quiet-command,sh $(SRC_PATH)/tracetool --$(TRACE_BACKEND) -d < 
$< > $@,"  GEN   trace-dtrace.dtrace")
+   @cmp -s $@ trace-dtrace.dtrace || cp $@ trace-dtrace.dtrace
+
+trace-dtrace.o: trace-dtrace.dtrace $(GENERATED_HEADERS)
+   $(call quiet-command,dtrace -o $@ -G -s $<, "  GEN trace-dtrace.o")
+
 simpletrace.o: simpletrace.c $(GENERATED_HEADERS)
 
 version.o: $(SRC_PATH)/version.rc config-host.mak
@@ -157,6 +178,8 @@ clean:
rm -f slirp/*.o slirp/*.d audio/*.o audio/*.d block/*.o block/*.d 
net/*.o net/*.d fsdev/*.o fsdev/*.d ui/*.o ui/*.d
rm -f qemu-img-cmds.h
rm -f trace.c trace.h trace.c-timestamp trace.h-timestamp
+   rm -f trace-dtrace.dtrace trace-dtrace.dtrace-timestamp
+   rm -f trace-dtrace.h trace-dtrace.h-timestamp
$(MAKE) -C tests clean
for d in $(ALL_SUBDIRS) libhw32 libhw64 libuser libdis libdis-user; do \
if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \
diff --git a/Makefile.objs b/Makefile.objs
index faf485e..84fc80e 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -285,11 +285,15 @@ libdis-$(CONFIG_SPARC_DIS) += sparc-dis.o
 ##
 # trace
 
+ifeq ($(TRACE_BACKEND),dtrace)
+trace-obj-y = trace-dtrace.o
+else
 trace-obj-y = trace.o
 ifeq ($(TRACE_BACKEND),simple)
 trace-obj-y += simpletrace.o
 user-obj-y += qemu-timer-common.o
 endif
+endif
 
 vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
 
diff --git a/configure b/configure
index 7025d2b..f8dad3e 100755
--- a/configure
+++ b/configure
@@ -929,7 +929,7 @@ echo "  --enable-docsenable documentation build"
 echo "  --disable-docs   disable documentation build"
 echo "  --disable-vhost-net  disable vhost-net acceleratio

[Qemu-devel] [PATCH 2/2] Add support for generating a systemtap tapset static probes

2010-11-08 Thread Daniel P. Berrange
This introduces generation of a qemu.stp/qemu-system-XXX.stp
files which provides tapsets with friendly names for static
probes & their arguments. Instead of

probe process("qemu").mark("qemu_malloc") {
printf("Malloc %d %p\n", $arg1, $arg2);
}

It is now possible todo

probe qemu.system.i386.qemu_malloc {
printf("Malloc %d %p\n", size, ptr);
}

There is one tapset defined per target arch.

* Makefile: Generate a qemu.stp file for systemtap
* tracetool: Support for generating systemtap tapsets

Signed-off-by: Daniel P. Berrange 
---
 Makefile.target |   19 +++-
 configure   |7 
 tracetool   |   92 +++
 3 files changed, 117 insertions(+), 1 deletions(-)

diff --git a/Makefile.target b/Makefile.target
index 91e6e74..a5e6410 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -40,7 +40,20 @@ kvm.o kvm-all.o vhost.o vhost_net.o: 
QEMU_CFLAGS+=$(KVM_CFLAGS)
 config-target.h: config-target.h-timestamp
 config-target.h-timestamp: config-target.mak
 
-all: $(PROGS)
+ifdef CONFIG_SYSTEMTAP_TRACE
+trace: $(QEMU_PROG).stp
+
+$(QEMU_PROG).stp:
+   $(call quiet-command,sh $(SRC_PATH)/tracetool \
+   --$(TRACE_BACKEND) \
+   --bindir $(bindir) \
+   --target $(TARGET_ARCH) \
+   -s < $(SRC_PATH)/trace-events > $(QEMU_PROG).stp,"  GEN   
$(QEMU_PROG).stp")
+else
+trace:
+endif
+
+all: $(PROGS) trace
 
 # Dummy command so that make thinks it has done something
@true
@@ -348,6 +361,10 @@ ifneq ($(STRIP),)
$(STRIP) $(patsubst %,"$(DESTDIR)$(bindir)/%",$(PROGS))
 endif
 endif
+ifdef CONFIG_SYSTEMTAP_TRACE
+   $(INSTALL_DIR) "$(DESTDIR)$(datadir)/../systemtap/tapset"
+   $(INSTALL_DATA) $(QEMU_PROG).stp 
"$(DESTDIR)$(datadir)/../systemtap/tapset"
+endif
 
 # Include automatically generated dependency files
 -include $(wildcard *.d */*.d)
diff --git a/configure b/configure
index f8dad3e..e560f87 100755
--- a/configure
+++ b/configure
@@ -2192,6 +2192,10 @@ EOF
 echo
 exit 1
   fi
+  trace_backend_stap="no"
+  if has 'stap' ; then
+trace_backend_stap="yes"
+  fi
 fi
 
 ##
@@ -2645,6 +2649,9 @@ fi
 if test "$trace_backend" = "simple"; then
   trace_file="\"$trace_file-%u\""
 fi
+if test "$trace_backend" = "dtrace" -a "$trace_backend_stap" = "yes" ; then
+  echo "CONFIG_SYSTEMTAP_TRACE=y" >> $config_host_mak
+fi
 echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak
 
 echo "TOOLS=$tools" >> $config_host_mak
diff --git a/tracetool b/tracetool
index 5b6636a..d797ab7 100755
--- a/tracetool
+++ b/tracetool
@@ -26,6 +26,12 @@ Output formats:
   -hGenerate .h file
   -cGenerate .c file
   -dGenerate .d file (DTrace only)
+  -sGenerate .stp file (DTrace with SystemTAP only)
+
+Options:
+  --bindir [bindir]  QEMU binary install location
+  --target [arch]QEMU target architecture
+
 EOF
 exit 1
 }
@@ -390,6 +396,54 @@ linetod_end_dtrace()
 EOF
 }
 
+linetos_begin_dtrace()
+{
+return
+}
+
+linetos_dtrace()
+{
+local name args arglist state
+name=$(get_name "$1")
+args=$(get_args "$1")
+arglist=$(get_argnames "$1", "")
+state=$(get_state "$1")
+if [ "$state" = "0" ] ; then
+name=${name##disable }
+fi
+
+if [ "$target" = "i386" ]
+then
+  binary="qemu"
+else
+  binary="qemu-system-$target"
+fi
+
+# Define prototype for probe arguments
+cat <

Re: [Qemu-devel] [PATCH] trace: Trace SCSI request lifecycle

2010-11-08 Thread Blue Swirl
On Mon, Nov 8, 2010 at 9:44 AM, Stefan Hajnoczi
 wrote:
> This patch adds trace events for SCSI request allocation, freeing, CDB
> parsing, read, and write.  These trace events can be used to instrument
> the SCSI request lifecycle.

How about converting also the DPRINTFs in hw/scsi-disk.c to tracepoints?

>
> Signed-off-by: Stefan Hajnoczi 
> ---
>  hw/scsi-bus.c  |    5 +
>  hw/scsi-disk.c |    9 +
>  trace-events   |   11 +++
>  3 files changed, 25 insertions(+), 0 deletions(-)
>
> diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
> index 5a3fd4b..0228d4f 100644
> --- a/hw/scsi-bus.c
> +++ b/hw/scsi-bus.c
> @@ -4,6 +4,7 @@
>  #include "scsi-defs.h"
>  #include "qdev.h"
>  #include "blockdev.h"
> +#include "trace.h"
>
>  static struct BusInfo scsi_bus_info = {
>     .name  = "SCSI",
> @@ -145,6 +146,8 @@ SCSIRequest *scsi_req_alloc(size_t size, SCSIDevice *d, 
> uint32_t tag, uint32_t l
>     req->status = -1;
>     req->enqueued = true;
>     QTAILQ_INSERT_TAIL(&d->requests, req, next);
> +
> +    trace_scsi_req_alloc(d, tag, lun, req);
>     return req;
>  }
>
> @@ -172,6 +175,7 @@ void scsi_req_free(SCSIRequest *req)
>  {
>     scsi_req_dequeue(req);
>     qemu_free(req);
> +    trace_scsi_req_free(req);
>  }
>
>  static int scsi_req_length(SCSIRequest *req, uint8_t *cmd)
> @@ -395,6 +399,7 @@ int scsi_req_parse(SCSIRequest *req, uint8_t *buf)
>     memcpy(req->cmd.buf, buf, req->cmd.len);
>     scsi_req_xfer_mode(req);
>     req->cmd.lba = scsi_req_lba(req);
> +    trace_scsi_req_parse(req, req->cmd.len, req->cmd.lba, req->cmd.buf[0]);
>     return 0;
>  }
>
> diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
> index dc71957..c7a3f2a 100644
> --- a/hw/scsi-disk.c
> +++ b/hw/scsi-disk.c
> @@ -37,6 +37,7 @@ do { fprintf(stderr, "scsi-disk: " fmt , ## __VA_ARGS__); } 
> while (0)
>  #include "scsi-defs.h"
>  #include "sysemu.h"
>  #include "blockdev.h"
> +#include "trace.h"
>
>  #define SCSI_DMA_BUF_SIZE    131072
>  #define SCSI_MAX_INQUIRY_LEN 256
> @@ -136,6 +137,8 @@ static void scsi_read_complete(void * opaque, int ret)
>     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
>     int n;
>
> +    trace_scsi_read_complete(r, ret);
> +
>     r->req.aiocb = NULL;
>
>     if (ret) {
> @@ -158,6 +161,8 @@ static void scsi_read_request(SCSIDiskReq *r)
>     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
>     uint32_t n;
>
> +    trace_scsi_read_request(r, r->sector, r->sector_count);
> +
>     if (r->sector_count == (uint32_t)-1) {
>         DPRINTF("Read buf_len=%zd\n", r->iov.iov_len);
>         r->sector_count = 0;
> @@ -240,6 +245,8 @@ static void scsi_write_complete(void * opaque, int ret)
>     uint32_t len;
>     uint32_t n;
>
> +    trace_scsi_write_complete(r, ret);
> +
>     r->req.aiocb = NULL;
>
>     if (ret) {
> @@ -269,6 +276,8 @@ static void scsi_write_request(SCSIDiskReq *r)
>     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
>     uint32_t n;
>
> +    trace_scsi_write_request(r, r->sector, r->iov.iov_len);
> +
>     n = r->iov.iov_len / 512;
>     if (n) {
>         qemu_iovec_init_external(&r->qiov, &r->iov, 1);
> diff --git a/trace-events b/trace-events
> index 947f8b0..7069628 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -189,3 +189,14 @@ disable sun4m_iommu_mem_writel_pgflush(uint32_t val) 
> "page flush %x"
>  disable sun4m_iommu_page_get_flags(uint64_t pa, uint64_t iopte, uint32_t 
> ret) "get flags addr %"PRIx64" => pte %"PRIx64", *pte = %x"
>  disable sun4m_iommu_translate_pa(uint64_t addr, uint64_t pa, uint32_t iopte) 
> "xlate dva %"PRIx64" => pa %"PRIx64" iopte = %x"
>  disable sun4m_iommu_bad_addr(uint64_t addr) "bad addr %"PRIx64""
> +
> +# hw/scsi-bus.c
> +disable scsi_req_alloc(void *dev, uint32_t tag, uint32_t lun, void *req) 
> "dev %p tag %#x lun %#x req %p"
> +disable scsi_req_free(void *req) "req %p"
> +disable scsi_req_parse(void *req, int len, uint64_t lba, uint8_t cmd) "req 
> %p len %d lba %"PRIu64" cmd %#x"
> +
> +# hw/scsi-disk.c
> +disable scsi_read_request(void *req, uint64_t sector, uint32_t sector_count) 
> "req %p sector %"PRIu64" sector_count %u"
> +disable scsi_read_complete(void *req, int ret) "req %p ret %d"
> +disable scsi_write_request(void *req, uint64_t sector, size_t len) "req %p 
> sector %"PRIu64" len %zu"
> +disable scsi_write_complete(void *req, int ret) "req %p ret %d"
> --
> 1.7.2.3
>
>
>



Re: [Qemu-devel] [PATCH 1/2] Add a DTrace tracing backend targetted for SystemTAP compatability

2010-11-08 Thread Stefan Hajnoczi
On Mon, Nov 8, 2010 at 4:57 PM, Daniel P. Berrange  wrote:
> On Mon, Nov 08, 2010 at 04:52:01PM +, Stefan Hajnoczi wrote:
>> On Mon, Nov 8, 2010 at 11:33 AM, Daniel P. Berrange  
>> wrote:
>> > diff --git a/Makefile b/Makefile
>> > index 02698e9..384bf72 100644
>> > --- a/Makefile
>> > +++ b/Makefile
>> > @@ -1,6 +1,10 @@
>> >  # Makefile for QEMU.
>> >
>> >  GENERATED_HEADERS = config-host.h trace.h qemu-options.def
>> > +GENERATED_HEADERS = config-host.h trace.h
>> > +ifeq ($(TRACE_BACKEND),dtrace)
>> > +GENERATED_HEADERS += trace-dtrace.h
>> > +endif
>>
>> What happened to qemu-options.def?  GENERATED_HEADERS is defined
>> twice, the first line should probably be deleted.
>
> Oh, the loss of qemu-options.def is a rebase/merge error. GENERATED_HEADERS
> isn't defined twice, because the second line is using += to append to the
> first definition.

Ah, it's the merge error I was referring to:

 @@ -1,6 +1,10 @@
 # Makefile for QEMU.

 GENERATED_HEADERS = config-host.h trace.h qemu-options.def
+GENERATED_HEADERS = config-host.h trace.h

Stefan



[Qemu-devel] [patch 2/3] block: set sector dirty on AIO write completion

2010-11-08 Thread Marcelo Tosatti
Sectors are marked dirty in the bitmap on AIO submission. This is wrong
since data has not reached storage.

Set a given sector as dirty in the dirty bitmap on AIO completion, so that
reading a sector marked as dirty is guaranteed to return uptodate data.

Signed-off-by: Marcelo Tosatti 

Index: qemu-kvm/block.c
===
--- qemu-kvm.orig/block.c
+++ qemu-kvm/block.c
@@ -2018,12 +2018,49 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDr
 return ret;
 }
 
+typedef struct BlockCompleteData {
+BlockDriverCompletionFunc *cb;
+void *opaque;
+BlockDriverState *bs;
+int64_t sector_num;
+int nb_sectors;
+} BlockCompleteData;
+
+static void block_complete_cb(void *opaque, int ret)
+{
+BlockCompleteData *b = opaque;
+
+if (b->bs->dirty_bitmap) {
+set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
+}
+b->cb(b->opaque, ret);
+qemu_free(b);
+}
+
+static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+BlockCompleteData *blkdata = qemu_mallocz(sizeof(BlockCompleteData));
+
+blkdata->bs = bs;
+blkdata->cb = cb;
+blkdata->opaque = opaque;
+blkdata->sector_num = sector_num;
+blkdata->nb_sectors = nb_sectors;
+
+return blkdata;
+}
+
 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
   QEMUIOVector *qiov, int nb_sectors,
   BlockDriverCompletionFunc *cb, void *opaque)
 {
 BlockDriver *drv = bs->drv;
 BlockDriverAIOCB *ret;
+BlockCompleteData *blk_cb_data;
 
 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
 
@@ -2035,7 +2072,10 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockD
 return NULL;
 
 if (bs->dirty_bitmap) {
-set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
+blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
+ opaque);
+cb = &block_complete_cb;
+opaque = blk_cb_data;
 }
 
 ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,





[Qemu-devel] [patch 1/3] block: fix shift in dirty bitmap calculation

2010-11-08 Thread Marcelo Tosatti
Otherwise upper 32 bits of bitmap entries are not correctly calculated.

Signed-off-by: Marcelo Tosatti 

Index: qemu-kvm/block.c
===
--- qemu-kvm.orig/block.c
+++ qemu-kvm/block.c
@@ -930,14 +930,14 @@ static void set_dirty_bitmap(BlockDriver
 bit = start % (sizeof(unsigned long) * 8);
 val = bs->dirty_bitmap[idx];
 if (dirty) {
-if (!(val & (1 << bit))) {
+if (!(val & (1UL << bit))) {
 bs->dirty_count++;
-val |= 1 << bit;
+val |= 1UL << bit;
 }
 } else {
-if (val & (1 << bit)) {
+if (val & (1UL << bit)) {
 bs->dirty_count--;
-val &= ~(1 << bit);
+val &= ~(1UL << bit);
 }
 }
 bs->dirty_bitmap[idx] = val;
@@ -2672,8 +2672,8 @@ int bdrv_get_dirty(BlockDriverState *bs,
 
 if (bs->dirty_bitmap &&
 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
-return bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
-(1 << (chunk % (sizeof(unsigned long) * 8)));
+return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
+(1UL << (chunk % (sizeof(unsigned long) * 8;
 } else {
 return 0;
 }





[Qemu-devel] [patch 3/3] block migration: do not submit multiple AIOs for same sector

2010-11-08 Thread Marcelo Tosatti
Block migration can submit multiple AIO reads for the same sector/chunk, but
completion of such reads can happen out of order:

migration   guest
- get_dirty(N)
- aio_read(N) 
- clear_dirty(N)
write(N)
set_dirty(N)
- get_dirty(N)
- aio_read(N)

If the first aio_read completes after the second, stale data will be 
migrated to the destination.

Fix by not allowing multiple AIOs inflight for the same sector.

Signed-off-by: Marcelo Tosatti 


Index: qemu-kvm/block-migration.c
===
--- qemu-kvm.orig/block-migration.c
+++ qemu-kvm/block-migration.c
@@ -49,12 +49,14 @@ typedef struct BlkMigDevState {
 int64_t total_sectors;
 int64_t dirty;
 QSIMPLEQ_ENTRY(BlkMigDevState) entry;
+unsigned long *aio_bitmap;
 } BlkMigDevState;
 
 typedef struct BlkMigBlock {
 uint8_t *buf;
 BlkMigDevState *bmds;
 int64_t sector;
+int nr_sectors;
 struct iovec iov;
 QEMUIOVector qiov;
 BlockDriverAIOCB *aiocb;
@@ -140,6 +142,57 @@ static inline long double compute_read_b
 return  (block_mig_state.reads * BLOCK_SIZE)/ block_mig_state.total_time;
 }
 
+static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
+{
+int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+if (bmds->aio_bitmap &&
+(sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
+return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
+(1UL << (chunk % (sizeof(unsigned long) * 8;
+} else {
+return 0;
+}
+}
+
+static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
+ int nb_sectors, int set)
+{
+int64_t start, end;
+unsigned long val, idx, bit;
+
+start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
+end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+for (; start <= end; start++) {
+idx = start / (sizeof(unsigned long) * 8);
+bit = start % (sizeof(unsigned long) * 8);
+val = bmds->aio_bitmap[idx];
+if (set) {
+if (!(val & (1UL << bit))) {
+val |= 1UL << bit;
+}
+} else {
+if (val & (1UL << bit)) {
+val &= ~(1UL << bit);
+}
+}
+bmds->aio_bitmap[idx] = val;
+}
+}
+
+static void alloc_aio_bitmap(BlkMigDevState *bmds)
+{
+BlockDriverState *bs = bmds->bs;
+int64_t bitmap_size;
+
+bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
+BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
+bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
+
+bmds->aio_bitmap = qemu_mallocz(bitmap_size);
+}
+
 static void blk_mig_read_cb(void *opaque, int ret)
 {
 BlkMigBlock *blk = opaque;
@@ -151,6 +204,7 @@ static void blk_mig_read_cb(void *opaque
 add_avg_read_time(blk->time);
 
 QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
+bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
 
 block_mig_state.submitted--;
 block_mig_state.read_done++;
@@ -194,6 +248,7 @@ static int mig_save_device_bulk(Monitor 
 blk->buf = qemu_malloc(BLOCK_SIZE);
 blk->bmds = bmds;
 blk->sector = cur_sector;
+blk->nr_sectors = nr_sectors;
 
 blk->iov.iov_base = blk->buf;
 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
@@ -248,6 +303,7 @@ static void init_blk_migration_it(void *
 bmds->total_sectors = sectors;
 bmds->completed_sectors = 0;
 bmds->shared_base = block_mig_state.shared_base;
+alloc_aio_bitmap(bmds);
 
 block_mig_state.total_sector_sum += sectors;
 
@@ -329,6 +385,8 @@ static int mig_save_device_dirty(Monitor
 int nr_sectors;
 
 for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
+if (bmds_aio_inflight(bmds, sector))
+qemu_aio_flush();
 if (bdrv_get_dirty(bmds->bs, sector)) {
 
 if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
@@ -340,6 +398,7 @@ static int mig_save_device_dirty(Monitor
 blk->buf = qemu_malloc(BLOCK_SIZE);
 blk->bmds = bmds;
 blk->sector = sector;
+blk->nr_sectors = nr_sectors;
 
 if (is_async) {
 blk->iov.iov_base = blk->buf;
@@ -354,6 +413,7 @@ static int mig_save_device_dirty(Monitor
 goto error;
 }
 block_mig_state.submitted++;
+bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
 } else {
 if (bdrv_read(bmds->bs, sector, blk->buf,
   nr_sectors) < 0) {
@@ -474,6 +534,7 @@ static void blk_mig_cleanup(Monitor *mon
 
 while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
 QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
+qemu_free(bmds->aio_bitmap);
 qemu_

[Qemu-devel] [patch 0/3] block migration fixes

2010-11-08 Thread Marcelo Tosatti
Following patchset fixes block migration corruption issues.




Re: [Qemu-devel] [PATCH 0/3] v4 Decouple block device removal from device removal

2010-11-08 Thread Daniel P. Berrange
On Mon, Nov 08, 2010 at 12:39:01PM -0600, Ryan Harper wrote:
> * Michael S. Tsirkin  [2010-11-08 10:57]:
> > On Mon, Nov 08, 2010 at 08:02:50AM -0600, Ryan Harper wrote:
> > > * Markus Armbruster  [2010-11-08 06:04]:
> > > > "Michael S. Tsirkin"  writes:
> > > > 
> > > > > On Mon, Nov 08, 2010 at 11:32:01AM +0100, Markus Armbruster wrote:
> > 
> > Daniel, I'd like your input here: can you live with
> > device diappearing from info block and parsing
> > qdev tree info to figure out whether device is really gone?
> 
> AFAICT, libvirt doesn't look at or use info block at all.
> 
> I'd rather not have to add info block to libvirt; but currently I can't
> see how else we can determine if we should call drive_unplug if we do a
> device_del() and the guest removes it before we call drive_unplug().  
> 
> What happens is that the guest removes the device and when we call
> drive_unplug() it fails to find the target device (since it was deleted
> by the guest). Then we fail the PCiDelDisk and libvirt keeps the device
> config around even though the guest has finished removing it.

This needs drive_unplug to return an explicitly identifiable
'no such device' error code, which libvirt can catch and
ignore.  Making the call to drive_unplug conditional on a
check to query-block/query-qdev is really a bug, because it
has an designed in race condition which means you need to 
check for a 'no such device' error code regardless. So it
is better to just blindly call drive_unplug and handle the
non-fatal failure conditions every time - this ensures that
codepath gets exercised more frequently too :-)

Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|



[Qemu-devel] Re: phys_page_find bug?

2010-11-08 Thread Artyom Tarasenko
On Fri, May 7, 2010 at 6:26 PM, Artyom Tarasenko
 wrote:
> phys_page_find (exec.c) returns sometimes a page for addresses where
> nothing is connected.
>
> One example, done with qemu-system-sparc -M SS-20
>
> ok f130 2f spacec@ .
>
> // The address translates correctly, in cpu_physical_memory_rw
> // addr== 0xff130 (where nothing is connected)
> // but then phys_page_find returns a nonzero and produces
>
> Unassigned mem read access of 1 byte to 000ff150 from x
>
> (note the "5" in the line above where "3" is expected)
>
> I wonder if this is only true for non-wired addresses, or whether
> phys_page_find can also
> find wrong pages for the addresses where something is connected?
>
> Or is my assumption is wrong and phys_page_find can return a page for
> not-connected
> addresses and the bug is actually in cpu_physical_memory_rw ?
>
> Is the qemu algorithm of working with the physical address space
> described somewhere?

I tried to switch devices off and found that the bug is triggered by
registering escc.
It's harder to debug without escc, so I can't tell whether something
else is causing
the problem too.

Is escc addressing somehow special?

>Is the qemu algorithm of working with the physical address space described 
>somewhere?

I guess no one knows it anymore, since no-one cared to answer within a
half year :-/.

-- 
Regards,
Artyom Tarasenko

solaris/sparc under qemu blog: http://tyom.blogspot.com/



Re: [Qemu-devel] [PATCH 0/3] v4 Decouple block device removal from device removal

2010-11-08 Thread Ryan Harper
* Daniel P. Berrange  [2010-11-08 11:05]:
> On Mon, Nov 08, 2010 at 06:56:02PM +0200, Michael S. Tsirkin wrote:
> > On Mon, Nov 08, 2010 at 08:02:50AM -0600, Ryan Harper wrote:
> > > * Markus Armbruster  [2010-11-08 06:04]:
> > > > "Michael S. Tsirkin"  writes:
> > > > >> Here's how the various objects are connected to each other:
> > > > >> 
> > > > >>contains
> > > > >> drivelist---> DriveInfo
> > > > >> |
> > > > >> | .bdrv
> > > > >> | .id == .bdrv->device_name
> > > > >> |
> > > > >>contains V
> > > > >> bdrv_states  ---> BlockDriverState
> > > > >>  |   ^
> > > > >>.peer |   |
> > > > >>  |   |  host part
> > > > >> -|---|---
> > > > >>  |   | guest part
> > > > >>  |   | property "drive"
> > > > >>  v   |
> > > > >>   DeviceState
> > > > >> 
> > > > >> To disconnect host from guest part, you need to cut both pointers.  
> > > > >> To
> > > > >> delete the host part, you need to delete both objects, 
> > > > >> BlockDriverState
> > > > >> and DriveInfo.
> > > > >
> > > > >
> > > > > If we remove DriveInfo, how can management later detect that guest 
> > > > > part
> > > > > was deleted?
> > > > 
> > > > Directly: check whether the qdev is gone.
> > > > 
> > > > I don't know how to check that indirectly, via DriveInfo.
> > > > 
> > > > >  If you want symmetry with netdev, it's possible to keep a
> > > > > shell of BlockDriverState/DriveInfo around (solving dangling pointer
> > > > > problems).
> > > > 
> > > > netdev_del deletes the host network part:
> > > > 
> > > > (qemu) info network
> > > > Devices not on any VLAN:
> > > >   net.0: net=10.0.2.0, restricted=n peer=nic.0
> > > >   nic.0: model=virtio-net-pci,macaddr=52:54:00:12:34:56 peer=net.0
> > > > (qemu) netdev_del net.0
> > > > (qemu) info network
> > > > Devices not on any VLAN:
> > > >   nic.0: model=virtio-net-pci,macaddr=52:54:00:12:34:56 peer=net.0
> > > > 
> > > > It leaves around the VLAN object.  Since qdev property points to that,
> > > > it doesn't dangle.
> > > > 
> > > > In my opinion, drive_del should make the drive vanish from "info block",
> > > 
> > > Yeah; that's the right thing to do here.  Let me respin the patch with
> > > the name change and the additional work to fix up the pointers and
> > > ensure that we don't see the drive in info block.
> > 
> > Daniel, I'd like your input here: can you live with
> > device diappearing from info block and parsing
> > qdev tree info to figure out whether device is really gone?
> 
> We don't use info block for anything. Having to parse the full qdev tree
> to determine if a single device is gone seems rather tedious. It would
> be better if query-qdev took an optional argument, which is the name
> of the device to root the tree at. Then checking whether a device
> named 'foo' is gone just means running 'query-qdev foo' and seeing if
> that returns an error about the device not existing, then we know it
> has gone. No need to parse anything. Being able to query the qdev data
> for a single device, or sub-tree of devices seems useful in its own
> right.

Since I'm not looking forward to parsing info block (easy) nor parsing
all of qdev tree (much harder) I really like the query approach.  

That makes it easy to put a query in the netdev_del/drive_del commands
to skip invoking them if the guest has already responded.

> 
> Regards,
> Daniel
> -- 
> |: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
> |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
> |: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
> |: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
ry...@us.ibm.com



Re: [Qemu-devel] [PATCH 0/3] v4 Decouple block device removal from device removal

2010-11-08 Thread Ryan Harper
* Michael S. Tsirkin  [2010-11-08 10:57]:
> On Mon, Nov 08, 2010 at 08:02:50AM -0600, Ryan Harper wrote:
> > * Markus Armbruster  [2010-11-08 06:04]:
> > > "Michael S. Tsirkin"  writes:
> > > 
> > > > On Mon, Nov 08, 2010 at 11:32:01AM +0100, Markus Armbruster wrote:
> > > >> Ryan Harper  writes:
> > > >> 
> > > >> > * Markus Armbruster  [2010-11-06 04:19]:
> > > >> >> Ryan Harper  writes:
> > > >> >> 
> > > >> >> > * Markus Armbruster  [2010-11-05 11:11]:
> > > >> >> >> Ryan Harper  writes:
> > > >> >> >> 
> > > >> >> >> > * Markus Armbruster  [2010-11-05 08:28]:
> > > >> >> >> >> I'd be fine with any of these:
> > > >> >> >> >> 
> > > >> >> >> >> 1. A new command "device_disconnet ID" (or similar name) to 
> > > >> >> >> >> disconnect
> > > >> >> >> >>device ID from any host parts.  Nice touch: you don't have 
> > > >> >> >> >> to know
> > > >> >> >> >>about the device's host part(s) to disconnect it.  But it 
> > > >> >> >> >> might be
> > > >> >> >> >>more work than the other two.
> > > >> >> >> >
> > > >> >> >> > This is sort of what netdev_del() and drive_unplug() are 
> > > >> >> >> > today; we're
> > > >> >> >> > just saying sever the connection of this device id.   
> > > >> >> >> 
> > > >> >> >> No, I have netdev_del as (3).
> > > >> >> >> 
> > > >> >> >> All three options are "sort of" the same, just different 
> > > >> >> >> commands with
> > > >> >> >> a common purpose.
> > > >> >> >> 
> > > >> >> >> > I'd like to rename drive_unplug() to blockdev_del() and call 
> > > >> >> >> > it done.  I
> > > >> >> >> > was looking at libvirt and the right call to netdev_del is 
> > > >> >> >> > already
> > > >> >> >> > in-place; I'd just need to re-spin my block patch to call 
> > > >> >> >> > blockdev_del()
> > > >> >> >> > after invoking device_del() to match what is done for net.
> > > >> >> >> 
> > > >> >> >> Unless I'm missing something, you can't just rename: your unplug 
> > > >> >> >> does
> > > >> >> >> not delete the host part.
> > > >> >> >> 
> > > >> >> >> >> 2. New commands netdev_disconnect, drive_disconnect (or 
> > > >> >> >> >> similar names)
> > > >> >> >> >>to disconnect a host part from a guest device.  Like (1), 
> > > >> >> >> >> except you
> > > >> >> >> >>have to point to the other end of the connection to cut it.
> > > >> >> >> >
> > > >> >> >> > What's the advantage here? We need an additional piece of info 
> > > >> >> >> > (host
> > > >> >> >> > part) in addition to the device id?
> > > >> >> >> 
> > > >> >> >> That's a disadvantage.
> > > >> >> >> 
> > > >> >> >> Possible advantage: implementation could be slightly easier than 
> > > >> >> >> (1),
> > > >> >> >> because you don't have to find the host parts.
> > > >> >> >> 
> > > >> >> >> >> 3. A new command "drive_del ID" similar to existing 
> > > >> >> >> >> netdev_del.  This is
> > > >> >> >> >>(2) fused with delete.  Conceptual wart: you can't 
> > > >> >> >> >> disconnect and
> > > >> >> >> >>keep the host part around.  Moreover, delete is slightly 
> > > >> >> >> >> dangerous,
> > > >> >> >> >>because it renders any guest device still using the host 
> > > >> >> >> >> part
> > > >> >> >> >>useless.
> > > >> >> >> >
> > > >> >> >> > Hrm, I thought that's what (1) is.
> > > >> >> >> 
> > > >> >> >> No.
> > > >> >> >> 
> > > >> >> >> With (1), the argument is a *device* ID, and we disconnect *all* 
> > > >> >> >> host
> > > >> >> >> parts connected to this device (typically just one).
> > > >> >> >> 
> > > >> >> >> With (3), the argument is a netdev/drive ID, and disconnect 
> > > >> >> >> *this* host
> > > >> >> >> part from the peer device.
> > > >> >> >> 
> > > >> >> >> > Well, either (1) or (3); 
> > > >> >> >> > I'd like to
> > > >> >> >> > rename drive_unplug() to blockdev_del() since they're similar 
> > > >> >> >> > function
> > > >> >> >> > w.r.t removing access to the host resource.  And we can invoke 
> > > >> >> >> > them in
> > > >> >> >> > the same way from libvirt (after doing guest notification, 
> > > >> >> >> > remove
> > > >> >> >> > access).
> > > >> >> >> 
> > > >> >> >> I'd call it drive_del for now, to match drive_add.
> > > >> >> >
> > > >> >> > OK, drive_del() and as you mentioned, drive_unplug will take out 
> > > >> >> > the
> > > >> >> > block driver, but doesn't remove the dinfo object; that ends up 
> > > >> >> > dying
> > > >> >> > when we call the device destructor.  I think for symmetry we'll 
> > > >> >> > want
> > > >> >> > drive_del to remove the dinfo object as well.
> > > >> >> 
> > > >> >> Exactly.
> > > >> >> 
> > > >> >> a. bdrv_detach() to zap the pointer from bdrv to qdev
> > > >> >> b. zap the pointer from qdev to bdrv
> > > >> >> c. drive_uninit() to dispose of the host part
> > > >> >
> > > >> > a-c need to be done to match netdev_del symmetry?  How hard of a req 
> > > >> > is
> > > >> > this?
> > > >> 
> > > >> Without (c), it's not a delete.  And (c) without (b) leaves a dangling
> > > >> pointer.  (c) 

[Qemu-devel] Re: [PATCHv2 7/8] Change pci bus get_dev_path callback to print only slot and func

2010-11-08 Thread Gleb Natapov
On Mon, Nov 08, 2010 at 08:12:55PM +0200, Michael S. Tsirkin wrote:
> On Mon, Nov 08, 2010 at 07:29:50PM +0200, Gleb Natapov wrote:
> > On Mon, Nov 08, 2010 at 07:17:37PM +0200, Michael S. Tsirkin wrote:
> > > On Sun, Oct 31, 2010 at 01:40:08PM +0200, Gleb Natapov wrote:
> > > > Domain should be determined form parent bus and bus number is configured
> > > > by guest and should not be used in qemu internally.
> > > > 
> > > > Signed-off-by: Gleb Natapov 
> > > > ---
> > > >  hw/pci.c |   11 ++-
> > > >  1 files changed, 6 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/hw/pci.c b/hw/pci.c
> > > > index 92aaa85..1c5706f 100644
> > > > --- a/hw/pci.c
> > > > +++ b/hw/pci.c
> > > > @@ -2138,12 +2138,13 @@ static void pcibus_dev_print(Monitor *mon, 
> > > > DeviceState *dev, int indent)
> > > >  static char *pcibus_get_dev_path(DeviceState *dev)
> > > >  {
> > > >  PCIDevice *d = (PCIDevice *)dev;
> > > > -char path[16];
> > > > -
> > > > -snprintf(path, sizeof(path), "%04x:%02x:%02x.%x",
> > > > - pci_find_domain(d->bus), d->config[PCI_SECONDARY_BUS],
> > > > - PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
> > > > +char path[50];
> > > > +int off;
> > > >  
> > > > +off = snprintf(path, sizeof(path), "%...@%x", 
> > > > qdev_driver_name(dev),
> > > > + PCI_SLOT(d->devfn));
> > > > +if (PCI_FUNC(d->devfn))
> > > > +snprintf(path + off, sizeof(path) + off, ",%x", 
> > > > PCI_FUNC(d->devfn));
> > > >  return strdup(path);
> > > >  }
> > > 
> > > Can we change separators to be : and . instead of @ and , please?
> > No. I am implementing well defined Open Firmware device path. Lets stop
> > reinventing things wrongly please?
> > 
> > > This will make it compatible with existing code and generally
> > > what users expect.
> > > 
> > It will drop domain and bus number part anyway.
> 
> Yes but most users at the moment have domain and bus 0 only.
> So even simply adding :00 prefix will work.
> 
Are we trying to build device path or do bunch of hacks? We already have
hack now. Why change anything at all? PPC Macs  (G500) had 3 pci buses.

--
Gleb.



[Qemu-devel] [PATCH] linux-user: remove unnecessary local from __get_user(), __put_user()

2010-11-08 Thread Peter Maydell
Remove an unnecessary local variable from the __get_user() and
__put_user() macros. This avoids confusing compilation failures
if the name of the local variable ('size') happens to be the
same as the variable the macro user is trying to read/write.

Signed-off-by: Peter Maydell 
---
 linux-user/qemu.h |6 ++
 1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 708021e..d717392 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -266,8 +266,7 @@ static inline int access_ok(int type, abi_ulong addr, 
abi_ulong size)
  */
 #define __put_user(x, hptr)\
 ({\
-int size = sizeof(*hptr);\
-switch(size) {\
+switch(sizeof(*hptr)) {\
 case 1:\
 *(uint8_t *)(hptr) = (uint8_t)(typeof(*hptr))(x);\
 break;\
@@ -288,8 +287,7 @@ static inline int access_ok(int type, abi_ulong addr, 
abi_ulong size)
 
 #define __get_user(x, hptr) \
 ({\
-int size = sizeof(*hptr);\
-switch(size) {\
+switch(sizeof(*hptr)) {\
 case 1:\
 x = (typeof(*hptr))*(uint8_t *)(hptr);\
 break;\
-- 
1.7.1




[Qemu-devel] Re: [PATCHv2 7/8] Change pci bus get_dev_path callback to print only slot and func

2010-11-08 Thread Michael S. Tsirkin
On Mon, Nov 08, 2010 at 07:29:50PM +0200, Gleb Natapov wrote:
> On Mon, Nov 08, 2010 at 07:17:37PM +0200, Michael S. Tsirkin wrote:
> > On Sun, Oct 31, 2010 at 01:40:08PM +0200, Gleb Natapov wrote:
> > > Domain should be determined form parent bus and bus number is configured
> > > by guest and should not be used in qemu internally.
> > > 
> > > Signed-off-by: Gleb Natapov 
> > > ---
> > >  hw/pci.c |   11 ++-
> > >  1 files changed, 6 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/hw/pci.c b/hw/pci.c
> > > index 92aaa85..1c5706f 100644
> > > --- a/hw/pci.c
> > > +++ b/hw/pci.c
> > > @@ -2138,12 +2138,13 @@ static void pcibus_dev_print(Monitor *mon, 
> > > DeviceState *dev, int indent)
> > >  static char *pcibus_get_dev_path(DeviceState *dev)
> > >  {
> > >  PCIDevice *d = (PCIDevice *)dev;
> > > -char path[16];
> > > -
> > > -snprintf(path, sizeof(path), "%04x:%02x:%02x.%x",
> > > - pci_find_domain(d->bus), d->config[PCI_SECONDARY_BUS],
> > > - PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
> > > +char path[50];
> > > +int off;
> > >  
> > > +off = snprintf(path, sizeof(path), "%...@%x", qdev_driver_name(dev),
> > > + PCI_SLOT(d->devfn));
> > > +if (PCI_FUNC(d->devfn))
> > > +snprintf(path + off, sizeof(path) + off, ",%x", 
> > > PCI_FUNC(d->devfn));
> > >  return strdup(path);
> > >  }
> > 
> > Can we change separators to be : and . instead of @ and , please?
> No. I am implementing well defined Open Firmware device path. Lets stop
> reinventing things wrongly please?
> 
> > This will make it compatible with existing code and generally
> > what users expect.
> > 
> It will drop domain and bus number part anyway.

Yes but most users at the moment have domain and bus 0 only.
So even simply adding :00 prefix will work.

> --
>   Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Qemu-devel] Re: [PATCH 2/4] hda-audio: exit cleanup

2010-11-08 Thread malc
On Mon, 8 Nov 2010, Gerd Hoffmann wrote:

> Add exit callback to the driver.  Unregister the sound card properly
> on exit.
> 
> Signed-off-by: Gerd Hoffmann 
> ---
>  hw/hda-audio.c |   23 +++
>  1 files changed, 23 insertions(+), 0 deletions(-)
> 
> diff --git a/hw/hda-audio.c b/hw/hda-audio.c
> index 1035774..5593c84 100644
> --- a/hw/hda-audio.c
> +++ b/hw/hda-audio.c
> @@ -808,6 +808,27 @@ static int hda_audio_init(HDACodecDevice *hda, const 
> struct desc_codec *desc)
>  return 0;
>  }
>  
> +static int hda_audio_exit(HDACodecDevice *hda)
> +{
> +HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
> +HDAAudioStream *st;
> +int i;
> +
> +dprint(a, 1, "%s\n", __FUNCTION__);
> +for (i = 0; i < ARRAY_SIZE(a->st); i++) {
> +st = a->st + i;
> +if (st->node == NULL)
> +continue;

Braces

> +if (st->output) {
> +AUD_close_out(&a->card, st->voice.out);
> +} else {
> +AUD_close_in(&a->card, st->voice.in);
> +}
> +}
> +AUD_remove_card(&a->card);
> +return 0;
> +}
> +
>  static int hda_audio_post_load(void *opaque, int version)
>  {
>  HDAAudioState *a = opaque;
> @@ -879,6 +900,7 @@ static HDACodecDeviceInfo hda_audio_info_output = {
>  .qdev.vmsd= &vmstate_hda_audio,
>  .qdev.props   = hda_audio_properties,
>  .init = hda_audio_init_output,
> +.exit = hda_audio_exit,
>  .command  = hda_audio_command,
>  .stream   = hda_audio_stream,
>  };
> @@ -890,6 +912,7 @@ static HDACodecDeviceInfo hda_audio_info_duplex = {
>  .qdev.vmsd= &vmstate_hda_audio,
>  .qdev.props   = hda_audio_properties,
>  .init = hda_audio_init_duplex,
> +.exit = hda_audio_exit,
>  .command  = hda_audio_command,
>  .stream   = hda_audio_stream,
>  };
> 

-- 
mailto:av1...@comtv.ru



[Qemu-devel] Re: [PATCH] add VMSTATE_BOOL

2010-11-08 Thread Michael S. Tsirkin
On Mon, Nov 01, 2010 at 03:51:54PM +0100, Gerd Hoffmann wrote:
> 
> Signed-off-by: Gerd Hoffmann 
> ---
>  hw/hw.h  |   14 ++
>  savevm.c |   21 +
>  2 files changed, 35 insertions(+), 0 deletions(-)
> 
> diff --git a/hw/hw.h b/hw/hw.h
> index e935364..234c713 100644
> --- a/hw/hw.h
> +++ b/hw/hw.h
> @@ -333,6 +333,8 @@ struct VMStateDescription {
>  const VMStateSubsection *subsections;
>  };
>  
> +extern const VMStateInfo vmstate_info_bool;
> +
>  extern const VMStateInfo vmstate_info_int8;
>  extern const VMStateInfo vmstate_info_int16;
>  extern const VMStateInfo vmstate_info_int32;
> @@ -613,6 +615,9 @@ extern const VMStateDescription vmstate_i2c_slave;
>  #define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type)  \
>  VMSTATE_STRUCT_POINTER_TEST(_field, _state, NULL, _vmsd, _type)
>  
> +#define VMSTATE_BOOL_V(_f, _s, _v)\
> +VMSTATE_SINGLE(_f, _s, _v, vmstate_info_bool, bool)
> +
>  #define VMSTATE_INT8_V(_f, _s, _v)\
>  VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int8, int8_t)
>  #define VMSTATE_INT16_V(_f, _s, _v)   \
> @@ -631,6 +636,9 @@ extern const VMStateDescription vmstate_i2c_slave;
>  #define VMSTATE_UINT64_V(_f, _s, _v)  \
>  VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, uint64_t)
>  
> +#define VMSTATE_BOOL(_f, _s)  \
> +VMSTATE_BOOL_V(_f, _s, 0)
> +
>  #define VMSTATE_INT8(_f, _s)  \
>  VMSTATE_INT8_V(_f, _s, 0)
>  #define VMSTATE_INT16(_f, _s) \
> @@ -685,6 +693,12 @@ extern const VMStateDescription vmstate_i2c_slave;
>  #define VMSTATE_PTIMER(_f, _s)\
>  VMSTATE_PTIMER_V(_f, _s, 0)
>  
> +#define VMSTATE_BOOL_ARRAY_V(_f, _s, _n, _v) \
> +VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_bool, bool)
> +
> +#define VMSTATE_BOOL_ARRAY(_f, _s, _n)   \
> +VMSTATE_BOOL_ARRAY_V(_f, _s, _n, 0)
> +

Why don't we pack the bits?

>  #define VMSTATE_UINT16_ARRAY_V(_f, _s, _n, _v) \
>  VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint16, uint16_t)
>  
> diff --git a/savevm.c b/savevm.c
> index 10057f3..14268ea 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -675,6 +675,27 @@ uint64_t qemu_get_be64(QEMUFile *f)
>  return v;
>  }
>  
> +/* bool */
> +
> +static int get_bool(QEMUFile *f, void *pv, size_t size)
> +{
> +bool *v = pv;
> +*v = qemu_get_byte(f);
> +return 0;

We must really validate that the value is 0 or 1.
If it's not, we will get undefined behaviour.

> +}
> +
> +static void put_bool(QEMUFile *f, void *pv, size_t size)
> +{
> +bool *v = pv;
> +qemu_put_byte(f, *v);

Is there a guarantee that bool is a single byte, BTW?

> +}
> +
> +const VMStateInfo vmstate_info_bool = {
> +.name = "bool",
> +.get  = get_bool,
> +.put  = put_bool,
> +};
> +
>  /* 8 bit int */
>  
>  static int get_int8(QEMUFile *f, void *pv, size_t size)
> -- 
> 1.7.1
> 



[Qemu-devel] Re: [PATCHv2 7/8] Change pci bus get_dev_path callback to print only slot and func

2010-11-08 Thread Gleb Natapov
On Mon, Nov 08, 2010 at 07:17:37PM +0200, Michael S. Tsirkin wrote:
> On Sun, Oct 31, 2010 at 01:40:08PM +0200, Gleb Natapov wrote:
> > Domain should be determined form parent bus and bus number is configured
> > by guest and should not be used in qemu internally.
> > 
> > Signed-off-by: Gleb Natapov 
> > ---
> >  hw/pci.c |   11 ++-
> >  1 files changed, 6 insertions(+), 5 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index 92aaa85..1c5706f 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -2138,12 +2138,13 @@ static void pcibus_dev_print(Monitor *mon, 
> > DeviceState *dev, int indent)
> >  static char *pcibus_get_dev_path(DeviceState *dev)
> >  {
> >  PCIDevice *d = (PCIDevice *)dev;
> > -char path[16];
> > -
> > -snprintf(path, sizeof(path), "%04x:%02x:%02x.%x",
> > - pci_find_domain(d->bus), d->config[PCI_SECONDARY_BUS],
> > - PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
> > +char path[50];
> > +int off;
> >  
> > +off = snprintf(path, sizeof(path), "%...@%x", qdev_driver_name(dev),
> > + PCI_SLOT(d->devfn));
> > +if (PCI_FUNC(d->devfn))
> > +snprintf(path + off, sizeof(path) + off, ",%x", 
> > PCI_FUNC(d->devfn));
> >  return strdup(path);
> >  }
> 
> Can we change separators to be : and . instead of @ and , please?
No. I am implementing well defined Open Firmware device path. Lets stop
reinventing things wrongly please?

> This will make it compatible with existing code and generally
> what users expect.
> 
It will drop domain and bus number part anyway.

--
Gleb.



[Qemu-devel] Re: [PATCH] PCI: Bus number from the bridge, not the device

2010-11-08 Thread Gleb Natapov
On Mon, Nov 08, 2010 at 07:08:37PM +0200, Michael S. Tsirkin wrote:
> On Mon, Nov 08, 2010 at 07:00:15PM +0200, Gleb Natapov wrote:
> > On Mon, Nov 08, 2010 at 06:26:33PM +0200, Michael S. Tsirkin wrote:
> > > On Mon, Nov 08, 2010 at 07:52:12AM -0700, Alex Williamson wrote:
> > > > On Mon, 2010-11-08 at 13:22 +0200, Michael S. Tsirkin wrote:
> > > > > On Mon, Oct 04, 2010 at 03:53:11PM -0600, Alex Williamson wrote:
> > > > > > pcibus_dev_print() was erroneously retrieving the device bus
> > > > > > number from the secondary bus number offset of the device
> > > > > > instead of the bridge above the device.  This ends of landing
> > > > > > in the 2nd byte of the 3rd BAR for devices, which thankfully
> > > > > > is usually zero.  pcibus_get_dev_path() copied this code,
> > > > > > inheriting the same bug.  pcibus_get_dev_path() is used for
> > > > > > ramblock naming, so changing it can effect migration.  However,
> > > > > > I've only seen this byte be non-zero for an assigned device,
> > > > > > which can't migrate anyway, so hopefully we won't run into
> > > > > > any issues.
> > > > > > 
> > > > > > Signed-off-by: Alex Williamson 
> > > > > 
> > > > > Good catch. Applied.
> > > > > I don't really see why do we put the dev path
> > > > > in the bus object: why not let device supply its name?
> > > > 
> > > > Because the device name is not unique.  This came about from the
> > > > discussion about how to create a canonical device path that Gleb and
> > > > Markus are again trying to hash out.  If we go up to the bus and get the
> > > > bus address, we have a VM unique name.  Unfortunately, it's difficult to
> > > > define what the bus should print in all cases (ISA), but since they
> > > > don't do hotplug and typically don't allocate ramblocks, we can mostly
> > > > ignore it for this use case.
> > > > 
> > > > > And I think this will affect nested bridges. However they are 
> > > > > currently
> > > > > broken anyway: we really must convert to topological names as bus 
> > > > > number
> > > > > is guest-assigned - they don't have to be unique, even.
> > > > 
> > > > Yes, nested bridges are a problem.  How can the seg/bus/devfn not be
> > > > unique?
> > > 
> > > Bus numbers for nested bridges are guest assigned. We start with 0 after 
> > > reset.
> > > 
> > > > > What does fixing this involve? Just changing pcibus_get_dev_path?
> > > > 
> > > > How do you plan to fix it?  Don't forget that migration depends on these
> > > > names, so some kind of compatibility layer would be required.  Thanks,
> > > > 
> > > > Alex
> > > 
> > > Replace bus number with slot numbers of parent bridges up to the root.
> > > This works for root bridge in a compatible way because bus number there
> > > is hard-coded to 0.
> > > IMO nested bridges are broken anyway, no way to be compatible there.
> > > 
> > > 
> > > Gleb, Markus, I think the following should be sufficient for PCI.  What
> > > do you think?  Also - do we need to update QMP/monitor to teach them to
> > > work with these paths?
> > > 
> > 
> > I am no longer use bus's get_dev_path callback for my purpose (I added
> > another one get_fw_dev_path)
> 
> Why? Because get_dev_path is unstable? to avoid changing for migration?
Because to get functional get_dev_path() we need to implemented it
recursively (hint domain should not be part of PCI bus get_dev_path but
its parent bus instead). And since  get_dev_path() is used in migration
code you have all or nothing situation. Either you implement it for all
devices properly or for none.

> So with this patch, you get to use it again.
Unfortunately not.

> IMO two ways to address devices is a bad idea.
> 
> > since it is used for migration
> 
> what is used for migration?
> 
get_dev_path() callback is used to create unique instance id. Actually
it is the only use of it so simple grep will show you this :)

> > it should
> > be done for all buses to work properly. And by properly I mean produce
> > full path from system root to the device itself recursively.
> 
> This is what the code does. Recursion is not needed here though.
> 
Recursion is needed to create full device path. I am not interested in
only PCI here. In fact PCI one is the easiest.

> > But what I
> > learned is that by changing get_dev_path output you will break migration
> > from older guest to newer once (something we have to support).
> 
> Well I think migration for sytems with nested buses are broken anyway:
> it's just a new feature where we simply did not figure out the migration
> andgle before the last release.
> Without nesting my code returns exactly the existing output
> so no, it's not broken.
> 
It does not drops bus id part? If without bridges the output is exactly
same then migration is OK with the patch.


> > And of
> > course, as Alex said already, you need to traverse bridges recursively
> 
> Well this is an implementation detail.  loop is functionally equivalent,
> only cleaner and easier to understand.
> 
With that I can agree. I am ref

[Qemu-devel] Re: [PATCHv2 7/8] Change pci bus get_dev_path callback to print only slot and func

2010-11-08 Thread Michael S. Tsirkin
On Sun, Oct 31, 2010 at 01:40:08PM +0200, Gleb Natapov wrote:
> Domain should be determined form parent bus and bus number is configured
> by guest and should not be used in qemu internally.
> 
> Signed-off-by: Gleb Natapov 
> ---
>  hw/pci.c |   11 ++-
>  1 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 92aaa85..1c5706f 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -2138,12 +2138,13 @@ static void pcibus_dev_print(Monitor *mon, 
> DeviceState *dev, int indent)
>  static char *pcibus_get_dev_path(DeviceState *dev)
>  {
>  PCIDevice *d = (PCIDevice *)dev;
> -char path[16];
> -
> -snprintf(path, sizeof(path), "%04x:%02x:%02x.%x",
> - pci_find_domain(d->bus), d->config[PCI_SECONDARY_BUS],
> - PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
> +char path[50];
> +int off;
>  
> +off = snprintf(path, sizeof(path), "%...@%x", qdev_driver_name(dev),
> + PCI_SLOT(d->devfn));
> +if (PCI_FUNC(d->devfn))
> +snprintf(path + off, sizeof(path) + off, ",%x", PCI_FUNC(d->devfn));
>  return strdup(path);
>  }

If we want to be backwards compatible at least for a single domain,
we need to call PCI root in that case ":00". Can this work?
I think that keeping a single way to address devices is good.


-- 
MST



[Qemu-devel] Re: [PATCH 0/6] Save state error handling (kill off no_migrate)

2010-11-08 Thread Alex Williamson
On Mon, 2010-11-08 at 18:54 +0200, Michael S. Tsirkin wrote:
> On Mon, Nov 08, 2010 at 07:59:57AM -0700, Alex Williamson wrote:
> > On Mon, 2010-11-08 at 13:40 +0200, Michael S. Tsirkin wrote:
> > > On Wed, Oct 06, 2010 at 02:58:57PM -0600, Alex Williamson wrote:
> > > > Our code paths for saving or migrating a VM are full of functions that
> > > > return void, leaving no opportunity for a device to cancel a migration,
> > > > either from error or incompatibility.  The ivshmem driver attempted to
> > > > solve this with a no_migrate flag on the save state entry.  I think the
> > > > more generic and flexible way to solve this is to allow driver save
> > > > functions to fail.  This series implements that and converts ivshmem
> > > > to uses a set_params function to NAK migration much earlier in the
> > > > processes.  This touches a lot of files, but bulk of those changes are
> > > > simply s/void/int/ and tacking a "return 0" to the end of functions.
> > > > Thanks,
> > > > 
> > > > Alex
> > > 
> > > Well error handling is always tricky: it seems easier to
> > > require save handlers to never fail.
> > 
> > Sure it's easier, but does that make it robust?
> 
> More robust in the face of wwhat kind of failure?

I really don't understand why we're having a discussion about whether
providing a means to return an error is a good thing or not.  These
patches touch a lot of files, but the change is dead simple.

> > > So there's a bunch of code here but what exactly is the benefit?
> > > Since save handlers have no idea what does the remote do,
> > > what is the compatibility you mention?
> > 
> > There are two users I currently have in mind.  ivshmem currently makes
> > use of the register_device_unmigratable() because it makes use of host
> > specific resources and connections (aiui).  This sets the no_migrate
> > flag, which is not dynamic and a bit of a band-aide.
> >  The other is
> > device assignment, which needs a way to NAK a migration since physical
> > devices are never migratable.
> 
> Well since all these can't be migrated ever, a fixed property actually seems
> a good match.  Sure it's not dynamic but all the easier to debug.
> 
> >  I imagine we could at some point have
> > devices with state tied to other features that can't always be detached
> > from the host, this tries to provide the infrastructure for that to
> > happen.
> > 
> > Alex
> 
> Let guest control whether you can migrate?
> Sounds like something that is more likely to be abused
> than used constructively. 

s/guest/device/  So you would rather the migration failed on the
incoming side where it may not be detected or it may be detected too
late to stop the migration?

Alex





[Qemu-devel] Re: [PATCHv2 7/8] Change pci bus get_dev_path callback to print only slot and func

2010-11-08 Thread Michael S. Tsirkin
On Sun, Oct 31, 2010 at 01:40:08PM +0200, Gleb Natapov wrote:
> Domain should be determined form parent bus and bus number is configured
> by guest and should not be used in qemu internally.
> 
> Signed-off-by: Gleb Natapov 
> ---
>  hw/pci.c |   11 ++-
>  1 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 92aaa85..1c5706f 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -2138,12 +2138,13 @@ static void pcibus_dev_print(Monitor *mon, 
> DeviceState *dev, int indent)
>  static char *pcibus_get_dev_path(DeviceState *dev)
>  {
>  PCIDevice *d = (PCIDevice *)dev;
> -char path[16];
> -
> -snprintf(path, sizeof(path), "%04x:%02x:%02x.%x",
> - pci_find_domain(d->bus), d->config[PCI_SECONDARY_BUS],
> - PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
> +char path[50];
> +int off;
>  
> +off = snprintf(path, sizeof(path), "%...@%x", qdev_driver_name(dev),
> + PCI_SLOT(d->devfn));
> +if (PCI_FUNC(d->devfn))
> +snprintf(path + off, sizeof(path) + off, ",%x", PCI_FUNC(d->devfn));
>  return strdup(path);
>  }

Can we change separators to be : and . instead of @ and , please?
This will make it compatible with existing code and generally
what users expect.

-- 
MST



[Qemu-devel] Re: [PATCH] PCI: Bus number from the bridge, not the device

2010-11-08 Thread Michael S. Tsirkin
On Mon, Nov 08, 2010 at 07:00:15PM +0200, Gleb Natapov wrote:
> On Mon, Nov 08, 2010 at 06:26:33PM +0200, Michael S. Tsirkin wrote:
> > On Mon, Nov 08, 2010 at 07:52:12AM -0700, Alex Williamson wrote:
> > > On Mon, 2010-11-08 at 13:22 +0200, Michael S. Tsirkin wrote:
> > > > On Mon, Oct 04, 2010 at 03:53:11PM -0600, Alex Williamson wrote:
> > > > > pcibus_dev_print() was erroneously retrieving the device bus
> > > > > number from the secondary bus number offset of the device
> > > > > instead of the bridge above the device.  This ends of landing
> > > > > in the 2nd byte of the 3rd BAR for devices, which thankfully
> > > > > is usually zero.  pcibus_get_dev_path() copied this code,
> > > > > inheriting the same bug.  pcibus_get_dev_path() is used for
> > > > > ramblock naming, so changing it can effect migration.  However,
> > > > > I've only seen this byte be non-zero for an assigned device,
> > > > > which can't migrate anyway, so hopefully we won't run into
> > > > > any issues.
> > > > > 
> > > > > Signed-off-by: Alex Williamson 
> > > > 
> > > > Good catch. Applied.
> > > > I don't really see why do we put the dev path
> > > > in the bus object: why not let device supply its name?
> > > 
> > > Because the device name is not unique.  This came about from the
> > > discussion about how to create a canonical device path that Gleb and
> > > Markus are again trying to hash out.  If we go up to the bus and get the
> > > bus address, we have a VM unique name.  Unfortunately, it's difficult to
> > > define what the bus should print in all cases (ISA), but since they
> > > don't do hotplug and typically don't allocate ramblocks, we can mostly
> > > ignore it for this use case.
> > > 
> > > > And I think this will affect nested bridges. However they are currently
> > > > broken anyway: we really must convert to topological names as bus number
> > > > is guest-assigned - they don't have to be unique, even.
> > > 
> > > Yes, nested bridges are a problem.  How can the seg/bus/devfn not be
> > > unique?
> > 
> > Bus numbers for nested bridges are guest assigned. We start with 0 after 
> > reset.
> > 
> > > > What does fixing this involve? Just changing pcibus_get_dev_path?
> > > 
> > > How do you plan to fix it?  Don't forget that migration depends on these
> > > names, so some kind of compatibility layer would be required.  Thanks,
> > > 
> > > Alex
> > 
> > Replace bus number with slot numbers of parent bridges up to the root.
> > This works for root bridge in a compatible way because bus number there
> > is hard-coded to 0.
> > IMO nested bridges are broken anyway, no way to be compatible there.
> > 
> > 
> > Gleb, Markus, I think the following should be sufficient for PCI.  What
> > do you think?  Also - do we need to update QMP/monitor to teach them to
> > work with these paths?
> > 
> 
> I am no longer use bus's get_dev_path callback for my purpose (I added
> another one get_fw_dev_path)

Why? Because get_dev_path is unstable? to avoid changing for migration?
So with this patch, you get to use it again.
IMO two ways to address devices is a bad idea.

> since it is used for migration

what is used for migration?

> it should
> be done for all buses to work properly. And by properly I mean produce
> full path from system root to the device itself recursively.

This is what the code does. Recursion is not needed here though.

> But what I
> learned is that by changing get_dev_path output you will break migration
> from older guest to newer once (something we have to support).

Well I think migration for sytems with nested buses are broken anyway:
it's just a new feature where we simply did not figure out the migration
andgle before the last release.
Without nesting my code returns exactly the existing output
so no, it's not broken.

> And of
> course, as Alex said already, you need to traverse bridges recursively

Well this is an implementation detail.  loop is functionally equivalent,
only cleaner and easier to understand.

> and
> domain does not provide any meaningful information.

I dont understand yet, sorry. Code I posted returns exactly
what's there today. If domain does not provide any meaningful
information, How do things work then? And how do you address
roots on a system with multiple roots?

> > This is on top of Alex's patch, completely untested.
> > 
> > 
> > pci: fix device path for devices behind nested bridges
> > 
> > We were using bus number in the device path, which is clearly
> > broken as this number is guest-assigned for all devices
> > except the root.
> > 
> > Fix by using hierarchical list of slots, walking the path
> > from root down to device, instead. Add :00 as bus number
> > so that if there are no nested bridges, this is compatible
> > with what we have now.
> > 
> > Signed-off-by: Michael S. Tsirkin 
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index 7d12473..fa98d94 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -1826,13 +1826,45 @@ static void pcibus_dev_print(Monitor *mo

Re: [Qemu-devel] [PATCH 0/3] v4 Decouple block device removal from device removal

2010-11-08 Thread Daniel P. Berrange
On Mon, Nov 08, 2010 at 06:56:02PM +0200, Michael S. Tsirkin wrote:
> On Mon, Nov 08, 2010 at 08:02:50AM -0600, Ryan Harper wrote:
> > * Markus Armbruster  [2010-11-08 06:04]:
> > > "Michael S. Tsirkin"  writes:
> > > >> Here's how the various objects are connected to each other:
> > > >> 
> > > >>contains
> > > >> drivelist---> DriveInfo
> > > >> |
> > > >> | .bdrv
> > > >> | .id == .bdrv->device_name
> > > >> |
> > > >>contains V
> > > >> bdrv_states  ---> BlockDriverState
> > > >>  |   ^
> > > >>.peer |   |
> > > >>  |   |  host part
> > > >> -|---|---
> > > >>  |   | guest part
> > > >>  |   | property "drive"
> > > >>  v   |
> > > >>   DeviceState
> > > >> 
> > > >> To disconnect host from guest part, you need to cut both pointers.  To
> > > >> delete the host part, you need to delete both objects, BlockDriverState
> > > >> and DriveInfo.
> > > >
> > > >
> > > > If we remove DriveInfo, how can management later detect that guest part
> > > > was deleted?
> > > 
> > > Directly: check whether the qdev is gone.
> > > 
> > > I don't know how to check that indirectly, via DriveInfo.
> > > 
> > > >  If you want symmetry with netdev, it's possible to keep a
> > > > shell of BlockDriverState/DriveInfo around (solving dangling pointer
> > > > problems).
> > > 
> > > netdev_del deletes the host network part:
> > > 
> > > (qemu) info network
> > > Devices not on any VLAN:
> > >   net.0: net=10.0.2.0, restricted=n peer=nic.0
> > >   nic.0: model=virtio-net-pci,macaddr=52:54:00:12:34:56 peer=net.0
> > > (qemu) netdev_del net.0
> > > (qemu) info network
> > > Devices not on any VLAN:
> > >   nic.0: model=virtio-net-pci,macaddr=52:54:00:12:34:56 peer=net.0
> > > 
> > > It leaves around the VLAN object.  Since qdev property points to that,
> > > it doesn't dangle.
> > > 
> > > In my opinion, drive_del should make the drive vanish from "info block",
> > 
> > Yeah; that's the right thing to do here.  Let me respin the patch with
> > the name change and the additional work to fix up the pointers and
> > ensure that we don't see the drive in info block.
> 
> Daniel, I'd like your input here: can you live with
> device diappearing from info block and parsing
> qdev tree info to figure out whether device is really gone?

We don't use info block for anything. Having to parse the full qdev tree
to determine if a single device is gone seems rather tedious. It would
be better if query-qdev took an optional argument, which is the name
of the device to root the tree at. Then checking whether a device
named 'foo' is gone just means running 'query-qdev foo' and seeing if
that returns an error about the device not existing, then we know it
has gone. No need to parse anything. Being able to query the qdev data
for a single device, or sub-tree of devices seems useful in its own
right.

Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|



[Qemu-devel] Re: KVM call agenda for Nov 9

2010-11-08 Thread Anthony Liguori

On 11/08/2010 08:23 AM, Juan Quintela wrote:

Please send in any agenda items you are interested in covering.

For once I start:
- report from linux plumbers virt track (jes?, anthony?)
- report from linux kernel summit (avi?)
   


Good thinking.

Regards,

Anthony Liguori


Cheers, Juan.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
   





[Qemu-devel] Re: [PATCH] PCI: Bus number from the bridge, not the device

2010-11-08 Thread Gleb Natapov
On Mon, Nov 08, 2010 at 06:26:33PM +0200, Michael S. Tsirkin wrote:
> On Mon, Nov 08, 2010 at 07:52:12AM -0700, Alex Williamson wrote:
> > On Mon, 2010-11-08 at 13:22 +0200, Michael S. Tsirkin wrote:
> > > On Mon, Oct 04, 2010 at 03:53:11PM -0600, Alex Williamson wrote:
> > > > pcibus_dev_print() was erroneously retrieving the device bus
> > > > number from the secondary bus number offset of the device
> > > > instead of the bridge above the device.  This ends of landing
> > > > in the 2nd byte of the 3rd BAR for devices, which thankfully
> > > > is usually zero.  pcibus_get_dev_path() copied this code,
> > > > inheriting the same bug.  pcibus_get_dev_path() is used for
> > > > ramblock naming, so changing it can effect migration.  However,
> > > > I've only seen this byte be non-zero for an assigned device,
> > > > which can't migrate anyway, so hopefully we won't run into
> > > > any issues.
> > > > 
> > > > Signed-off-by: Alex Williamson 
> > > 
> > > Good catch. Applied.
> > > I don't really see why do we put the dev path
> > > in the bus object: why not let device supply its name?
> > 
> > Because the device name is not unique.  This came about from the
> > discussion about how to create a canonical device path that Gleb and
> > Markus are again trying to hash out.  If we go up to the bus and get the
> > bus address, we have a VM unique name.  Unfortunately, it's difficult to
> > define what the bus should print in all cases (ISA), but since they
> > don't do hotplug and typically don't allocate ramblocks, we can mostly
> > ignore it for this use case.
> > 
> > > And I think this will affect nested bridges. However they are currently
> > > broken anyway: we really must convert to topological names as bus number
> > > is guest-assigned - they don't have to be unique, even.
> > 
> > Yes, nested bridges are a problem.  How can the seg/bus/devfn not be
> > unique?
> 
> Bus numbers for nested bridges are guest assigned. We start with 0 after 
> reset.
> 
> > > What does fixing this involve? Just changing pcibus_get_dev_path?
> > 
> > How do you plan to fix it?  Don't forget that migration depends on these
> > names, so some kind of compatibility layer would be required.  Thanks,
> > 
> > Alex
> 
> Replace bus number with slot numbers of parent bridges up to the root.
> This works for root bridge in a compatible way because bus number there
> is hard-coded to 0.
> IMO nested bridges are broken anyway, no way to be compatible there.
> 
> 
> Gleb, Markus, I think the following should be sufficient for PCI.  What
> do you think?  Also - do we need to update QMP/monitor to teach them to
> work with these paths?
> 

I am no longer use bus's get_dev_path callback for my purpose (I added
another one get_fw_dev_path) since it is used for migration it should
be done for all buses to work properly. And by properly I mean produce
full path from system root to the device itself recursively. But what I
learned is that by changing get_dev_path output you will break migration
from older guest to newer once (something we have to support). And of
course, as Alex said already, you need to traverse bridges recursively and
domain does not provide any meaningful information.

> This is on top of Alex's patch, completely untested.
> 
> 
> pci: fix device path for devices behind nested bridges
> 
> We were using bus number in the device path, which is clearly
> broken as this number is guest-assigned for all devices
> except the root.
> 
> Fix by using hierarchical list of slots, walking the path
> from root down to device, instead. Add :00 as bus number
> so that if there are no nested bridges, this is compatible
> with what we have now.
> 
> Signed-off-by: Michael S. Tsirkin 
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 7d12473..fa98d94 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -1826,13 +1826,45 @@ static void pcibus_dev_print(Monitor *mon, 
> DeviceState *dev, int indent)
>  
>  static char *pcibus_get_dev_path(DeviceState *dev)
>  {
> -PCIDevice *d = (PCIDevice *)dev;
> -char path[16];
> -
> -snprintf(path, sizeof(path), "%04x:%02x:%02x.%x",
> - pci_find_domain(d->bus), pci_bus_num(d->bus),
> - PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
> -
> -return strdup(path);
> +PCIDevice *d = container_of(dev, PCIDevice, qdev);
> +PCIDevice *t;
> +int slot_depth;
> +/* Path format: Domain:00:Slot:Slot:Slot.Function.
> + * 00 is added here to make this format compatible with
> + * domain:Bus:Slot.Func for systems without nested PCI bridges.
> + * Slot list specifies the slot numbers for all devices on the
> + * path from root to the specific device. */
> +int domain_len = strlen(":00");
> +int func_len = strlen(".F");
> +int slot_len = strlen(":SS");
> +int path_len;
> +char *path, *p;
> +
> +/* Calculate # of slots on path between device and root. */;
> +slot_depth = 0;
> +for (t = d; t; t = t->bus->parent_dev)
> ++

Re: [Qemu-devel] [PATCH 1/2] Add a DTrace tracing backend targetted for SystemTAP compatability

2010-11-08 Thread Daniel P. Berrange
On Mon, Nov 08, 2010 at 04:52:01PM +, Stefan Hajnoczi wrote:
> On Mon, Nov 8, 2010 at 11:33 AM, Daniel P. Berrange  wrote:
> > diff --git a/Makefile b/Makefile
> > index 02698e9..384bf72 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -1,6 +1,10 @@
> >  # Makefile for QEMU.
> >
> >  GENERATED_HEADERS = config-host.h trace.h qemu-options.def
> > +GENERATED_HEADERS = config-host.h trace.h
> > +ifeq ($(TRACE_BACKEND),dtrace)
> > +GENERATED_HEADERS += trace-dtrace.h
> > +endif
> 
> What happened to qemu-options.def?  GENERATED_HEADERS is defined
> twice, the first line should probably be deleted.

Oh, the loss of qemu-options.def is a rebase/merge error. GENERATED_HEADERS
isn't defined twice, because the second line is using += to append to the
first definition.

Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|



Re: [Qemu-devel] [PATCH 0/3] v4 Decouple block device removal from device removal

2010-11-08 Thread Michael S. Tsirkin
On Mon, Nov 08, 2010 at 08:02:50AM -0600, Ryan Harper wrote:
> * Markus Armbruster  [2010-11-08 06:04]:
> > "Michael S. Tsirkin"  writes:
> > 
> > > On Mon, Nov 08, 2010 at 11:32:01AM +0100, Markus Armbruster wrote:
> > >> Ryan Harper  writes:
> > >> 
> > >> > * Markus Armbruster  [2010-11-06 04:19]:
> > >> >> Ryan Harper  writes:
> > >> >> 
> > >> >> > * Markus Armbruster  [2010-11-05 11:11]:
> > >> >> >> Ryan Harper  writes:
> > >> >> >> 
> > >> >> >> > * Markus Armbruster  [2010-11-05 08:28]:
> > >> >> >> >> I'd be fine with any of these:
> > >> >> >> >> 
> > >> >> >> >> 1. A new command "device_disconnet ID" (or similar name) to 
> > >> >> >> >> disconnect
> > >> >> >> >>device ID from any host parts.  Nice touch: you don't have 
> > >> >> >> >> to know
> > >> >> >> >>about the device's host part(s) to disconnect it.  But it 
> > >> >> >> >> might be
> > >> >> >> >>more work than the other two.
> > >> >> >> >
> > >> >> >> > This is sort of what netdev_del() and drive_unplug() are today; 
> > >> >> >> > we're
> > >> >> >> > just saying sever the connection of this device id.   
> > >> >> >> 
> > >> >> >> No, I have netdev_del as (3).
> > >> >> >> 
> > >> >> >> All three options are "sort of" the same, just different commands 
> > >> >> >> with
> > >> >> >> a common purpose.
> > >> >> >> 
> > >> >> >> > I'd like to rename drive_unplug() to blockdev_del() and call it 
> > >> >> >> > done.  I
> > >> >> >> > was looking at libvirt and the right call to netdev_del is 
> > >> >> >> > already
> > >> >> >> > in-place; I'd just need to re-spin my block patch to call 
> > >> >> >> > blockdev_del()
> > >> >> >> > after invoking device_del() to match what is done for net.
> > >> >> >> 
> > >> >> >> Unless I'm missing something, you can't just rename: your unplug 
> > >> >> >> does
> > >> >> >> not delete the host part.
> > >> >> >> 
> > >> >> >> >> 2. New commands netdev_disconnect, drive_disconnect (or similar 
> > >> >> >> >> names)
> > >> >> >> >>to disconnect a host part from a guest device.  Like (1), 
> > >> >> >> >> except you
> > >> >> >> >>have to point to the other end of the connection to cut it.
> > >> >> >> >
> > >> >> >> > What's the advantage here? We need an additional piece of info 
> > >> >> >> > (host
> > >> >> >> > part) in addition to the device id?
> > >> >> >> 
> > >> >> >> That's a disadvantage.
> > >> >> >> 
> > >> >> >> Possible advantage: implementation could be slightly easier than 
> > >> >> >> (1),
> > >> >> >> because you don't have to find the host parts.
> > >> >> >> 
> > >> >> >> >> 3. A new command "drive_del ID" similar to existing netdev_del. 
> > >> >> >> >>  This is
> > >> >> >> >>(2) fused with delete.  Conceptual wart: you can't 
> > >> >> >> >> disconnect and
> > >> >> >> >>keep the host part around.  Moreover, delete is slightly 
> > >> >> >> >> dangerous,
> > >> >> >> >>because it renders any guest device still using the host part
> > >> >> >> >>useless.
> > >> >> >> >
> > >> >> >> > Hrm, I thought that's what (1) is.
> > >> >> >> 
> > >> >> >> No.
> > >> >> >> 
> > >> >> >> With (1), the argument is a *device* ID, and we disconnect *all* 
> > >> >> >> host
> > >> >> >> parts connected to this device (typically just one).
> > >> >> >> 
> > >> >> >> With (3), the argument is a netdev/drive ID, and disconnect *this* 
> > >> >> >> host
> > >> >> >> part from the peer device.
> > >> >> >> 
> > >> >> >> > Well, either (1) or (3); I'd 
> > >> >> >> > like to
> > >> >> >> > rename drive_unplug() to blockdev_del() since they're similar 
> > >> >> >> > function
> > >> >> >> > w.r.t removing access to the host resource.  And we can invoke 
> > >> >> >> > them in
> > >> >> >> > the same way from libvirt (after doing guest notification, remove
> > >> >> >> > access).
> > >> >> >> 
> > >> >> >> I'd call it drive_del for now, to match drive_add.
> > >> >> >
> > >> >> > OK, drive_del() and as you mentioned, drive_unplug will take out the
> > >> >> > block driver, but doesn't remove the dinfo object; that ends up 
> > >> >> > dying
> > >> >> > when we call the device destructor.  I think for symmetry we'll want
> > >> >> > drive_del to remove the dinfo object as well.
> > >> >> 
> > >> >> Exactly.
> > >> >> 
> > >> >> a. bdrv_detach() to zap the pointer from bdrv to qdev
> > >> >> b. zap the pointer from qdev to bdrv
> > >> >> c. drive_uninit() to dispose of the host part
> > >> >
> > >> > a-c need to be done to match netdev_del symmetry?  How hard of a req is
> > >> > this?
> > >> 
> > >> Without (c), it's not a delete.  And (c) without (b) leaves a dangling
> > >> pointer.  (c) without (a) fails an assertion in bdrv_delete().
> > >> 
> > >> Aside: (b) should probably be folded into bdrv_detach().
> > >> 
> > >> >> Step b could be awkward with (3), because you don't know device 
> > >> >> details.
> > >> >> I guess you have to search device properties for a drive property
> > >> >> pointing to bdrv.  I like (1) beca

[Qemu-devel] Re: [PATCH 0/6] Save state error handling (kill off no_migrate)

2010-11-08 Thread Michael S. Tsirkin
On Mon, Nov 08, 2010 at 07:59:57AM -0700, Alex Williamson wrote:
> On Mon, 2010-11-08 at 13:40 +0200, Michael S. Tsirkin wrote:
> > On Wed, Oct 06, 2010 at 02:58:57PM -0600, Alex Williamson wrote:
> > > Our code paths for saving or migrating a VM are full of functions that
> > > return void, leaving no opportunity for a device to cancel a migration,
> > > either from error or incompatibility.  The ivshmem driver attempted to
> > > solve this with a no_migrate flag on the save state entry.  I think the
> > > more generic and flexible way to solve this is to allow driver save
> > > functions to fail.  This series implements that and converts ivshmem
> > > to uses a set_params function to NAK migration much earlier in the
> > > processes.  This touches a lot of files, but bulk of those changes are
> > > simply s/void/int/ and tacking a "return 0" to the end of functions.
> > > Thanks,
> > > 
> > > Alex
> > 
> > Well error handling is always tricky: it seems easier to
> > require save handlers to never fail.
> 
> Sure it's easier, but does that make it robust?

More robust in the face of wwhat kind of failure?

> > So there's a bunch of code here but what exactly is the benefit?
> > Since save handlers have no idea what does the remote do,
> > what is the compatibility you mention?
> 
> There are two users I currently have in mind.  ivshmem currently makes
> use of the register_device_unmigratable() because it makes use of host
> specific resources and connections (aiui).  This sets the no_migrate
> flag, which is not dynamic and a bit of a band-aide.
>  The other is
> device assignment, which needs a way to NAK a migration since physical
> devices are never migratable.

Well since all these can't be migrated ever, a fixed property actually seems
a good match.  Sure it's not dynamic but all the easier to debug.

>  I imagine we could at some point have
> devices with state tied to other features that can't always be detached
> from the host, this tries to provide the infrastructure for that to
> happen.
> 
> Alex

Let guest control whether you can migrate?
Sounds like something that is more likely to be abused
than used constructively. 

-- 
MST



Re: [Qemu-devel] [PATCH 1/2] Add a DTrace tracing backend targetted for SystemTAP compatability

2010-11-08 Thread Stefan Hajnoczi
On Mon, Nov 8, 2010 at 11:33 AM, Daniel P. Berrange  wrote:
> diff --git a/Makefile b/Makefile
> index 02698e9..384bf72 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1,6 +1,10 @@
>  # Makefile for QEMU.
>
>  GENERATED_HEADERS = config-host.h trace.h qemu-options.def
> +GENERATED_HEADERS = config-host.h trace.h
> +ifeq ($(TRACE_BACKEND),dtrace)
> +GENERATED_HEADERS += trace-dtrace.h
> +endif

What happened to qemu-options.def?  GENERATED_HEADERS is defined
twice, the first line should probably be deleted.

Looks good otherwise.

Stefan



[Qemu-devel] Re: [PATCH] PCI: Bus number from the bridge, not the device

2010-11-08 Thread Michael S. Tsirkin
On Mon, Nov 08, 2010 at 09:36:59AM -0700, Alex Williamson wrote:
> On Mon, 2010-11-08 at 18:26 +0200, Michael S. Tsirkin wrote:
> > On Mon, Nov 08, 2010 at 07:52:12AM -0700, Alex Williamson wrote:
> > > On Mon, 2010-11-08 at 13:22 +0200, Michael S. Tsirkin wrote:
> > > > On Mon, Oct 04, 2010 at 03:53:11PM -0600, Alex Williamson wrote:
> > > > > pcibus_dev_print() was erroneously retrieving the device bus
> > > > > number from the secondary bus number offset of the device
> > > > > instead of the bridge above the device.  This ends of landing
> > > > > in the 2nd byte of the 3rd BAR for devices, which thankfully
> > > > > is usually zero.  pcibus_get_dev_path() copied this code,
> > > > > inheriting the same bug.  pcibus_get_dev_path() is used for
> > > > > ramblock naming, so changing it can effect migration.  However,
> > > > > I've only seen this byte be non-zero for an assigned device,
> > > > > which can't migrate anyway, so hopefully we won't run into
> > > > > any issues.
> > > > > 
> > > > > Signed-off-by: Alex Williamson 
> > > > 
> > > > Good catch. Applied.
> > > > I don't really see why do we put the dev path
> > > > in the bus object: why not let device supply its name?
> > > 
> > > Because the device name is not unique.  This came about from the
> > > discussion about how to create a canonical device path that Gleb and
> > > Markus are again trying to hash out.  If we go up to the bus and get the
> > > bus address, we have a VM unique name.  Unfortunately, it's difficult to
> > > define what the bus should print in all cases (ISA), but since they
> > > don't do hotplug and typically don't allocate ramblocks, we can mostly
> > > ignore it for this use case.
> > > 
> > > > And I think this will affect nested bridges. However they are currently
> > > > broken anyway: we really must convert to topological names as bus number
> > > > is guest-assigned - they don't have to be unique, even.
> > > 
> > > Yes, nested bridges are a problem.  How can the seg/bus/devfn not be
> > > unique?
> > 
> > Bus numbers for nested bridges are guest assigned. We start with 0 after 
> > reset.
> 
> Right, invalid bus numbers are not unique.

Well, you can call them invalid, but this is the only
number there is until guest assigns something :)

> > > > What does fixing this involve? Just changing pcibus_get_dev_path?
> > > 
> > > How do you plan to fix it?  Don't forget that migration depends on these
> > > names, so some kind of compatibility layer would be required.  Thanks,
> > > 
> > > Alex
> > 
> > Replace bus number with slot numbers of parent bridges up to the root.
> > This works for root bridge in a compatible way because bus number there
> > is hard-coded to 0.
> > IMO nested bridges are broken anyway, no way to be compatible there.
> > 
> > 
> > Gleb, Markus, I think the following should be sufficient for PCI.  What
> > do you think?  Also - do we need to update QMP/monitor to teach them to
> > work with these paths?
> > 
> > This is on top of Alex's patch, completely untested.
> 
> This function was originally intended to be recursive, so that each bus
> could call the parent until we hit a canonical name.  It just so happens
> that the root bus provides that for non-nested devices.  So I think
> instead we should have a different function for bridges that appends to
> what the root get_dev_path returns.
> 
> Alex

This won't be compatible: the format that we have is
:BB:SS.F
which means that string concatenation can not get you from bus path
to device path (remember that bridge is a PCI device too).

And frankly if we can make it non-recursive I'd rather have it
that way. We are not solving the tower of hanoi here.

-- 
MST



Re: [Qemu-devel] [RFC] tracing: consistent usage of "disable" in "trace-events"

2010-11-08 Thread Lluís
Daniel P Berrange writes:

> On Mon, Nov 08, 2010 at 04:55:10PM +0100, Lluís wrote:
>> In any case, there might appear other events that could have performance
>> implications, although I understand the ease of usage of having all
>> trace events available by default.

> Ok, I agree that if we have tracepoints in the kind of places in TCG
> you describe, then this could have measurable performance impact.

>> That's why I would rather declare all trace-events without the "disable"
>> keyword, and leave it only on those events that are known to have a high
>> frequency, as no backend should have so poor performance as to force
>> events to "disappear".

> This sounds like a reasonable plan to me. 


Then I'll cook a simple patch for that later.

Lluis

-- 
 "And it's much the same thing with knowledge, for whenever you learn
 something new, the whole world becomes that much richer."
 -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
 Tollbooth



[Qemu-devel] Re: [PATCH] PCI: Bus number from the bridge, not the device

2010-11-08 Thread Alex Williamson
On Mon, 2010-11-08 at 18:26 +0200, Michael S. Tsirkin wrote:
> On Mon, Nov 08, 2010 at 07:52:12AM -0700, Alex Williamson wrote:
> > On Mon, 2010-11-08 at 13:22 +0200, Michael S. Tsirkin wrote:
> > > On Mon, Oct 04, 2010 at 03:53:11PM -0600, Alex Williamson wrote:
> > > > pcibus_dev_print() was erroneously retrieving the device bus
> > > > number from the secondary bus number offset of the device
> > > > instead of the bridge above the device.  This ends of landing
> > > > in the 2nd byte of the 3rd BAR for devices, which thankfully
> > > > is usually zero.  pcibus_get_dev_path() copied this code,
> > > > inheriting the same bug.  pcibus_get_dev_path() is used for
> > > > ramblock naming, so changing it can effect migration.  However,
> > > > I've only seen this byte be non-zero for an assigned device,
> > > > which can't migrate anyway, so hopefully we won't run into
> > > > any issues.
> > > > 
> > > > Signed-off-by: Alex Williamson 
> > > 
> > > Good catch. Applied.
> > > I don't really see why do we put the dev path
> > > in the bus object: why not let device supply its name?
> > 
> > Because the device name is not unique.  This came about from the
> > discussion about how to create a canonical device path that Gleb and
> > Markus are again trying to hash out.  If we go up to the bus and get the
> > bus address, we have a VM unique name.  Unfortunately, it's difficult to
> > define what the bus should print in all cases (ISA), but since they
> > don't do hotplug and typically don't allocate ramblocks, we can mostly
> > ignore it for this use case.
> > 
> > > And I think this will affect nested bridges. However they are currently
> > > broken anyway: we really must convert to topological names as bus number
> > > is guest-assigned - they don't have to be unique, even.
> > 
> > Yes, nested bridges are a problem.  How can the seg/bus/devfn not be
> > unique?
> 
> Bus numbers for nested bridges are guest assigned. We start with 0 after 
> reset.

Right, invalid bus numbers are not unique.

> > > What does fixing this involve? Just changing pcibus_get_dev_path?
> > 
> > How do you plan to fix it?  Don't forget that migration depends on these
> > names, so some kind of compatibility layer would be required.  Thanks,
> > 
> > Alex
> 
> Replace bus number with slot numbers of parent bridges up to the root.
> This works for root bridge in a compatible way because bus number there
> is hard-coded to 0.
> IMO nested bridges are broken anyway, no way to be compatible there.
> 
> 
> Gleb, Markus, I think the following should be sufficient for PCI.  What
> do you think?  Also - do we need to update QMP/monitor to teach them to
> work with these paths?
> 
> This is on top of Alex's patch, completely untested.

This function was originally intended to be recursive, so that each bus
could call the parent until we hit a canonical name.  It just so happens
that the root bus provides that for non-nested devices.  So I think
instead we should have a different function for bridges that appends to
what the root get_dev_path returns.

Alex

> pci: fix device path for devices behind nested bridges
> 
> We were using bus number in the device path, which is clearly
> broken as this number is guest-assigned for all devices
> except the root.
> 
> Fix by using hierarchical list of slots, walking the path
> from root down to device, instead. Add :00 as bus number
> so that if there are no nested bridges, this is compatible
> with what we have now.
> 
> Signed-off-by: Michael S. Tsirkin 
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 7d12473..fa98d94 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -1826,13 +1826,45 @@ static void pcibus_dev_print(Monitor *mon, 
> DeviceState *dev, int indent)
>  
>  static char *pcibus_get_dev_path(DeviceState *dev)
>  {
> -PCIDevice *d = (PCIDevice *)dev;
> -char path[16];
> -
> -snprintf(path, sizeof(path), "%04x:%02x:%02x.%x",
> - pci_find_domain(d->bus), pci_bus_num(d->bus),
> - PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
> -
> -return strdup(path);
> +PCIDevice *d = container_of(dev, PCIDevice, qdev);
> +PCIDevice *t;
> +int slot_depth;
> +/* Path format: Domain:00:Slot:Slot:Slot.Function.
> + * 00 is added here to make this format compatible with
> + * domain:Bus:Slot.Func for systems without nested PCI bridges.
> + * Slot list specifies the slot numbers for all devices on the
> + * path from root to the specific device. */
> +int domain_len = strlen(":00");
> +int func_len = strlen(".F");
> +int slot_len = strlen(":SS");
> +int path_len;
> +char *path, *p;
> +
> +/* Calculate # of slots on path between device and root. */;
> +slot_depth = 0;
> +for (t = d; t; t = t->bus->parent_dev)
> +++slot_depth;
> +
> +path_len = domain_len + bus_len + slot_len * slot_depth + func_len;
> +
> +/* Allocate memory, fill in the terminating null byte. */
> +path = malloc(path_len + 1 /* 

Re: [Qemu-devel] [PATCH 0/3] v4 Decouple block device removal from device removal

2010-11-08 Thread Michael S. Tsirkin
On Mon, Nov 08, 2010 at 01:03:18PM +0100, Markus Armbruster wrote:
> "Michael S. Tsirkin"  writes:
> 
> > On Mon, Nov 08, 2010 at 11:32:01AM +0100, Markus Armbruster wrote:
> >> Ryan Harper  writes:
> >> 
> >> > * Markus Armbruster  [2010-11-06 04:19]:
> >> >> Ryan Harper  writes:
> >> >> 
> >> >> > * Markus Armbruster  [2010-11-05 11:11]:
> >> >> >> Ryan Harper  writes:
> >> >> >> 
> >> >> >> > * Markus Armbruster  [2010-11-05 08:28]:
> >> >> >> >> I'd be fine with any of these:
> >> >> >> >> 
> >> >> >> >> 1. A new command "device_disconnet ID" (or similar name) to 
> >> >> >> >> disconnect
> >> >> >> >>device ID from any host parts.  Nice touch: you don't have to 
> >> >> >> >> know
> >> >> >> >>about the device's host part(s) to disconnect it.  But it 
> >> >> >> >> might be
> >> >> >> >>more work than the other two.
> >> >> >> >
> >> >> >> > This is sort of what netdev_del() and drive_unplug() are today; 
> >> >> >> > we're
> >> >> >> > just saying sever the connection of this device id.   
> >> >> >> 
> >> >> >> No, I have netdev_del as (3).
> >> >> >> 
> >> >> >> All three options are "sort of" the same, just different commands 
> >> >> >> with
> >> >> >> a common purpose.
> >> >> >> 
> >> >> >> > I'd like to rename drive_unplug() to blockdev_del() and call it 
> >> >> >> > done.  I
> >> >> >> > was looking at libvirt and the right call to netdev_del is already
> >> >> >> > in-place; I'd just need to re-spin my block patch to call 
> >> >> >> > blockdev_del()
> >> >> >> > after invoking device_del() to match what is done for net.
> >> >> >> 
> >> >> >> Unless I'm missing something, you can't just rename: your unplug does
> >> >> >> not delete the host part.
> >> >> >> 
> >> >> >> >> 2. New commands netdev_disconnect, drive_disconnect (or similar 
> >> >> >> >> names)
> >> >> >> >>to disconnect a host part from a guest device.  Like (1), 
> >> >> >> >> except you
> >> >> >> >>have to point to the other end of the connection to cut it.
> >> >> >> >
> >> >> >> > What's the advantage here? We need an additional piece of info 
> >> >> >> > (host
> >> >> >> > part) in addition to the device id?
> >> >> >> 
> >> >> >> That's a disadvantage.
> >> >> >> 
> >> >> >> Possible advantage: implementation could be slightly easier than (1),
> >> >> >> because you don't have to find the host parts.
> >> >> >> 
> >> >> >> >> 3. A new command "drive_del ID" similar to existing netdev_del.  
> >> >> >> >> This is
> >> >> >> >>(2) fused with delete.  Conceptual wart: you can't disconnect 
> >> >> >> >> and
> >> >> >> >>keep the host part around.  Moreover, delete is slightly 
> >> >> >> >> dangerous,
> >> >> >> >>because it renders any guest device still using the host part
> >> >> >> >>useless.
> >> >> >> >
> >> >> >> > Hrm, I thought that's what (1) is.
> >> >> >> 
> >> >> >> No.
> >> >> >> 
> >> >> >> With (1), the argument is a *device* ID, and we disconnect *all* host
> >> >> >> parts connected to this device (typically just one).
> >> >> >> 
> >> >> >> With (3), the argument is a netdev/drive ID, and disconnect *this* 
> >> >> >> host
> >> >> >> part from the peer device.
> >> >> >> 
> >> >> >> > Well, either (1) or (3); I'd 
> >> >> >> > like to
> >> >> >> > rename drive_unplug() to blockdev_del() since they're similar 
> >> >> >> > function
> >> >> >> > w.r.t removing access to the host resource.  And we can invoke 
> >> >> >> > them in
> >> >> >> > the same way from libvirt (after doing guest notification, remove
> >> >> >> > access).
> >> >> >> 
> >> >> >> I'd call it drive_del for now, to match drive_add.
> >> >> >
> >> >> > OK, drive_del() and as you mentioned, drive_unplug will take out the
> >> >> > block driver, but doesn't remove the dinfo object; that ends up dying
> >> >> > when we call the device destructor.  I think for symmetry we'll want
> >> >> > drive_del to remove the dinfo object as well.
> >> >> 
> >> >> Exactly.
> >> >> 
> >> >> a. bdrv_detach() to zap the pointer from bdrv to qdev
> >> >> b. zap the pointer from qdev to bdrv
> >> >> c. drive_uninit() to dispose of the host part
> >> >
> >> > a-c need to be done to match netdev_del symmetry?  How hard of a req is
> >> > this?
> >> 
> >> Without (c), it's not a delete.  And (c) without (b) leaves a dangling
> >> pointer.  (c) without (a) fails an assertion in bdrv_delete().
> >> 
> >> Aside: (b) should probably be folded into bdrv_detach().
> >> 
> >> >> Step b could be awkward with (3), because you don't know device details.
> >> >> I guess you have to search device properties for a drive property
> >> >> pointing to bdrv.  I like (1) because it puts that loop in the one place
> >> >> where it belongs: qdev core.  (3) duplicates it in every HOSTDEV_del.
> >> >> Except for netdev_del, which is special because of VLANs.
> >> >> 
> >> >> To avoid step b, you could try to keep the bdrv around in a special
> >> >> zombie state.  Still have to free the dinfo, but can't use
> >>

Re: [Qemu-devel] [PATCH 1/2] rtl8139: add vlan tag insertion

2010-11-08 Thread Stefan Hajnoczi
On Sun, Nov 7, 2010 at 9:25 PM, Benjamin Poirier
 wrote:
> Add support to the emulated hardware to add vlan tags in packets going
> from the guest to the network.
>
> Signed-off-by: Benjamin Poirier 
> Cc: Igor V. Kovalenko 
> ---
>  hw/rtl8139.c |   46 +++---
>  1 files changed, 35 insertions(+), 11 deletions(-)
>
> diff --git a/hw/rtl8139.c b/hw/rtl8139.c
> index d92981d..ac294da 100644
> --- a/hw/rtl8139.c
> +++ b/hw/rtl8139.c
> @@ -47,6 +47,8 @@
>  *                                  Darwin)
>  */
>
> +#include 
> +
>  #include "hw.h"
>  #include "pci.h"
>  #include "qemu-timer.h"
> @@ -58,6 +60,10 @@
>
>  #define PCI_FREQUENCY 3300L
>
> +/* bytes in VLAN tag */
> +#define VLAN_TCI_LEN 2
> +#define VLAN_HDR_LEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
> +
>  /* debug RTL8139 card C+ mode only */
>  //#define DEBUG_RTL8139CP 1
>
> @@ -1913,7 +1919,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
>
>     cpu_physical_memory_read(cplus_tx_ring_desc,    (uint8_t *)&val, 4);
>     txdw0 = le32_to_cpu(val);
> -    /* TODO: implement VLAN tagging support, VLAN tag data is read to txdw1 
> */
>     cpu_physical_memory_read(cplus_tx_ring_desc+4,  (uint8_t *)&val, 4);
>     txdw1 = le32_to_cpu(val);
>     cpu_physical_memory_read(cplus_tx_ring_desc+8,  (uint8_t *)&val, 4);
> @@ -1925,9 +1930,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
>            descriptor,
>            txdw0, txdw1, txbufLO, txbufHI));
>
> -    /* TODO: the following discard cast should clean clang analyzer output */
> -    (void)txdw1;
> -
>  /* w0 ownership flag */
>  #define CP_TX_OWN (1<<31)
>  /* w0 end of ring flag */
> @@ -1951,8 +1953,8 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
>  /* w0 bits 0...15 : buffer size */
>  #define CP_TX_BUFFER_SIZE (1<<16)
>  #define CP_TX_BUFFER_SIZE_MASK (CP_TX_BUFFER_SIZE - 1)
> -/* w1 tag available flag */
> -#define CP_RX_TAGC (1<<17)
> +/* w1 add tag flag */
> +#define CP_TX_TAGC (1<<17)
>  /* w1 bits 0...15 : VLAN tag */
>  #define CP_TX_VLAN_TAG_MASK ((1<<16) - 1)
>  /* w2 low  32bit of Rx buffer ptr */
> @@ -1978,12 +1980,22 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
>
>     DEBUG_PRINT(("RTL8139: +++ C+ Tx mode : transmitting from descriptor 
> %d\n", descriptor));
>
> +    int vlan_extra_size = 0;
>     if (txdw0 & CP_TX_FS)
>     {
>         DEBUG_PRINT(("RTL8139: +++ C+ Tx mode : descriptor %d is first 
> segment descriptor\n", descriptor));
>
> +        DEBUG_PRINT(("RTL8139: +++ C+ Tx mode : add vlan tag: %u tci: %u\n",
> +                !!(txdw1 & CP_TX_TAGC), bswap16(txdw1 &
> +                    CP_TX_VLAN_TAG_MASK)));

Please use leXX_to_cpu() and cpu_to_leXX() instead of bswapXX() for
little-endian conversion.  This is more explicit than unconditional
byteswapping and work on both big- and little-endian QEMU host
architectures.

Stefan



[Qemu-devel] Re: [PATCH] PCI: Bus number from the bridge, not the device

2010-11-08 Thread Michael S. Tsirkin
On Mon, Nov 08, 2010 at 07:52:12AM -0700, Alex Williamson wrote:
> On Mon, 2010-11-08 at 13:22 +0200, Michael S. Tsirkin wrote:
> > On Mon, Oct 04, 2010 at 03:53:11PM -0600, Alex Williamson wrote:
> > > pcibus_dev_print() was erroneously retrieving the device bus
> > > number from the secondary bus number offset of the device
> > > instead of the bridge above the device.  This ends of landing
> > > in the 2nd byte of the 3rd BAR for devices, which thankfully
> > > is usually zero.  pcibus_get_dev_path() copied this code,
> > > inheriting the same bug.  pcibus_get_dev_path() is used for
> > > ramblock naming, so changing it can effect migration.  However,
> > > I've only seen this byte be non-zero for an assigned device,
> > > which can't migrate anyway, so hopefully we won't run into
> > > any issues.
> > > 
> > > Signed-off-by: Alex Williamson 
> > 
> > Good catch. Applied.
> > I don't really see why do we put the dev path
> > in the bus object: why not let device supply its name?
> 
> Because the device name is not unique.  This came about from the
> discussion about how to create a canonical device path that Gleb and
> Markus are again trying to hash out.  If we go up to the bus and get the
> bus address, we have a VM unique name.  Unfortunately, it's difficult to
> define what the bus should print in all cases (ISA), but since they
> don't do hotplug and typically don't allocate ramblocks, we can mostly
> ignore it for this use case.
> 
> > And I think this will affect nested bridges. However they are currently
> > broken anyway: we really must convert to topological names as bus number
> > is guest-assigned - they don't have to be unique, even.
> 
> Yes, nested bridges are a problem.  How can the seg/bus/devfn not be
> unique?

Bus numbers for nested bridges are guest assigned. We start with 0 after reset.

> > What does fixing this involve? Just changing pcibus_get_dev_path?
> 
> How do you plan to fix it?  Don't forget that migration depends on these
> names, so some kind of compatibility layer would be required.  Thanks,
> 
> Alex

Replace bus number with slot numbers of parent bridges up to the root.
This works for root bridge in a compatible way because bus number there
is hard-coded to 0.
IMO nested bridges are broken anyway, no way to be compatible there.


Gleb, Markus, I think the following should be sufficient for PCI.  What
do you think?  Also - do we need to update QMP/monitor to teach them to
work with these paths?

This is on top of Alex's patch, completely untested.


pci: fix device path for devices behind nested bridges

We were using bus number in the device path, which is clearly
broken as this number is guest-assigned for all devices
except the root.

Fix by using hierarchical list of slots, walking the path
from root down to device, instead. Add :00 as bus number
so that if there are no nested bridges, this is compatible
with what we have now.

Signed-off-by: Michael S. Tsirkin 

diff --git a/hw/pci.c b/hw/pci.c
index 7d12473..fa98d94 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1826,13 +1826,45 @@ static void pcibus_dev_print(Monitor *mon, DeviceState 
*dev, int indent)
 
 static char *pcibus_get_dev_path(DeviceState *dev)
 {
-PCIDevice *d = (PCIDevice *)dev;
-char path[16];
-
-snprintf(path, sizeof(path), "%04x:%02x:%02x.%x",
- pci_find_domain(d->bus), pci_bus_num(d->bus),
- PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
-
-return strdup(path);
+PCIDevice *d = container_of(dev, PCIDevice, qdev);
+PCIDevice *t;
+int slot_depth;
+/* Path format: Domain:00:Slot:Slot:Slot.Function.
+ * 00 is added here to make this format compatible with
+ * domain:Bus:Slot.Func for systems without nested PCI bridges.
+ * Slot list specifies the slot numbers for all devices on the
+ * path from root to the specific device. */
+int domain_len = strlen(":00");
+int func_len = strlen(".F");
+int slot_len = strlen(":SS");
+int path_len;
+char *path, *p;
+
+/* Calculate # of slots on path between device and root. */;
+slot_depth = 0;
+for (t = d; t; t = t->bus->parent_dev)
+++slot_depth;
+
+path_len = domain_len + bus_len + slot_len * slot_depth + func_len;
+
+/* Allocate memory, fill in the terminating null byte. */
+path = malloc(path_len + 1 /* For '\0' */);
+path[path_len] = '\0';
+
+/* First field is the domain. */
+snprintf(path, domain_len, "%04x", pci_find_domain(d->bus));
+
+/* Leave space for slot numbers and fill in function number. */
+p = path + domain_len + slot_len * slot_depth;
+snprintf(p, func_len, ".%02x", PCI_FUNC(d->devfn));
+
+/* Fill in slot numbers. We walk up from device to root, so need to print
+ * them in the reverse order, last to first. */
+for (t = d; t; t = t->bus->parent_dev) {
+p -= slot_len;
+snprintf(p, slot_len, ":%x", PCI_SLOT(t->devfn));
+}
+
+return path;
 }
 



[Qemu-devel] [PATCH 1/3] usb-linux: Store devpath into USBHostDevice when usb_fs_type == USB_FS_SYS

2010-11-08 Thread Hans de Goede
This allows us to recreate the sysfspath used during scanning later
(which will be used in a later patch in this series).
---
 usb-linux.c |   29 +++--
 1 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/usb-linux.c b/usb-linux.c
index c3c38ec..61e8ec6 100644
--- a/usb-linux.c
+++ b/usb-linux.c
@@ -62,8 +62,8 @@ struct usb_ctrlrequest {
 uint16_t wLength;
 };
 
-typedef int USBScanFunc(void *opaque, int bus_num, int addr, int class_id,
-int vendor_id, int product_id,
+typedef int USBScanFunc(void *opaque, int bus_num, int addr, int devpath,
+int class_id, int vendor_id, int product_id,
 const char *product_name, int speed);
 
 //#define DEBUG
@@ -141,6 +141,7 @@ typedef struct USBHostDevice {
 /* Host side address */
 int bus_num;
 int addr;
+int devpath;
 struct USBAutoFilter match;
 
 QTAILQ_ENTRY(USBHostDevice) next;
@@ -885,7 +886,7 @@ static int usb_linux_update_endp_table(USBHostDevice *s)
 }
 
 static int usb_host_open(USBHostDevice *dev, int bus_num,
- int addr, const char *prod_name)
+ int addr, int devpath, const char *prod_name)
 {
 int fd = -1, ret;
 struct usbdevfs_connectinfo ci;
@@ -911,6 +912,7 @@ static int usb_host_open(USBHostDevice *dev, int bus_num,
 
 dev->bus_num = bus_num;
 dev->addr = addr;
+dev->devpath = devpath;
 dev->fd = fd;
 
 /* read the device description */
@@ -1173,7 +1175,7 @@ static int usb_host_scan_dev(void *opaque, USBScanFunc 
*func)
 if (line[0] == 'T' && line[1] == ':') {
 if (device_count && (vendor_id || product_id)) {
 /* New device.  Add the previously discovered device.  */
-ret = func(opaque, bus_num, addr, class_id, vendor_id,
+ret = func(opaque, bus_num, addr, 0, class_id, vendor_id,
product_id, product_name, speed);
 if (ret) {
 goto the_end;
@@ -1226,7 +1228,7 @@ static int usb_host_scan_dev(void *opaque, USBScanFunc 
*func)
 }
 if (device_count && (vendor_id || product_id)) {
 /* Add the last device.  */
-ret = func(opaque, bus_num, addr, class_id, vendor_id,
+ret = func(opaque, bus_num, addr, 0, class_id, vendor_id,
product_id, product_name, speed);
 }
  the_end:
@@ -1275,7 +1277,7 @@ static int usb_host_scan_sys(void *opaque, USBScanFunc 
*func)
 {
 DIR *dir = NULL;
 char line[1024];
-int bus_num, addr, speed, class_id, product_id, vendor_id;
+int bus_num, addr, devpath, speed, class_id, product_id, vendor_id;
 int ret = 0;
 char product_name[512];
 struct dirent *de;
@@ -1300,6 +1302,13 @@ static int usb_host_scan_sys(void *opaque, USBScanFunc 
*func)
 if (sscanf(line, "%d", &addr) != 1) {
 goto the_end;
 }
+if (!usb_host_read_file(line, sizeof(line), "devpath",
+de->d_name)) {
+goto the_end;
+}
+if (sscanf(line, "%d", &devpath) != 1) {
+goto the_end;
+}
 if (!usb_host_read_file(line, sizeof(line), "bDeviceClass",
 de->d_name)) {
 goto the_end;
@@ -1343,7 +1352,7 @@ static int usb_host_scan_sys(void *opaque, USBScanFunc 
*func)
 speed = USB_SPEED_FULL;
 }
 
-ret = func(opaque, bus_num, addr, class_id, vendor_id,
+ret = func(opaque, bus_num, addr, devpath, class_id, vendor_id,
product_id, product_name, speed);
 if (ret) {
 goto the_end;
@@ -1434,7 +1443,7 @@ static int usb_host_scan(void *opaque, USBScanFunc *func)
 
 static QEMUTimer *usb_auto_timer;
 
-static int usb_host_auto_scan(void *opaque, int bus_num, int addr,
+static int usb_host_auto_scan(void *opaque, int bus_num, int addr, int devpath,
   int class_id, int vendor_id, int product_id,
   const char *product_name, int speed)
 {
@@ -1470,7 +1479,7 @@ static int usb_host_auto_scan(void *opaque, int bus_num, 
int addr,
 }
 DPRINTF("husb: auto open: bus_num %d addr %d\n", bus_num, addr);
 
-usb_host_open(s, bus_num, addr, product_name);
+usb_host_open(s, bus_num, addr, devpath, product_name);
 }
 
 return 0;
@@ -1630,7 +1639,7 @@ static void usb_info_device(Monitor *mon, int bus_num, 
int addr, int class_id,
 }
 
 static int usb_host_info_device(void *opaque, int bus_num, int addr,
-int class_id,
+int devpath, int class_id,
 int vendor_id, int product_id,
 const char *product_name,
 int speed)
-- 
1.7.3.

[Qemu-devel] [PATCH 3/3] usb-linux: Get the active configuration from sysfs rather then asking the dev

2010-11-08 Thread Hans de Goede
Some devices seem to choke on receiving a USB_REQ_GET_CONFIGURATION ctrl msg
(witnessed with a digital picture frame usb id 1908:1320).
When usb_fs_type == USB_FS_SYS, the active configuration can be read directly
from sysfs, which allows using this device through qemu's usb redirection.
More in general it seems a good idea to not send needless control msg's to
devices, esp. as the code in question is called every time a set_interface
is done. Which happens multiple times during virtual machine startup, and
when device drivers are activating the usb device.
---
 usb-linux.c |   19 +++
 1 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/usb-linux.c b/usb-linux.c
index 0a46692..25ba5b7 100644
--- a/usb-linux.c
+++ b/usb-linux.c
@@ -152,6 +152,8 @@ static QTAILQ_HEAD(, USBHostDevice) hostdevs = 
QTAILQ_HEAD_INITIALIZER(hostdevs)
 static int usb_host_close(USBHostDevice *dev);
 static int parse_filter(const char *spec, struct USBAutoFilter *f);
 static void usb_host_auto_check(void *unused);
+static int usb_host_read_file(char *line, size_t line_size,
+const char *device_file, const char *device_name);
 
 static int is_isoc(USBHostDevice *s, int ep)
 {
@@ -781,6 +783,23 @@ static int usb_linux_get_configuration(USBHostDevice *s)
 struct usb_ctrltransfer ct;
 int ret;
 
+if (usb_fs_type == USB_FS_SYS) {
+char device_name[32], line[1024];
+int configuration;
+
+sprintf(device_name, "%d-%d", s->bus_num, s->devpath);
+
+if (!usb_host_read_file(line, sizeof(line), "bConfigurationValue",
+device_name)) {
+goto usbdevfs;
+}
+if (sscanf(line, "%d", &configuration) != 1) {
+goto usbdevfs;
+}
+return configuration;
+}
+
+usbdevfs:
 ct.bRequestType = USB_DIR_IN;
 ct.bRequest = USB_REQ_GET_CONFIGURATION;
 ct.wValue = 0;
-- 
1.7.3.2




[Qemu-devel] [PATCH 2/3] usb-linux: introduce a usb_linux_get_configuration function

2010-11-08 Thread Hans de Goede
The next patch in this series introduces multiple ways to get the
configuration dependent upon usb_fs_type, it is cleaner to put this
into its own function.
---
 usb-linux.c |   30 ++
 1 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/usb-linux.c b/usb-linux.c
index 61e8ec6..0a46692 100644
--- a/usb-linux.c
+++ b/usb-linux.c
@@ -775,13 +775,11 @@ static int usb_host_handle_packet(USBDevice *s, USBPacket 
*p)
 }
 }
 
-/* returns 1 on problem encountered or 0 for success */
-static int usb_linux_update_endp_table(USBHostDevice *s)
+static int usb_linux_get_configuration(USBHostDevice *s)
 {
-uint8_t *descriptors;
-uint8_t devep, type, configuration, alt_interface;
+uint8_t configuration;
 struct usb_ctrltransfer ct;
-int interface, ret, length, i;
+int ret;
 
 ct.bRequestType = USB_DIR_IN;
 ct.bRequest = USB_REQ_GET_CONFIGURATION;
@@ -793,15 +791,31 @@ static int usb_linux_update_endp_table(USBHostDevice *s)
 
 ret = ioctl(s->fd, USBDEVFS_CONTROL, &ct);
 if (ret < 0) {
-perror("usb_linux_update_endp_table");
-return 1;
+perror("usb_linux_get_configuration");
+return -1;
 }
 
 /* in address state */
 if (configuration == 0) {
-return 1;
+return -1;
 }
 
+return configuration;
+}
+
+/* returns 1 on problem encountered or 0 for success */
+static int usb_linux_update_endp_table(USBHostDevice *s)
+{
+uint8_t *descriptors;
+uint8_t devep, type, configuration, alt_interface;
+struct usb_ctrltransfer ct;
+int interface, ret, length, i;
+
+i = usb_linux_get_configuration(s);
+if (i < 0)
+return 1;
+configuration = i;
+
 /* get the desired configuration, interface, and endpoint descriptors
  * from device description */
 descriptors = &s->descr[18];
-- 
1.7.3.2




[Qemu-devel] [PATCH 0/3] usb-linux do not send unnecessary GET_CONFIGURATION

2010-11-08 Thread Hans de Goede
Hi All,

First a short self-into. I've been a FOSS developer for 10+ years,
working on a large variety of projects. Most relevant for my appearing
here on the qemu list is my experience in reverse engineering and writing
usb webcam drivers for the Linux kernel and libgphoto2 camlibs for various
small usb keychain picture frames (with proprietary protocols).

I've been a Red Hat employee for 2 years now and in August this year I've
joined Red Hat's SPICE team. I'm currently tasked with looking into /
developing an usb redirection solution for SPICE. So for starters I've begun
looking into the current usb redirection support in qemu.

It failed at the first device I threw at it (a usb keychain picture frame),
the problem is that this (el cheapo) device does not seem to grok
GET_CONFIGURATION. This patch sets makes this device work (and stops qemu
from unnecessary sending a GET_CONFIGURATION ctrl msg in general) by
reading this value directly from sysfs.

Regards,

Hans

p.s.

While looking at the code I also noticed that the ep numeration in usb-linux
seems completely wrong for multifunction devices. I'll go test that and fix
it (assuming I'm right about it being wrong) tomorrow.



[Qemu-devel] Access to specific isa card with Qemu???

2010-11-08 Thread Djamel Hakkar
Hello,

We have a software that runs on MS-DOS and must communicate with a specific 
card installed on port isa.
We  want to use this software in Qemu with a machine that runs XP.
Is it possible to access to the ISA port with Qemu in this case?

Do we have to do a specific development?
Can you help us in this development, how much would it cost?

Thank you,
Cordially


Re: [Qemu-devel] [RFC] tracing: consistent usage of "disable" in "trace-events"

2010-11-08 Thread Daniel P. Berrange
On Mon, Nov 08, 2010 at 04:55:10PM +0100, Lluís wrote:
> Daniel P Berrange writes:
> 
> > On Mon, Nov 08, 2010 at 03:42:15PM +0100, Lluís wrote:
> >> On the current implementation, the "disable" keyword in "trace-events"
> >> has different semantics, depending on the backend:
> >> 
> >> * nop: ignored (not a problem)
> >> * simple : enables tracing, but sets dynamic state to disable
> >> * ust: disables tracing (uses nop backend)
> >> * dtrace : same as simple
> >> 
> >> Would it be possible to just use nop whenever the event is disabled in
> >> trace-events? If you agree I can cook the patch, as it's pretty simple.
> 
> > I don't particularly see the point of the 'disable' keyword existing at
> > all, unless there are performance implications for a particular trace 
> > backend. For the DTrace backend I strip & ignore the disable keyword
> > because probes that are compiled in, reduce to a inline conditional 
> > check that has no serious overhead when no trace client is active.
> 
> I think the same is applicable to the UST backend.
> 
> But it is not true when tracing guest events (e.g., memory
> accesses). Not because of the backend, but because of the frequency of
> appearance of such events.
> 
> In this case, the auto-generated code is on the lines of (I haven't yet
> posted the patch series producing this):
> 
>   [called during TCG code generation -- e.g., translate.c]
> 
>   #define TRACE_CURR_CPU_STATE_SET (cpu_single_env->trace_state_set)
>   #define trace_guest_vmem_cpu_event 0 // number of per-CPU trace event
>
>   static inline void trace_gen_guest_vmem (TCGv_i64 addr, uint32_t size, 
> uint32_t write)
>   {
>  if (TRACE_CURR_CPU_STATE_SET & (1 << trace_guest_vmem_cpu_event)) {
> gen_helper_proxy_guest_vmem(addr, tcg_const_i32(size), 
> tcg_const_i32(write));
>  }
>   }
> 
>   [extra helper functions -- declared at helper.h]
>
>   void helper_proxy_guest_vmem (uint64_t addr, uint32_t size, uint32_t write)
>   {
>  trace_guest_vmem(addr, size, write);
>   }
> 
> (*) A state set is a bitset with the events that have been declared with
> the "gen" keyword in "trace-events".
> 
> This code has indeed a performance cost, so I opted to follow the
> approach taken by the UST backend ("disable" produces a trace event with
> "nop"). When I opted for this, only simple and ust where in 'tracetool'.
> 
> In any case, there might appear other events that could have performance
> implications, although I understand the ease of usage of having all
> trace events available by default.

Ok, I agree that if we have tracepoints in the kind of places in TCG
you describe, then this could have measurable performance impact.

> That's why I would rather declare all trace-events without the "disable"
> keyword, and leave it only on those events that are known to have a high
> frequency, as no backend should have so poor performance as to force
> events to "disappear".

This sounds like a reasonable plan to me. 

Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|



Re: [Qemu-devel] [RFC] tracing: consistent usage of "disable" in "trace-events"

2010-11-08 Thread Lluís
Daniel P Berrange writes:

> On Mon, Nov 08, 2010 at 03:42:15PM +0100, Lluís wrote:
>> On the current implementation, the "disable" keyword in "trace-events"
>> has different semantics, depending on the backend:
>> 
>> * nop: ignored (not a problem)
>> * simple : enables tracing, but sets dynamic state to disable
>> * ust: disables tracing (uses nop backend)
>> * dtrace : same as simple
>> 
>> Would it be possible to just use nop whenever the event is disabled in
>> trace-events? If you agree I can cook the patch, as it's pretty simple.

> I don't particularly see the point of the 'disable' keyword existing at
> all, unless there are performance implications for a particular trace 
> backend. For the DTrace backend I strip & ignore the disable keyword
> because probes that are compiled in, reduce to a inline conditional 
> check that has no serious overhead when no trace client is active.

I think the same is applicable to the UST backend.

But it is not true when tracing guest events (e.g., memory
accesses). Not because of the backend, but because of the frequency of
appearance of such events.

In this case, the auto-generated code is on the lines of (I haven't yet
posted the patch series producing this):

  [called during TCG code generation -- e.g., translate.c]

  #define TRACE_CURR_CPU_STATE_SET (cpu_single_env->trace_state_set)
  #define trace_guest_vmem_cpu_event 0 // number of per-CPU trace event
   
  static inline void trace_gen_guest_vmem (TCGv_i64 addr, uint32_t size, 
uint32_t write)
  {
 if (TRACE_CURR_CPU_STATE_SET & (1 << trace_guest_vmem_cpu_event)) {
gen_helper_proxy_guest_vmem(addr, tcg_const_i32(size), 
tcg_const_i32(write));
 }
  }

  [extra helper functions -- declared at helper.h]
   
  void helper_proxy_guest_vmem (uint64_t addr, uint32_t size, uint32_t write)
  {
 trace_guest_vmem(addr, size, write);
  }

(*) A state set is a bitset with the events that have been declared with
the "gen" keyword in "trace-events".

This code has indeed a performance cost, so I opted to follow the
approach taken by the UST backend ("disable" produces a trace event with
"nop"). When I opted for this, only simple and ust where in 'tracetool'.

In any case, there might appear other events that could have performance
implications, although I understand the ease of usage of having all
trace events available by default.

That's why I would rather declare all trace-events without the "disable"
keyword, and leave it only on those events that are known to have a high
frequency, as no backend should have so poor performance as to force
events to "disappear".


Lluis

-- 
 "And it's much the same thing with knowledge, for whenever you learn
 something new, the whole world becomes that much richer."
 -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
 Tollbooth



Re: [Qemu-devel] [RFC] tracing: consistent usage of "disable" in "trace-events"

2010-11-08 Thread Daniel P. Berrange
On Mon, Nov 08, 2010 at 03:42:15PM +0100, Lluís wrote:
> On the current implementation, the "disable" keyword in "trace-events"
> has different semantics, depending on the backend:
> 
> * nop: ignored (not a problem)
> * simple : enables tracing, but sets dynamic state to disable
> * ust: disables tracing (uses nop backend)
> * dtrace : same as simple
> 
> Would it be possible to just use nop whenever the event is disabled in
> trace-events? If you agree I can cook the patch, as it's pretty simple.

I don't particularly see the point of the 'disable' keyword existing at
all, unless there are performance implications for a particular trace 
backend. For the DTrace backend I strip & ignore the disable keyword
because probes that are compiled in, reduce to a inline conditional 
check that has no serious overhead when no trace client is active.

Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|



[Qemu-devel] [PATCH 1/3] Make paio subsystem use threadlets infrastructure

2010-11-08 Thread Arun R Bharadwaj
From: Aneesh Kumar K.V 

This patch creates a generic asynchronous-task-offloading infrastructure named
threadlets.

The patch creates a global queue on-to which subsystems can queue their tasks to
be executed asynchronously.

The patch also provides API's that allow a subsystem to create a private queue
with an associated pool of threads.

The patch has been tested with fstress.

Signed-off-by: Aneesh Kumar K.V 
Signed-off-by: Arun R Bharadwaj 
Signed-off-by: Gautham R Shenoy 
Signed-off-by: Sripathi Kodi 
---
 Makefile.objs  |2 
 posix-aio-compat.c |  340 
 2 files changed, 208 insertions(+), 134 deletions(-)

diff --git a/Makefile.objs b/Makefile.objs
index cd5a24b..3b7ec27 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -9,6 +9,7 @@ qobject-obj-y += qerror.o
 
 block-obj-y = cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o
 block-obj-y += nbd.o block.o aio.o aes.o osdep.o qemu-config.o
+block-obj-$(CONFIG_POSIX) += qemu-thread.o
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
@@ -124,7 +125,6 @@ endif
 common-obj-y += $(addprefix ui/, $(ui-obj-y))
 
 common-obj-y += iov.o acl.o
-common-obj-$(CONFIG_THREAD) += qemu-thread.o
 common-obj-y += notify.o event_notifier.o
 common-obj-y += qemu-timer.o
 
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 7b862b5..00b2a4e 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -29,7 +29,32 @@
 #include "block_int.h"
 
 #include "block/raw-posix-aio.h"
+#include "qemu-thread.h"
 
+#define MAX_GLOBAL_THREADS  64
+#define MIN_GLOBAL_THREADS   8
+
+QemuMutex aiocb_mutex;
+
+typedef struct ThreadletQueue
+{
+QemuMutex lock;
+QemuCond cond;
+int max_threads;
+int min_threads;
+int cur_threads;
+int idle_threads;
+QTAILQ_HEAD(, ThreadletWork) request_list;
+} ThreadletQueue;
+
+typedef struct ThreadletWork
+{
+QTAILQ_ENTRY(ThreadletWork) node;
+void (*func)(struct ThreadletWork *work);
+} ThreadletWork;
+
+static ThreadletQueue globalqueue;
+static int globalqueue_init;
 
 struct qemu_paiocb {
 BlockDriverAIOCB common;
@@ -44,13 +69,13 @@ struct qemu_paiocb {
 int ev_signo;
 off_t aio_offset;
 
-QTAILQ_ENTRY(qemu_paiocb) node;
 int aio_type;
 ssize_t ret;
 int active;
 struct qemu_paiocb *next;
 
 int async_context_id;
+ThreadletWork work;
 };
 
 typedef struct PosixAioState {
@@ -58,64 +83,169 @@ typedef struct PosixAioState {
 struct qemu_paiocb *first_aio;
 } PosixAioState;
 
+static void *threadlet_worker(void *data)
+{
+ThreadletQueue *queue = data;
 
-static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-static pthread_t thread_id;
-static pthread_attr_t attr;
-static int max_threads = 64;
-static int cur_threads = 0;
-static int idle_threads = 0;
-static QTAILQ_HEAD(, qemu_paiocb) request_list;
+qemu_mutex_lock(&queue->lock);
+while (1) {
+ThreadletWork *work;
+int ret = 0;
+
+while (QTAILQ_EMPTY(&queue->request_list) &&
+   (ret != ETIMEDOUT)) {
+/* wait for cond to be signalled or broadcast for 1000s */
+ret = qemu_cond_timedwait((&queue->cond),
+ &(queue->lock), 10*10);
+}
 
-#ifdef CONFIG_PREADV
-static int preadv_present = 1;
-#else
-static int preadv_present = 0;
-#endif
+assert(queue->idle_threads != 0);
+if (QTAILQ_EMPTY(&queue->request_list)) {
+if (queue->cur_threads > queue->min_threads) {
+/* We retain the minimum number of threads */
+break;
+}
+} else {
+work = QTAILQ_FIRST(&queue->request_list);
+QTAILQ_REMOVE(&queue->request_list, work, node);
 
-static void die2(int err, const char *what)
-{
-fprintf(stderr, "%s failed: %s\n", what, strerror(err));
-abort();
+queue->idle_threads--;
+qemu_mutex_unlock(&queue->lock);
+
+/* execute the work function */
+work->func(work);
+
+qemu_mutex_lock(&queue->lock);
+queue->idle_threads++;
+}
+}
+
+queue->idle_threads--;
+queue->cur_threads--;
+qemu_mutex_unlock(&queue->lock);
+
+return NULL;
 }
 
-static void die(const char *what)
+static void spawn_threadlet(ThreadletQueue *queue)
 {
-die2(errno, what);
+QemuThread thread;
+
+queue->cur_threads++;
+queue->idle_threads++;
+
+qemu_thread_create(&thread, threadlet_worker, queue);
 }
 
-static void mutex_lock(pthread_mutex_t *mutex)
+/**
+ * submit_work_to_queue: Submit a new task to a private queue to be
+ *executed asynchronously.
+ * @queue: Per-subsystem private queue to which the new task needs
+ * to be submitted.
+ * @work: Contains information about the task that needs to be submitted.
+ */
+static v

[Qemu-devel] Re: [PATCH 0/6] Save state error handling (kill off no_migrate)

2010-11-08 Thread Alex Williamson
On Mon, 2010-11-08 at 13:40 +0200, Michael S. Tsirkin wrote:
> On Wed, Oct 06, 2010 at 02:58:57PM -0600, Alex Williamson wrote:
> > Our code paths for saving or migrating a VM are full of functions that
> > return void, leaving no opportunity for a device to cancel a migration,
> > either from error or incompatibility.  The ivshmem driver attempted to
> > solve this with a no_migrate flag on the save state entry.  I think the
> > more generic and flexible way to solve this is to allow driver save
> > functions to fail.  This series implements that and converts ivshmem
> > to uses a set_params function to NAK migration much earlier in the
> > processes.  This touches a lot of files, but bulk of those changes are
> > simply s/void/int/ and tacking a "return 0" to the end of functions.
> > Thanks,
> > 
> > Alex
> 
> Well error handling is always tricky: it seems easier to
> require save handlers to never fail.

Sure it's easier, but does that make it robust?

> So there's a bunch of code here but what exactly is the benefit?
> Since save handlers have no idea what does the remote do,
> what is the compatibility you mention?

There are two users I currently have in mind.  ivshmem currently makes
use of the register_device_unmigratable() because it makes use of host
specific resources and connections (aiui).  This sets the no_migrate
flag, which is not dynamic and a bit of a band-aide.  The other is
device assignment, which needs a way to NAK a migration since physical
devices are never migratable.  I imagine we could at some point have
devices with state tied to other features that can't always be detached
from the host, this tries to provide the infrastructure for that to
happen.

Alex




[Qemu-devel] Re: [PATCH] PCI: Bus number from the bridge, not the device

2010-11-08 Thread Alex Williamson
On Mon, 2010-11-08 at 13:22 +0200, Michael S. Tsirkin wrote:
> On Mon, Oct 04, 2010 at 03:53:11PM -0600, Alex Williamson wrote:
> > pcibus_dev_print() was erroneously retrieving the device bus
> > number from the secondary bus number offset of the device
> > instead of the bridge above the device.  This ends of landing
> > in the 2nd byte of the 3rd BAR for devices, which thankfully
> > is usually zero.  pcibus_get_dev_path() copied this code,
> > inheriting the same bug.  pcibus_get_dev_path() is used for
> > ramblock naming, so changing it can effect migration.  However,
> > I've only seen this byte be non-zero for an assigned device,
> > which can't migrate anyway, so hopefully we won't run into
> > any issues.
> > 
> > Signed-off-by: Alex Williamson 
> 
> Good catch. Applied.
> I don't really see why do we put the dev path
> in the bus object: why not let device supply its name?

Because the device name is not unique.  This came about from the
discussion about how to create a canonical device path that Gleb and
Markus are again trying to hash out.  If we go up to the bus and get the
bus address, we have a VM unique name.  Unfortunately, it's difficult to
define what the bus should print in all cases (ISA), but since they
don't do hotplug and typically don't allocate ramblocks, we can mostly
ignore it for this use case.

> And I think this will affect nested bridges. However they are currently
> broken anyway: we really must convert to topological names as bus number
> is guest-assigned - they don't have to be unique, even.

Yes, nested bridges are a problem.  How can the seg/bus/devfn not be
unique?

> What does fixing this involve? Just changing pcibus_get_dev_path?

How do you plan to fix it?  Don't forget that migration depends on these
names, so some kind of compatibility layer would be required.  Thanks,

Alex

> > ---
> > 
> >  hw/pci.c |5 ++---
> >  1 files changed, 2 insertions(+), 3 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index 6d0934d..15416dd 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -1940,8 +1940,7 @@ static void pcibus_dev_print(Monitor *mon, 
> > DeviceState *dev, int indent)
> >  
> >  monitor_printf(mon, "%*sclass %s, addr %02x:%02x.%x, "
> > "pci id %04x:%04x (sub %04x:%04x)\n",
> > -   indent, "", ctxt,
> > -   d->config[PCI_SECONDARY_BUS],
> > +   indent, "", ctxt, pci_bus_num(d->bus),
> > PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
> > pci_get_word(d->config + PCI_VENDOR_ID),
> > pci_get_word(d->config + PCI_DEVICE_ID),
> > @@ -1965,7 +1964,7 @@ static char *pcibus_get_dev_path(DeviceState *dev)
> >  char path[16];
> >  
> >  snprintf(path, sizeof(path), "%04x:%02x:%02x.%x",
> > - pci_find_domain(d->bus), d->config[PCI_SECONDARY_BUS],
> > + pci_find_domain(d->bus), pci_bus_num(d->bus),
> >   PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
> >  
> >  return strdup(path);
> > 






[Qemu-devel] [RFC] tracing: consistent usage of "disable" in "trace-events"

2010-11-08 Thread Lluís
On the current implementation, the "disable" keyword in "trace-events"
has different semantics, depending on the backend:

* nop: ignored (not a problem)
* simple : enables tracing, but sets dynamic state to disable
* ust: disables tracing (uses nop backend)
* dtrace : same as simple

Would it be possible to just use nop whenever the event is disabled in
trace-events? If you agree I can cook the patch, as it's pretty simple.


Lluis

-- 
 "And it's much the same thing with knowledge, for whenever you learn
 something new, the whole world becomes that much richer."
 -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
 Tollbooth



Re: [Qemu-devel] [Bug 671831] [NEW] Sparc guest assert error

2010-11-08 Thread Stefan Hajnoczi
I just gave the SPARC Linux 2.6 image from qemu.org a spin on sun4m but no luck:

sparc-softmmu/qemu-system-sparc -kernel
'/tmp/sparc-test/vmlinux-2.6.11+tcx' -initrd
'/tmp/sparc-test/linux.img' -append "root=/dev/ram" -drive
if=scsi,file=test.raw,cache=none

The kernel correctly notices the sda device and detects partitions (so
it is doing disk reads).  There is no assertion error so this problem
may be specific to the Linux 2.4 ESP driver.

Stefan



[Qemu-devel] [RFC] tracing: per-CPU tracing state

2010-11-08 Thread Lluís
As more and more tracing backends appear (right now: nop, simple, ust
and the new dtrace), it is harder to provide a tracetool where these
backends can be efficiently used without being aware of per-CPU tracing
state.

What I have now is basically:

* trace.h : trace_##name
  Backend-specific declaration/definition of per-event tracing functions

* trace-cpu.h : trace_cpu_##name
  Generic "static inline" definitions of functions that check the
  per-CPU state and (if true) call the corresponding trace_##name

* trace-gen.h: trace_gen_##name
  Generic "static inline" definitions of functions that check the
  per-CPU state and (if true) generate TCG calls to a function
  redirecting the call to trace_##name

The last two are only generated for those events that have the "gen"
keyword in "trace-events".

The point is that the trace_##name functions do an extra check (the one
provided by the backend) that should not be necessary, as the per-CPU
state check already ensures that the trace must be produced.

Thus, I'd propose to provide an extra per-backend function that prodices
the trace without checking the tracing state.

Another important change would be to provide a generic interface inside
QEMU that provides a programmatic control of the tracing state of all
events (both per-cpu and global), and let each backend provide a small
.c file implementing that functionality. This would provide a
centralized point through QEMU monitor for the control of tracing
events, plus any backend-specific control interface that might also be
available.


Lluis

-- 
 "And it's much the same thing with knowledge, for whenever you learn
 something new, the whole world becomes that much richer."
 -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
 Tollbooth



[Qemu-devel] [PATCH 1/3] Make paio subsystem use threadlets infrastructure

2010-11-08 Thread Arun R Bharadwaj
* Arun R Bharadwaj  [2010-11-08 16:16:50]:

Make paio subsystem use threadlets infrastructure

From: Aneesh Kumar K.V 

This patch creates a generic asynchronous-task-offloading infrastructure named
threadlets.

The patch creates a global queue on-to which subsystems can queue their tasks to
be executed asynchronously.

The patch also provides API's that allow a subsystem to create a private queue
with an associated pool of threads.

The patch has been tested with fstress.

Signed-off-by: Aneesh Kumar K.V 
Signed-off-by: Arun R Bharadwaj 
Signed-off-by: Gautham R Shenoy 
Signed-off-by: Sripathi Kodi 
---
 Makefile.objs  |2 
 posix-aio-compat.c |  340 
 2 files changed, 208 insertions(+), 134 deletions(-)

diff --git a/Makefile.objs b/Makefile.objs
index cd5a24b..3b7ec27 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -9,6 +9,7 @@ qobject-obj-y += qerror.o
 
 block-obj-y = cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o
 block-obj-y += nbd.o block.o aio.o aes.o osdep.o qemu-config.o
+block-obj-$(CONFIG_POSIX) += qemu-thread.o
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 
@@ -124,7 +125,6 @@ endif
 common-obj-y += $(addprefix ui/, $(ui-obj-y))
 
 common-obj-y += iov.o acl.o
-common-obj-$(CONFIG_THREAD) += qemu-thread.o
 common-obj-y += notify.o event_notifier.o
 common-obj-y += qemu-timer.o
 
diff --git a/posix-aio-compat.c b/posix-aio-compat.c
index 7b862b5..00b2a4e 100644
--- a/posix-aio-compat.c
+++ b/posix-aio-compat.c
@@ -29,7 +29,32 @@
 #include "block_int.h"
 
 #include "block/raw-posix-aio.h"
+#include "qemu-thread.h"
 
+#define MAX_GLOBAL_THREADS  64
+#define MIN_GLOBAL_THREADS   8
+
+QemuMutex aiocb_mutex;
+
+typedef struct ThreadletQueue
+{
+QemuMutex lock;
+QemuCond cond;
+int max_threads;
+int min_threads;
+int cur_threads;
+int idle_threads;
+QTAILQ_HEAD(, ThreadletWork) request_list;
+} ThreadletQueue;
+
+typedef struct ThreadletWork
+{
+QTAILQ_ENTRY(ThreadletWork) node;
+void (*func)(struct ThreadletWork *work);
+} ThreadletWork;
+
+static ThreadletQueue globalqueue;
+static int globalqueue_init;
 
 struct qemu_paiocb {
 BlockDriverAIOCB common;
@@ -44,13 +69,13 @@ struct qemu_paiocb {
 int ev_signo;
 off_t aio_offset;
 
-QTAILQ_ENTRY(qemu_paiocb) node;
 int aio_type;
 ssize_t ret;
 int active;
 struct qemu_paiocb *next;
 
 int async_context_id;
+ThreadletWork work;
 };
 
 typedef struct PosixAioState {
@@ -58,64 +83,169 @@ typedef struct PosixAioState {
 struct qemu_paiocb *first_aio;
 } PosixAioState;
 
+static void *threadlet_worker(void *data)
+{
+ThreadletQueue *queue = data;
 
-static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-static pthread_t thread_id;
-static pthread_attr_t attr;
-static int max_threads = 64;
-static int cur_threads = 0;
-static int idle_threads = 0;
-static QTAILQ_HEAD(, qemu_paiocb) request_list;
+qemu_mutex_lock(&queue->lock);
+while (1) {
+ThreadletWork *work;
+int ret = 0;
+
+while (QTAILQ_EMPTY(&queue->request_list) &&
+   (ret != ETIMEDOUT)) {
+/* wait for cond to be signalled or broadcast for 1000s */
+ret = qemu_cond_timedwait((&queue->cond),
+ &(queue->lock), 10*10);
+}
 
-#ifdef CONFIG_PREADV
-static int preadv_present = 1;
-#else
-static int preadv_present = 0;
-#endif
+assert(queue->idle_threads != 0);
+if (QTAILQ_EMPTY(&queue->request_list)) {
+if (queue->cur_threads > queue->min_threads) {
+/* We retain the minimum number of threads */
+break;
+}
+} else {
+work = QTAILQ_FIRST(&queue->request_list);
+QTAILQ_REMOVE(&queue->request_list, work, node);
 
-static void die2(int err, const char *what)
-{
-fprintf(stderr, "%s failed: %s\n", what, strerror(err));
-abort();
+queue->idle_threads--;
+qemu_mutex_unlock(&queue->lock);
+
+/* execute the work function */
+work->func(work);
+
+qemu_mutex_lock(&queue->lock);
+queue->idle_threads++;
+}
+}
+
+queue->idle_threads--;
+queue->cur_threads--;
+qemu_mutex_unlock(&queue->lock);
+
+return NULL;
 }
 
-static void die(const char *what)
+static void spawn_threadlet(ThreadletQueue *queue)
 {
-die2(errno, what);
+QemuThread thread;
+
+queue->cur_threads++;
+queue->idle_threads++;
+
+qemu_thread_create(&thread, threadlet_worker, queue);
 }
 
-static void mutex_lock(pthread_mutex_t *mutex)
+/**
+ * submit_work_to_queue: Submit a new task to a private queue to be
+ *executed asynchronously.
+ * @queue: Per-subsystem private queue to which the new task needs
+ * to be submi

Re: [Qemu-devel] [Bug 671831] [NEW] Sparc guest assert error

2010-11-08 Thread Stefan Hajnoczi
On Mon, Nov 8, 2010 at 2:04 PM, Nigel Horne <671...@bugs.launchpad.net> wrote:
>> You can grab the code like this:
>> git clone -b scsi_assert git://repo.or.cz/qemu/stefanha.git
>>
>>
> That retrieves a directory called scsi_assert which is empty.  Are you
> expecting that?  Should I therefore use the directory in there called
> stehana?

Yes, 'stefanha' is the source tree directory.

When I do that git clones to a directory called 'stefanha', which
contains the QEMU source tree from the scsi_assert branch.  But you
can also use following alternative if your git version does not
support -b :

git clone git://repo.or.cz/qemu/stefanha.git
cd stefanha
git checkout origin/scsi_assert

Stefan



[Qemu-devel] KVM call agenda for Nov 9

2010-11-08 Thread Juan Quintela

Please send in any agenda items you are interested in covering.

For once I start:
- report from linux plumbers virt track (jes?, anthony?)
- report from linux kernel summit (avi?)

Cheers, Juan.



Re: [Qemu-devel] [PATCH v2 5/6] backdoor: [i386] provide and implement intruction-based backdoor interface

2010-11-08 Thread Lluís
Gleb Natapov writes:

> On Thu, Nov 04, 2010 at 11:36:15PM +0100, Lluís wrote:
>> Take the unused CPUID 0x40001xxx range as the backdoor instruction.
>> 
> In KVM (and it fits the spec nicely) cpuid is defined in terms of
> tables.  There is no callback that is called when particular cpuid is
> queried, so such backdoor interface will be impossible to implement
> in KVM. Furthermore any interface that changes/looks at vcpu state in
> userspace is broken for KVM. Look at vmware backdoor interface for
> instance. KVM  has a hack in emulator code to make it work.

I know. I looked into the KVM implementation and neither CPUID nor
VMCALL/VMMCALL (these two are, in fact, obsoleted) are implemented as
calls to the hypervisor (although the hardware supports it).

The only interfaces exported by KVM to that purpose are through
MMIO/PIO, but these are OS-dependant (aka, KVM-dependant).

As such, there is currently no generic, OS-independant and low-overhead
method for providing a backdoor communication channel from the guest
directly into QEMU.

As I see this is very tied to my setup, I'll move this patch series down
below the tracing series so that it does not interfere with the other
patches.

Lluis

-- 
 "And it's much the same thing with knowledge, for whenever you learn
 something new, the whole world becomes that much richer."
 -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
 Tollbooth



Re: [Qemu-devel] [Bug 671831] [NEW] Sparc guest assert error

2010-11-08 Thread Nigel Horne
Stefan,
> If it's not easy to share your disk image, could you please test this
> QEMU tree which backports the assert:
>
> http://repo.or.cz/w/qemu/stefanha.git/shortlog/refs/heads/scsi_assert
>
> You can grab the code like this:
> git clone -b scsi_assert git://repo.or.cz/qemu/stefanha.git
>
>
That retrieves a directory called scsi_assert which is empty.  Are you 
expecting that?  Should I therefore use the directory in there called 
stehana?

Regards,

-Nigel

-- 
Sparc guest assert error
https://bugs.launchpad.net/bugs/671831
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
The latest version in git (d33ea50a958b2e050d2b28e5f17e3b55e91c6d74) crashes 
with an assert error when booting a Sparc/Linux guest.

The last time I tried it (about a week ago) it worked fine.  Yesterdai, I did a 
git pull, make clean, reran configure and compiled.

Host OS: Debian Linux/x86_64 5.0
C Compiler: 4.4.5
Guest OS: Linux/Sparc (2.4)
Command Line: qemu-system-sparc -hda ~njh/qemu/sparc/debian.img -nographic -m 
128
Build Configure: ./configure --enable-linux-aio --enable-io-thread --enable-kvm
GIT commit: d33ea50a958b2e050d2b28e5f17e3b55e91c6d74

Output:

Adding Swap: 122532k swap-space (priority -1)
.
Will now check root file system:fsck 1.40-WIP (14-Nov-2006)
[/sbin/fsck.ext3 (1) -- /] fsck.ext3 -a -C0 /dev/sda2 
qemu-system-sparc: /home/njh/src/qemu/hw/scsi-disk.c:201: scsi_read_data: 
Assertion `r->req.aiocb == ((void *)0)' failed.


It crashes in the same place every time.

(gdb) thread apply all bt:

Thread 3 (Thread 17643):
#0  0x7f4db21bc8d3 in select () at ../sysdeps/unix/syscall-template.S:82
#1  0x004d02c4 in main_loop_wait (nonblocking=)
at /home/njh/src/qemu/vl.c:1246
#2  0x004d0e57 in main_loop (argc=, 
argv=, envp=)
at /home/njh/src/qemu/vl.c:1309
#3  main (argc=, argv=, 
envp=) at /home/njh/src/qemu/vl.c:2999

Thread 2 (Thread 17645):
#0  pthread_cond_timedwait@@GLIBC_2.3.2 ()
at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:211
#1  0x0042450b in cond_timedwait (unused=)
at posix-aio-compat.c:104
#2  aio_thread (unused=) at posix-aio-compat.c:325
#3  0x7f4db3b818ba in start_thread (arg=)
at pthread_create.c:300
#4  0x7f4db21c302d in clone ()
at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#5  0x in ?? ()
Current language:  auto
The current source language is "auto; currently asm".

Thread 1 (Thread 17644):
#0  0x7f4db2126165 in *__GI_raise (sig=)
at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
#1  0x7f4db2128f70 in *__GI_abort () at abort.c:92
#2  0x7f4db211f2b1 in *__GI___assert_fail (
assertion=0x52690a "r->req.aiocb == ((void *)0)", 
file=, line=201, function=0x527480 "scsi_read_data")
at assert.c:81
#3  0x0044f363 in scsi_read_data (d=, tag=0)
at /home/njh/src/qemu/hw/scsi-disk.c:201
#4  0x004ebd6c in esp_do_dma (s=0x20679d0)
at /home/njh/src/qemu/hw/esp.c:377
#5  0x004ec781 in handle_ti (opaque=0x20679d0, 
addr=, val=)
at /home/njh/src/qemu/hw/esp.c:443
#6  esp_mem_writeb (opaque=0x20679d0, addr=, 
val=) at /home/njh/src/qemu/hw/esp.c:595
#7  0x41b2d971 in ?? ()
#8  0x in ?? ()
#9  0x031ad000 in ?? ()
#10 0x000301adfa20 in ?? ()
#11 0x1007 in ?? ()
#12 0x7f4daf80e8a0 in ?? ()
#13 0x0001 in ?? ()
#14 0x in ?? ()





Re: [Qemu-devel] [PATCH 0/3] v4 Decouple block device removal from device removal

2010-11-08 Thread Ryan Harper
* Markus Armbruster  [2010-11-08 06:04]:
> "Michael S. Tsirkin"  writes:
> 
> > On Mon, Nov 08, 2010 at 11:32:01AM +0100, Markus Armbruster wrote:
> >> Ryan Harper  writes:
> >> 
> >> > * Markus Armbruster  [2010-11-06 04:19]:
> >> >> Ryan Harper  writes:
> >> >> 
> >> >> > * Markus Armbruster  [2010-11-05 11:11]:
> >> >> >> Ryan Harper  writes:
> >> >> >> 
> >> >> >> > * Markus Armbruster  [2010-11-05 08:28]:
> >> >> >> >> I'd be fine with any of these:
> >> >> >> >> 
> >> >> >> >> 1. A new command "device_disconnet ID" (or similar name) to 
> >> >> >> >> disconnect
> >> >> >> >>device ID from any host parts.  Nice touch: you don't have to 
> >> >> >> >> know
> >> >> >> >>about the device's host part(s) to disconnect it.  But it 
> >> >> >> >> might be
> >> >> >> >>more work than the other two.
> >> >> >> >
> >> >> >> > This is sort of what netdev_del() and drive_unplug() are today; 
> >> >> >> > we're
> >> >> >> > just saying sever the connection of this device id.   
> >> >> >> 
> >> >> >> No, I have netdev_del as (3).
> >> >> >> 
> >> >> >> All three options are "sort of" the same, just different commands 
> >> >> >> with
> >> >> >> a common purpose.
> >> >> >> 
> >> >> >> > I'd like to rename drive_unplug() to blockdev_del() and call it 
> >> >> >> > done.  I
> >> >> >> > was looking at libvirt and the right call to netdev_del is already
> >> >> >> > in-place; I'd just need to re-spin my block patch to call 
> >> >> >> > blockdev_del()
> >> >> >> > after invoking device_del() to match what is done for net.
> >> >> >> 
> >> >> >> Unless I'm missing something, you can't just rename: your unplug does
> >> >> >> not delete the host part.
> >> >> >> 
> >> >> >> >> 2. New commands netdev_disconnect, drive_disconnect (or similar 
> >> >> >> >> names)
> >> >> >> >>to disconnect a host part from a guest device.  Like (1), 
> >> >> >> >> except you
> >> >> >> >>have to point to the other end of the connection to cut it.
> >> >> >> >
> >> >> >> > What's the advantage here? We need an additional piece of info 
> >> >> >> > (host
> >> >> >> > part) in addition to the device id?
> >> >> >> 
> >> >> >> That's a disadvantage.
> >> >> >> 
> >> >> >> Possible advantage: implementation could be slightly easier than (1),
> >> >> >> because you don't have to find the host parts.
> >> >> >> 
> >> >> >> >> 3. A new command "drive_del ID" similar to existing netdev_del.  
> >> >> >> >> This is
> >> >> >> >>(2) fused with delete.  Conceptual wart: you can't disconnect 
> >> >> >> >> and
> >> >> >> >>keep the host part around.  Moreover, delete is slightly 
> >> >> >> >> dangerous,
> >> >> >> >>because it renders any guest device still using the host part
> >> >> >> >>useless.
> >> >> >> >
> >> >> >> > Hrm, I thought that's what (1) is.
> >> >> >> 
> >> >> >> No.
> >> >> >> 
> >> >> >> With (1), the argument is a *device* ID, and we disconnect *all* host
> >> >> >> parts connected to this device (typically just one).
> >> >> >> 
> >> >> >> With (3), the argument is a netdev/drive ID, and disconnect *this* 
> >> >> >> host
> >> >> >> part from the peer device.
> >> >> >> 
> >> >> >> > Well, either (1) or (3); I'd 
> >> >> >> > like to
> >> >> >> > rename drive_unplug() to blockdev_del() since they're similar 
> >> >> >> > function
> >> >> >> > w.r.t removing access to the host resource.  And we can invoke 
> >> >> >> > them in
> >> >> >> > the same way from libvirt (after doing guest notification, remove
> >> >> >> > access).
> >> >> >> 
> >> >> >> I'd call it drive_del for now, to match drive_add.
> >> >> >
> >> >> > OK, drive_del() and as you mentioned, drive_unplug will take out the
> >> >> > block driver, but doesn't remove the dinfo object; that ends up dying
> >> >> > when we call the device destructor.  I think for symmetry we'll want
> >> >> > drive_del to remove the dinfo object as well.
> >> >> 
> >> >> Exactly.
> >> >> 
> >> >> a. bdrv_detach() to zap the pointer from bdrv to qdev
> >> >> b. zap the pointer from qdev to bdrv
> >> >> c. drive_uninit() to dispose of the host part
> >> >
> >> > a-c need to be done to match netdev_del symmetry?  How hard of a req is
> >> > this?
> >> 
> >> Without (c), it's not a delete.  And (c) without (b) leaves a dangling
> >> pointer.  (c) without (a) fails an assertion in bdrv_delete().
> >> 
> >> Aside: (b) should probably be folded into bdrv_detach().
> >> 
> >> >> Step b could be awkward with (3), because you don't know device details.
> >> >> I guess you have to search device properties for a drive property
> >> >> pointing to bdrv.  I like (1) because it puts that loop in the one place
> >> >> where it belongs: qdev core.  (3) duplicates it in every HOSTDEV_del.
> >> >> Except for netdev_del, which is special because of VLANs.
> >> >> 
> >> >> To avoid step b, you could try to keep the bdrv around in a special
> >> >> zombie state.  Still have to free the dinfo, but can't use
> >> >> drive_uninit() for tha

Re: [Qemu-devel] [Bug 671831] [NEW] Sparc guest assert error

2010-11-08 Thread Nigel Horne
On 11/08/2010 05:42 AM, Stefan Hajnoczi wrote:
> Hi Nigel,
> Is there a disk image available to reproduce this bug?  I searched for
> Debian SPARC 2.4-based disk images but wasn't able to find one.
>
I got the image http://wiki.qemu.org/Download.  It was sometime ago and 
it may no longer be there - the image on that site now mentions Sparc 
2.6, I guess 2.4 has been removed.

I have no means for you to take a copy of my image.  Sorry.
> If it's not easy to share your disk image, could you please test this
> QEMU tree which backports the assert:
>
> http://repo.or.cz/w/qemu/stefanha.git/shortlog/refs/heads/scsi_assert
>

Will do - thanks for making it available.  It may take me a bit of time 
to get around to it, but I will do so as soon as I can.
> You can grab the code like this:
> git clone -b scsi_assert git://repo.or.cz/qemu/stefanha.git
>

-Nigel

-- 
Sparc guest assert error
https://bugs.launchpad.net/bugs/671831
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: New

Bug description:
The latest version in git (d33ea50a958b2e050d2b28e5f17e3b55e91c6d74) crashes 
with an assert error when booting a Sparc/Linux guest.

The last time I tried it (about a week ago) it worked fine.  Yesterdai, I did a 
git pull, make clean, reran configure and compiled.

Host OS: Debian Linux/x86_64 5.0
C Compiler: 4.4.5
Guest OS: Linux/Sparc (2.4)
Command Line: qemu-system-sparc -hda ~njh/qemu/sparc/debian.img -nographic -m 
128
Build Configure: ./configure --enable-linux-aio --enable-io-thread --enable-kvm
GIT commit: d33ea50a958b2e050d2b28e5f17e3b55e91c6d74

Output:

Adding Swap: 122532k swap-space (priority -1)
.
Will now check root file system:fsck 1.40-WIP (14-Nov-2006)
[/sbin/fsck.ext3 (1) -- /] fsck.ext3 -a -C0 /dev/sda2 
qemu-system-sparc: /home/njh/src/qemu/hw/scsi-disk.c:201: scsi_read_data: 
Assertion `r->req.aiocb == ((void *)0)' failed.


It crashes in the same place every time.

(gdb) thread apply all bt:

Thread 3 (Thread 17643):
#0  0x7f4db21bc8d3 in select () at ../sysdeps/unix/syscall-template.S:82
#1  0x004d02c4 in main_loop_wait (nonblocking=)
at /home/njh/src/qemu/vl.c:1246
#2  0x004d0e57 in main_loop (argc=, 
argv=, envp=)
at /home/njh/src/qemu/vl.c:1309
#3  main (argc=, argv=, 
envp=) at /home/njh/src/qemu/vl.c:2999

Thread 2 (Thread 17645):
#0  pthread_cond_timedwait@@GLIBC_2.3.2 ()
at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:211
#1  0x0042450b in cond_timedwait (unused=)
at posix-aio-compat.c:104
#2  aio_thread (unused=) at posix-aio-compat.c:325
#3  0x7f4db3b818ba in start_thread (arg=)
at pthread_create.c:300
#4  0x7f4db21c302d in clone ()
at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#5  0x in ?? ()
Current language:  auto
The current source language is "auto; currently asm".

Thread 1 (Thread 17644):
#0  0x7f4db2126165 in *__GI_raise (sig=)
at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
#1  0x7f4db2128f70 in *__GI_abort () at abort.c:92
#2  0x7f4db211f2b1 in *__GI___assert_fail (
assertion=0x52690a "r->req.aiocb == ((void *)0)", 
file=, line=201, function=0x527480 "scsi_read_data")
at assert.c:81
#3  0x0044f363 in scsi_read_data (d=, tag=0)
at /home/njh/src/qemu/hw/scsi-disk.c:201
#4  0x004ebd6c in esp_do_dma (s=0x20679d0)
at /home/njh/src/qemu/hw/esp.c:377
#5  0x004ec781 in handle_ti (opaque=0x20679d0, 
addr=, val=)
at /home/njh/src/qemu/hw/esp.c:443
#6  esp_mem_writeb (opaque=0x20679d0, addr=, 
val=) at /home/njh/src/qemu/hw/esp.c:595
#7  0x41b2d971 in ?? ()
#8  0x in ?? ()
#9  0x031ad000 in ?? ()
#10 0x000301adfa20 in ?? ()
#11 0x1007 in ?? ()
#12 0x7f4daf80e8a0 in ?? ()
#13 0x0001 in ?? ()
#14 0x in ?? ()





[Qemu-devel] [PATCH 1/4] intel-hda: exit cleanup

2010-11-08 Thread Gerd Hoffmann
Add pci exit callback for the intel-hda device and cleanup properly.
Also add an exit callback to the HDA bus implementation and make sure
it is called on qdev_free().

Signed-off-by: Gerd Hoffmann 
---
 hw/intel-hda.c |   20 
 hw/intel-hda.h |1 +
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index ccb059d..78c32da 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -61,9 +61,20 @@ static int hda_codec_dev_init(DeviceState *qdev, DeviceInfo 
*base)
 return info->init(dev);
 }
 
+static int hda_codec_dev_exit(DeviceState *qdev)
+{
+HDACodecDevice *dev = DO_UPCAST(HDACodecDevice, qdev, qdev);
+
+if (dev->info->exit) {
+dev->info->exit(dev);
+}
+return 0;
+}
+
 void hda_codec_register(HDACodecDeviceInfo *info)
 {
 info->qdev.init = hda_codec_dev_init;
+info->qdev.exit = hda_codec_dev_exit;
 info->qdev.bus_info = &hda_codec_bus_info;
 qdev_register(&info->qdev);
 }
@@ -1137,6 +1148,14 @@ static int intel_hda_init(PCIDevice *pci)
 return 0;
 }
 
+static int intel_hda_exit(PCIDevice *pci)
+{
+IntelHDAState *d = DO_UPCAST(IntelHDAState, pci, pci);
+
+cpu_unregister_io_memory(d->mmio_addr);
+return 0;
+}
+
 static int intel_hda_post_load(void *opaque, int version)
 {
 IntelHDAState* d = opaque;
@@ -1219,6 +1238,7 @@ static PCIDeviceInfo intel_hda_info = {
 .qdev.vmsd= &vmstate_intel_hda,
 .qdev.reset   = intel_hda_reset,
 .init = intel_hda_init,
+.exit = intel_hda_exit,
 .qdev.props   = (Property[]) {
 DEFINE_PROP_UINT32("debug", IntelHDAState, debug, 0),
 DEFINE_PROP_END_OF_LIST(),
diff --git a/hw/intel-hda.h b/hw/intel-hda.h
index ba290ec..4e44e38 100644
--- a/hw/intel-hda.h
+++ b/hw/intel-hda.h
@@ -32,6 +32,7 @@ struct HDACodecDevice {
 struct HDACodecDeviceInfo {
 DeviceInfo qdev;
 int (*init)(HDACodecDevice *dev);
+int (*exit)(HDACodecDevice *dev);
 void (*command)(HDACodecDevice *dev, uint32_t nid, uint32_t data);
 void (*stream)(HDACodecDevice *dev, uint32_t stnr, bool running);
 };
-- 
1.7.1




[Qemu-devel] [PATCH 2/4] hda-audio: exit cleanup

2010-11-08 Thread Gerd Hoffmann
Add exit callback to the driver.  Unregister the sound card properly
on exit.

Signed-off-by: Gerd Hoffmann 
---
 hw/hda-audio.c |   23 +++
 1 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/hw/hda-audio.c b/hw/hda-audio.c
index 1035774..5593c84 100644
--- a/hw/hda-audio.c
+++ b/hw/hda-audio.c
@@ -808,6 +808,27 @@ static int hda_audio_init(HDACodecDevice *hda, const 
struct desc_codec *desc)
 return 0;
 }
 
+static int hda_audio_exit(HDACodecDevice *hda)
+{
+HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+HDAAudioStream *st;
+int i;
+
+dprint(a, 1, "%s\n", __FUNCTION__);
+for (i = 0; i < ARRAY_SIZE(a->st); i++) {
+st = a->st + i;
+if (st->node == NULL)
+continue;
+if (st->output) {
+AUD_close_out(&a->card, st->voice.out);
+} else {
+AUD_close_in(&a->card, st->voice.in);
+}
+}
+AUD_remove_card(&a->card);
+return 0;
+}
+
 static int hda_audio_post_load(void *opaque, int version)
 {
 HDAAudioState *a = opaque;
@@ -879,6 +900,7 @@ static HDACodecDeviceInfo hda_audio_info_output = {
 .qdev.vmsd= &vmstate_hda_audio,
 .qdev.props   = hda_audio_properties,
 .init = hda_audio_init_output,
+.exit = hda_audio_exit,
 .command  = hda_audio_command,
 .stream   = hda_audio_stream,
 };
@@ -890,6 +912,7 @@ static HDACodecDeviceInfo hda_audio_info_duplex = {
 .qdev.vmsd= &vmstate_hda_audio,
 .qdev.props   = hda_audio_properties,
 .init = hda_audio_init_duplex,
+.exit = hda_audio_exit,
 .command  = hda_audio_command,
 .stream   = hda_audio_stream,
 };
-- 
1.7.1




[Qemu-devel] [PATCH 0/4] intel-hda: fixes

2010-11-08 Thread Gerd Hoffmann
  Hi,

This patch series brings a few fixes for the intel-hda driver.

please apply,
  Gerd

François Revol (1):
  intel-hda: Honor WAKEEN bits.

Gerd Hoffmann (3):
  intel-hda: exit cleanup
  hda-audio: exit cleanup
  intel-hda: update irq status on WAKEEN changes.

 hw/hda-audio.c |   23 +++
 hw/intel-hda.c |   29 -
 hw/intel-hda.h |1 +
 3 files changed, 52 insertions(+), 1 deletions(-)




[Qemu-devel] [PATCH 4/4] intel-hda: update irq status on WAKEEN changes.

2010-11-08 Thread Gerd Hoffmann
When the guest updates the WAKEEN register we
must re-calculate the IRQ status.

Signed-off-by: Gerd Hoffmann 
---
 hw/intel-hda.c |6 ++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index 2c1ef12..e478e67 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -508,6 +508,11 @@ static void intel_hda_set_g_ctl(IntelHDAState *d, const 
IntelHDAReg *reg, uint32
 }
 }
 
+static void intel_hda_set_wake_en(IntelHDAState *d, const IntelHDAReg *reg, 
uint32_t old)
+{
+intel_hda_update_irq(d);
+}
+
 static void intel_hda_set_state_sts(IntelHDAState *d, const IntelHDAReg *reg, 
uint32_t old)
 {
 intel_hda_update_irq(d);
@@ -630,6 +635,7 @@ static const struct IntelHDAReg regtab[] = {
 .size = 2,
 .wmask= 0x3fff,
 .offset   = offsetof(IntelHDAState, wake_en),
+.whandler = intel_hda_set_wake_en,
 },
 [ ICH6_REG_STATESTS ] = {
 .name = "STATESTS",
-- 
1.7.1




[Qemu-devel] [PATCH 3/4] intel-hda: Honor WAKEEN bits.

2010-11-08 Thread Gerd Hoffmann
From: François Revol 

HDA: Honor WAKEEN bits when deciding to raise an interrupt on codec
status change.  This prevents an interrupt storm with the Haiku HDA
driver which does not handle codec status changes in the irq handler.

Signed-off-by: François Revol 
Signed-off-by: Gerd Hoffmann 
---
 hw/intel-hda.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index 78c32da..2c1ef12 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -246,7 +246,7 @@ static void intel_hda_update_int_sts(IntelHDAState *d)
 if (d->rirb_sts & ICH6_RBSTS_OVERRUN) {
 sts |= (1 << 30);
 }
-if (d->state_sts) {
+if (d->state_sts & d->wake_en) {
 sts |= (1 << 30);
 }
 
@@ -628,6 +628,7 @@ static const struct IntelHDAReg regtab[] = {
 [ ICH6_REG_WAKEEN ] = {
 .name = "WAKEEN",
 .size = 2,
+.wmask= 0x3fff,
 .offset   = offsetof(IntelHDAState, wake_en),
 },
 [ ICH6_REG_STATESTS ] = {
-- 
1.7.1




Re: [Qemu-devel] [REFACTORED CODE] [PATCH 0/3] Threadlets: A generic task offloading framework.

2010-11-08 Thread Arun R Bharadwaj
* Arun R Bharadwaj  [2010-11-08 16:16:50]:

> Hi,
> 
> This is the v9 of the refactored patch-series to have a generic
> asynchronous task offloading framework (called threadlets)
> within qemu.
> 
> 

Testing carried out:

I have run KVM autotest suite with this patch. This test suite
ran successfully for the following tests:
connecthon, dbench, fstress, disktest, ebizzy, cpu_hotplug,
hackbench and sleeptest.

-arun
> Changes from earlier iteration:
> 
> * Code is refactored such that it reflects the following:
> patch1: In-place code changes in paio subsystem to use the
> generic threadlets infrastructure.
> patch2: Move the threadlets infrastructure to a new file -
> qemu-threadlets.c
> patch3: Add helper functions to enable virtio-9p make
> use of the threadlets
> 
> * Remove the infinite while loop in paio_cancel and use mutex
>   to wait for request to complete.
> 
> * Address the issue of dead code due to
>   MAX_GLOBAL_THREADS = MIN_GLOBAL_THREADS



[Qemu-devel] [Bug 544527] Re: usbfs is bugged with >2.6.32.9 and <=2.6.33 (breaks VMWare, Qemu, sane scanners, ...)

2010-11-08 Thread David Kühling
This is not fixed in Ubuntu 10.10 (i.e. 32-bit apps running on 64-bit
kernel get incorrectly truncated isochronous transfers).  Re-opened a
new bug report:

https://bugs.launchpad.net/ubuntu/+source/linux/+bug/672516

-- 
usbfs is bugged with >2.6.32.9 and <=2.6.33 (breaks VMWare, Qemu, sane 
scanners, ...)
https://bugs.launchpad.net/bugs/544527
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.

Status in QEMU: Fix Committed
Status in SANE-backends - Backends for SANE: Fix Committed
Status in Tv Time: Fix Committed
Status in Virtualbox: Fix Committed
Status in “linux” package in Ubuntu: Fix Committed

Bug description:
Binary package hint: tvtime

There's a problem with isochronous and usbfs, suse tried to improve usbfs but 
it end up that it broke usbfs.
For isochronous the entire packet needs to be copied and not only a part of it.

http://lkml.org/lkml/2010/2/26/490  (Report)
http://lkml.org/lkml/2010/2/27/226 (Bugfix)

please merge this bugfix asap.

ProblemType: Bug
Architecture: amd64
Date: Mon Mar 22 21:09:00 2010
DistroRelease: Ubuntu 10.04
LiveMediaBuild: Ubuntu 10.04 "Lucid Lynx" - Alpha amd64 (20100322)
Package: tvtime 1.0.2-5ubuntu2
ProcEnviron:
 LANG=de_DE.UTF-8
 SHELL=/bin/bash
ProcVersionSignature: Ubuntu 2.6.32-16.25-generic
SourcePackage: tvtime
Uname: Linux 2.6.32-16-generic x86_64





Re: [Qemu-devel] [PATCH 0/3] v4 Decouple block device removal from device removal

2010-11-08 Thread Markus Armbruster
"Michael S. Tsirkin"  writes:

> On Mon, Nov 08, 2010 at 11:32:01AM +0100, Markus Armbruster wrote:
>> Ryan Harper  writes:
>> 
>> > * Markus Armbruster  [2010-11-06 04:19]:
>> >> Ryan Harper  writes:
>> >> 
>> >> > * Markus Armbruster  [2010-11-05 11:11]:
>> >> >> Ryan Harper  writes:
>> >> >> 
>> >> >> > * Markus Armbruster  [2010-11-05 08:28]:
>> >> >> >> I'd be fine with any of these:
>> >> >> >> 
>> >> >> >> 1. A new command "device_disconnet ID" (or similar name) to 
>> >> >> >> disconnect
>> >> >> >>device ID from any host parts.  Nice touch: you don't have to 
>> >> >> >> know
>> >> >> >>about the device's host part(s) to disconnect it.  But it might 
>> >> >> >> be
>> >> >> >>more work than the other two.
>> >> >> >
>> >> >> > This is sort of what netdev_del() and drive_unplug() are today; we're
>> >> >> > just saying sever the connection of this device id.   
>> >> >> 
>> >> >> No, I have netdev_del as (3).
>> >> >> 
>> >> >> All three options are "sort of" the same, just different commands with
>> >> >> a common purpose.
>> >> >> 
>> >> >> > I'd like to rename drive_unplug() to blockdev_del() and call it 
>> >> >> > done.  I
>> >> >> > was looking at libvirt and the right call to netdev_del is already
>> >> >> > in-place; I'd just need to re-spin my block patch to call 
>> >> >> > blockdev_del()
>> >> >> > after invoking device_del() to match what is done for net.
>> >> >> 
>> >> >> Unless I'm missing something, you can't just rename: your unplug does
>> >> >> not delete the host part.
>> >> >> 
>> >> >> >> 2. New commands netdev_disconnect, drive_disconnect (or similar 
>> >> >> >> names)
>> >> >> >>to disconnect a host part from a guest device.  Like (1), except 
>> >> >> >> you
>> >> >> >>have to point to the other end of the connection to cut it.
>> >> >> >
>> >> >> > What's the advantage here? We need an additional piece of info (host
>> >> >> > part) in addition to the device id?
>> >> >> 
>> >> >> That's a disadvantage.
>> >> >> 
>> >> >> Possible advantage: implementation could be slightly easier than (1),
>> >> >> because you don't have to find the host parts.
>> >> >> 
>> >> >> >> 3. A new command "drive_del ID" similar to existing netdev_del.  
>> >> >> >> This is
>> >> >> >>(2) fused with delete.  Conceptual wart: you can't disconnect and
>> >> >> >>keep the host part around.  Moreover, delete is slightly 
>> >> >> >> dangerous,
>> >> >> >>because it renders any guest device still using the host part
>> >> >> >>useless.
>> >> >> >
>> >> >> > Hrm, I thought that's what (1) is.
>> >> >> 
>> >> >> No.
>> >> >> 
>> >> >> With (1), the argument is a *device* ID, and we disconnect *all* host
>> >> >> parts connected to this device (typically just one).
>> >> >> 
>> >> >> With (3), the argument is a netdev/drive ID, and disconnect *this* host
>> >> >> part from the peer device.
>> >> >> 
>> >> >> > Well, either (1) or (3); I'd 
>> >> >> > like to
>> >> >> > rename drive_unplug() to blockdev_del() since they're similar 
>> >> >> > function
>> >> >> > w.r.t removing access to the host resource.  And we can invoke them 
>> >> >> > in
>> >> >> > the same way from libvirt (after doing guest notification, remove
>> >> >> > access).
>> >> >> 
>> >> >> I'd call it drive_del for now, to match drive_add.
>> >> >
>> >> > OK, drive_del() and as you mentioned, drive_unplug will take out the
>> >> > block driver, but doesn't remove the dinfo object; that ends up dying
>> >> > when we call the device destructor.  I think for symmetry we'll want
>> >> > drive_del to remove the dinfo object as well.
>> >> 
>> >> Exactly.
>> >> 
>> >> a. bdrv_detach() to zap the pointer from bdrv to qdev
>> >> b. zap the pointer from qdev to bdrv
>> >> c. drive_uninit() to dispose of the host part
>> >
>> > a-c need to be done to match netdev_del symmetry?  How hard of a req is
>> > this?
>> 
>> Without (c), it's not a delete.  And (c) without (b) leaves a dangling
>> pointer.  (c) without (a) fails an assertion in bdrv_delete().
>> 
>> Aside: (b) should probably be folded into bdrv_detach().
>> 
>> >> Step b could be awkward with (3), because you don't know device details.
>> >> I guess you have to search device properties for a drive property
>> >> pointing to bdrv.  I like (1) because it puts that loop in the one place
>> >> where it belongs: qdev core.  (3) duplicates it in every HOSTDEV_del.
>> >> Except for netdev_del, which is special because of VLANs.
>> >> 
>> >> To avoid step b, you could try to keep the bdrv around in a special
>> >> zombie state.  Still have to free the dinfo, but can't use
>> >> drive_uninit() for that then.
>> >> 
>> >> If you think I'm overcomplicating this, feel free to prove me wrong with
>> >> working code :)
>> >
>> > drive_unplug() works as-is today; so it does feel very combursome at
>> > this point.  Other than the name change and agreement on how mgmt should
>> > invoke the command, it's been a long ride to get her

[Qemu-devel] Re: [PATCH 0/6] Save state error handling (kill off no_migrate)

2010-11-08 Thread Michael S. Tsirkin
On Wed, Oct 06, 2010 at 02:58:57PM -0600, Alex Williamson wrote:
> Our code paths for saving or migrating a VM are full of functions that
> return void, leaving no opportunity for a device to cancel a migration,
> either from error or incompatibility.  The ivshmem driver attempted to
> solve this with a no_migrate flag on the save state entry.  I think the
> more generic and flexible way to solve this is to allow driver save
> functions to fail.  This series implements that and converts ivshmem
> to uses a set_params function to NAK migration much earlier in the
> processes.  This touches a lot of files, but bulk of those changes are
> simply s/void/int/ and tacking a "return 0" to the end of functions.
> Thanks,
> 
> Alex

Well error handling is always tricky: it seems easier to
require save handlers to never fail.
So there's a bunch of code here but what exactly is the benefit?
Since save handlers have no idea what does the remote do,
what is the compatibility you mention?

> ---
> 
> Alex Williamson (6):
>   savevm: Remove register_device_unmigratable()
>   savevm: Allow set_params and save_live_state to error
>   virtio: Allow virtio_save() errors
>   pci: Allow pci_device_save() to return error
>   savevm: Allow vmsd->pre_save to return error
>   savevm: Allow SaveStateHandler() to return error
> 
> 
>  block-migration.c   |4 +-
>  hw/adb.c|8 +++-
>  hw/ads7846.c|4 +-
>  hw/arm_gic.c|4 +-
>  hw/arm_timer.c  |6 ++-
>  hw/armv7m_nvic.c|4 +-
>  hw/cuda.c   |4 +-
>  hw/fdc.c|3 +
>  hw/g364fb.c |4 +-
>  hw/grackle_pci.c|4 +-
>  hw/gt64xxx.c|4 +-
>  hw/heathrow_pic.c   |4 +-
>  hw/hpet.c   |3 +
>  hw/hw.h |   12 ++
>  hw/i2c.c|3 +
>  hw/ide/core.c   |4 +-
>  hw/ivshmem.c|   30 +++
>  hw/lsi53c895a.c |4 +-
>  hw/m48t59.c |4 +-
>  hw/mac_dbdma.c  |4 +-
>  hw/mac_nvram.c  |4 +-
>  hw/max111x.c|4 +-
>  hw/mipsnet.c|4 +-
>  hw/mst_fpga.c   |3 +
>  hw/nand.c   |3 +
>  hw/openpic.c|4 +-
>  hw/pci.c|9 +++-
>  hw/pci.h|2 -
>  hw/piix4.c  |4 +-
>  hw/pl011.c  |4 +-
>  hw/pl022.c  |4 +-
>  hw/pl061.c  |4 +-
>  hw/ppc4xx_pci.c |   11 -
>  hw/ppce500_pci.c|   11 -
>  hw/pxa2xx.c |   28 ++
>  hw/pxa2xx_dma.c |4 +-
>  hw/pxa2xx_gpio.c|4 +-
>  hw/pxa2xx_keypad.c  |3 +
>  hw/pxa2xx_lcd.c |4 +-
>  hw/pxa2xx_mmci.c|4 +-
>  hw/pxa2xx_pic.c |4 +-
>  hw/pxa2xx_timer.c   |4 +-
>  hw/rc4030.c |4 +-
>  hw/rtl8139.c|4 +-
>  hw/serial.c |3 +
>  hw/spitz.c  |   14 +--
>  hw/ssd0323.c|4 +-
>  hw/ssi-sd.c |4 +-
>  hw/stellaris.c  |   20 +++---
>  hw/stellaris_enet.c |4 +-
>  hw/stellaris_input.c|4 +-
>  hw/syborg_fb.c  |4 +-
>  hw/syborg_interrupt.c   |3 +
>  hw/syborg_keyboard.c|3 +
>  hw/syborg_pointer.c |3 +
>  hw/syborg_rtc.c |4 +-
>  hw/syborg_serial.c  |4 +-
>  hw/syborg_timer.c   |4 +-
>  hw/tsc2005.c|4 +-
>  hw/tsc210x.c|4 +-
>  hw/twl92230.c   |3 +
>  hw/unin_pci.c   |4 +-
>  hw/usb-uhci.c   |3 +
>  hw/virtio-balloon.c |9 +++-
>  hw/virtio-blk.c |   10 -
>  hw/virtio-net.c |   11 -
>  hw/virtio-pci.c |   10 -
>  hw/virtio-serial-bus.c  |   10 -
>  hw/virtio.c |   14 +--
>  hw/virtio.h |4 +-
>  hw/wm8750.c |3 +
>  hw/zaurus.c |4 +-
>  qemu-common.h   |2 -
>  savevm.c|   88 
> +++
>  slirp/slirp.c   |6 ++-
>  target-arm/machine.c|3 +
>  target-cris/machine.c   |3 +
>  target-i386/machine.c   |7 ++-
>  target-microblaze/machine.c |3 +
>  target-mips/machine.c   |3 +
>  target-ppc/machine.c|3 +
>  target-s390x/machine.c  |3 +
>  target-sparc/machine.c  |3 +
>  83 files changed, 365 insertions(+), 181 deletions(-)
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a mes

  1   2   >