date:20100324

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Avi Kivity


On 03/24/2010 10:32 PM, Anthony Liguori wrote:


So far, a libqemu.so with a flexible transport that could be used 
directly by a libvirt user (ala cairo/gdk type interactions) seems 
like the best solution to me.



libqemu.so would be a C API.  C is not the first choice for writing GUIs 
or management applications.  So it would need to be further wrapped.


We also need to allow qemu to control the display directly, without 
going through vnc.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

[Qemu-devel] [PATCH v3 1/1] Shared memory uio_pci driver

2010-03-24 Thread Cam Macdonell

This patch adds a driver for my shared memory PCI device using the uio_pci
interface.  The driver has three memory regions.  The first memory region is for
device registers for sending interrupts. The second BAR is for receiving MSI-X
interrupts and the third memory region maps the shared memory.  The device only
exports the first and third memory regions to userspace.

This driver supports MSI-X and regular pin interrupts.  Currently, the number of
MSI vectors is set to 4 which could be increased, but the driver will work with
fewer vectors.  If MSI is not available, then regular interrupts will be used.
---
 drivers/uio/Kconfig   |8 ++
 drivers/uio/Makefile  |1 +
 drivers/uio/uio_ivshmem.c |  235 +
 3 files changed, 244 insertions(+), 0 deletions(-)
 create mode 100644 drivers/uio/uio_ivshmem.c

diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig
index 1da73ec..b92cded 100644
--- a/drivers/uio/Kconfig
+++ b/drivers/uio/Kconfig
@@ -74,6 +74,14 @@ config UIO_SERCOS3
 
  If you compile this as a module, it will be called uio_sercos3.
 
+config UIO_IVSHMEM
+   tristate "KVM shared memory PCI driver"
+   default n
+   help
+ Userspace I/O interface for the KVM shared memory device.  This
+ driver will make available two memory regions, the first is
+ registers and the second is a region for sharing between VMs.
+
 config UIO_PCI_GENERIC
tristate "Generic driver for PCI 2.3 and PCI Express cards"
depends on PCI
diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile
index 18fd818..25c1ca5 100644
--- a/drivers/uio/Makefile
+++ b/drivers/uio/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_UIO_AEC)   += uio_aec.o
 obj-$(CONFIG_UIO_SERCOS3)  += uio_sercos3.o
 obj-$(CONFIG_UIO_PCI_GENERIC)  += uio_pci_generic.o
 obj-$(CONFIG_UIO_NETX) += uio_netx.o
+obj-$(CONFIG_UIO_IVSHMEM) += uio_ivshmem.o
diff --git a/drivers/uio/uio_ivshmem.c b/drivers/uio/uio_ivshmem.c
new file mode 100644
index 000..607435b
--- /dev/null
+++ b/drivers/uio/uio_ivshmem.c
@@ -0,0 +1,235 @@
+/*
+ * UIO IVShmem Driver
+ *
+ * (C) 2009 Cam Macdonell
+ * based on Hilscher CIF card driver (C) 2007 Hans J. Koch 
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#define IntrStatus 0x04
+#define IntrMask 0x00
+
+struct ivshmem_info {
+struct uio_info *uio;
+struct pci_dev *dev;
+char (*msix_names)[256];
+struct msix_entry *msix_entries;
+int nvectors;
+};
+
+static irqreturn_t ivshmem_handler(int irq, struct uio_info *dev_info)
+{
+
+void __iomem *plx_intscr = dev_info->mem[0].internal_addr
++ IntrStatus;
+u32 val;
+
+val = readl(plx_intscr);
+if (val == 0)
+return IRQ_NONE;
+
+printk(KERN_INFO "Regular interrupt (val = %d)\n", val);
+return IRQ_HANDLED;
+}
+
+static irqreturn_t ivshmem_msix_handler(int irq, void *opaque)
+{
+
+struct uio_info * dev_info = (struct uio_info *) opaque;
+
+/* we have to do this explicitly when using MSI-X */
+uio_event_notify(dev_info);
+printk(KERN_INFO "MSI-X interrupt (%d)\n", irq);
+return IRQ_HANDLED;
+
+}
+
+static int request_msix_vectors(struct ivshmem_info *ivs_info, int nvectors)
+{
+int i, err;
+const char *name = "ivshmem";
+
+printk(KERN_INFO "devname is %s\n", name);
+ivs_info->nvectors = nvectors;
+
+
+ivs_info->msix_entries = kmalloc(nvectors * sizeof *ivs_info->msix_entries,
+GFP_KERNEL);
+ivs_info->msix_names = kmalloc(nvectors * sizeof *ivs_info->msix_names,
+GFP_KERNEL);
+
+for (i = 0; i < nvectors; ++i)
+ivs_info->msix_entries[i].entry = i;
+
+err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries,
+ivs_info->nvectors);
+if (err > 0) {
+ivs_info->nvectors = err; /* msi-x positive error code
+ returns the number available*/
+err = pci_enable_msix(ivs_info->dev, ivs_info->msix_entries,
+ivs_info->nvectors);
+if (err > 0) {
+printk(KERN_INFO "no MSI (%d). Back to INTx.\n", err);
+return -ENOSPC;
+}
+}
+
+printk(KERN_INFO "err is %d\n", err);
+if (err) return err;
+
+for (i = 0; i < ivs_info->nvectors; i++) {
+
+snprintf(ivs_info->msix_names[i], sizeof *ivs_info->msix_names,
+"%s-config", name);
+
+ivs_info->msix_entries[i].entry = i;
+err = request_irq(ivs_info->msix_entries[i].vector,
+ivshmem_msix_handler, 0,
+ivs_info->msix_names[i], ivs_info->uio);
+
+if (err) {
+return -ENOSPC;
+}
+}
+
+return 0;
+}
+
+static int __devinit ivshmem_pci_probe(struct pci_dev *dev,
+const struct pci_device_id *id)
+{
+struct uio_info *info;
+struct ivshmem_info * ivshmem_info;
+int nvectors = 4;
+
+inf

[Qemu-devel] [PATCH v3 2/2] Inter-VM shared memory PCI device

2010-03-24 Thread Cam Macdonell

Support an inter-vm shared memory device that maps a shared-memory object as a
PCI device in the guest.  This patch also supports interrupts between guest by
communicating over a unix domain socket.  This patch applies to the qemu-kvm
repository.

-device ivshmem,size=[,shm=]

Interrupts are supported between multiple VMs by using a shared memory server
by using a chardev socket.

-device ivshmem,size=[,shm=][,chardev=][,msi=on]
[,irqfd=on][,nvectors=n]
-chardev socket,path=,id=

Sample programs, init scripts and the shared memory server are available in a
git repo here:

www.gitorious.org/nahanni
---
 Makefile.target |3 +
 hw/ivshmem.c|  622 +++
 qemu-char.c |6 +
 qemu-char.h |3 +
 4 files changed, 634 insertions(+), 0 deletions(-)
 create mode 100644 hw/ivshmem.c

diff --git a/Makefile.target b/Makefile.target
index 4d88543..15edf19 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -219,6 +219,9 @@ obj-y += pcnet.o
 obj-y += rtl8139.o
 obj-y += e1000.o
 
+# Inter-VM PCI shared memory
+obj-y += ivshmem.o
+
 # Hardware support
 obj-i386-y = ide/core.o ide/qdev.o ide/isa.o ide/pci.o ide/piix.o
 obj-i386-y += pckbd.o $(sound-obj-y) dma.o
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
new file mode 100644
index 000..c76aae3
--- /dev/null
+++ b/hw/ivshmem.c
@@ -0,0 +1,622 @@
+/*
+ * Inter-VM Shared Memory PCI device.
+ *
+ * Author:
+ *  Cam Macdonell 
+ *
+ * Based On: cirrus_vga.c and rtl8139.c
+ *
+ * This code is licensed under the GNU GPL v2.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "hw.h"
+#include "console.h"
+#include "pc.h"
+#include "pci.h"
+#include "sysemu.h"
+
+#include "msix.h"
+#include "qemu-kvm.h"
+#include "libkvm.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#define PCI_COMMAND_IOACCESS0x0001
+#define PCI_COMMAND_MEMACCESS   0x0002
+
+#define DEBUG_IVSHMEM
+
+#define IVSHMEM_IRQFD   0
+#define IVSHMEM_MSI 1
+
+#ifdef DEBUG_IVSHMEM
+#define IVSHMEM_DPRINTF(fmt, args...)\
+do {printf("IVSHMEM: " fmt, ##args); } while (0)
+#else
+#define IVSHMEM_DPRINTF(fmt, args...)
+#endif
+
+#define NEW_GUEST_VAL UINT_MAX
+
+typedef struct IVShmemState {
+PCIDevice dev;
+uint32_t intrmask;
+uint32_t intrstatus;
+uint32_t doorbell;
+
+CharDriverState * chr;
+CharDriverState * eventfd_chr;
+int ivshmem_mmio_io_addr;
+
+pcibus_t mmio_addr;
+uint8_t *ivshmem_ptr;
+unsigned long ivshmem_offset;
+unsigned int ivshmem_size;
+int shm_fd; /* shared memory file descriptor */
+
+struct kvm_ioeventfd ioeventfds[16];
+int eventfds[16]; /* for now we have a limit of 16 inter-connected guests 
*/
+int eventfd_posn;
+int num_eventfds;
+uint32_t vectors;
+uint32_t features;
+
+char * shmobj;
+uint32_t size; /*size of shared memory in MB*/
+} IVShmemState;
+
+/* registers for the Inter-VM shared memory device */
+enum ivshmem_registers {
+IntrMask = 0,
+IntrStatus = 4,
+IVPosition = 8,
+Doorbell = 12,
+};
+
+static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, int feature) {
+return (ivs->features & (1 << feature));
+}
+
+static inline int is_power_of_two(int x) {return (x & (x-1)) == 0;}
+
+static void ivshmem_map(PCIDevice *pci_dev, int region_num,
+pcibus_t addr, pcibus_t size, int type)
+{
+IVShmemState *s = DO_UPCAST(IVShmemState, dev, pci_dev);
+
+IVSHMEM_DPRINTF("addr = %u size = %u\n", (uint32_t)addr, (uint32_t)size);
+cpu_register_physical_memory(addr, s->ivshmem_size, s->ivshmem_offset);
+
+}
+
+/* accessing registers - based on rtl8139 */
+static void ivshmem_update_irq(IVShmemState *s, int val)
+{
+int isr;
+isr = (s->intrstatus & s->intrmask) & 0x;
+
+/* don't print ISR resets */
+if (isr) {
+IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
+   isr ? 1 : 0, s->intrstatus, s->intrmask);
+}
+
+qemu_set_irq(s->dev.irq[0], (isr != 0));
+}
+
+static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
+{
+IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
+
+s->intrmask = val;
+
+ivshmem_update_irq(s, val);
+}
+
+static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
+{
+uint32_t ret = s->intrmask;
+
+IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
+
+return ret;
+}
+
+static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
+{
+IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
+
+s->intrstatus = val;
+
+ivshmem_update_irq(s, val);
+return;
+}
+
+static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
+{
+uint32_t ret = s->intrstatus;
+
+/* reading ISR clears all interrupts */
+s->intrstatus = 0;
+
+ivshmem_update_irq(s, 0);
+
+return ret;
+}
+
+static void ivshmem_io_writew(void *opaque, uint8_t addr, uint32_t val)
+{
+
+IVSHMEM_DP

[Qemu-devel] [PATCH v3 0/2] Inter-VM shared memory PCI device

2010-03-24 Thread Cam Macdonell

Support an inter-vm shared memory device that maps a shared-memory object
as a PCI device in the guest.  This patch also supports interrupts between
guest by communicating over a unix domain socket.  This patch applies to the
qemu-kvm repository.

Changes in this version are using the qdev format and optional use of MSI and
ioeventfd/irqfd.

The non-interrupt version is supported by passing the shm parameter

-device ivshmem,size=,[shm=]

which will simply map the shm object into a BAR.

Interrupts are supported between multiple VMs by using a shared memory server
that is connected to with a socket character device

-device ivshmem,size=[,chardev=][,irqfd=on]
[,msi=on][,nvectors=n]
-chardev socket,path=,id=

The server passes file descriptors for the shared memory object and eventfds 
(our
interrupt mechanism) to the respective qemu instances.

When using interrupts, VMs communicate with a shared memory server that passes
the shared memory object file descriptor using SCM_RIGHTS.  The server assigns
each VM an ID number and sends this ID number to the Qemu process along with a
series of eventfd file descriptors, one per guest using the shared memory
server.  These eventfds will be used to send interrupts between guests.  Each
guest listens on the eventfd corresponding to their ID and may use the others
for sending interrupts to other guests.

enum ivshmem_registers {
IntrMask = 0,
IntrStatus = 4,
IVPosition = 8,
Doorbell = 12
};

The first two registers are the interrupt mask and status registers.  Mask and
status are only used with pin-based interrupts.  They are unused with MSI
interrupts.  The IVPosition register is read-only and reports the guest's ID
number.  Interrupts are triggered when a message is received on the guest's
eventfd from another VM.  To trigger an event, a guest must write to another
guest's Doorbell.  The "Doorbells" begin at offset 12.  A particular guest's
doorbell offset in the MMIO region is equal to

guest_id * 32 + Doorbell

The doorbell register for each guest is 32-bits.  The doorbell-per-guest
design was motivated for use with ioeventfd.

The semantics of the value written to the doorbell depends on whether the
device is using MSI or a regular pin-based interrupt.

Regular Interrupts
--

If regular interrupts are used (due to either a guest not supporting MSI or the
user specifying not to use them on the command-line) then the value written to
a guest's doorbell is what the guest's status register will be set to.

An status of (2^32 - 1) indicates that a new guest has joined.  Guests
should not send a message of this value for any other reason.

Message Signalled Interrupts


The important thing to remember with MSI is that it is only a signal, no
status is set (since MSI interrupts are not shared).  All information other
than the interrupt itself should be communicated via the shared memory region.
MSI is on by default.  It can be turned off with the msi=off to the parameter.

If the device uses MSI then the value written to the doorbell is the MSI vector
that will be raised.  Vector 0 is used to notify that a new guest has joined.
Vector 0 cannot be triggered by another guest since a value of 0 does not
trigger an eventfd.

ioeventfd/irqfd
---

ioeventfd/irqfd is turned on by irqfd=on passed to the device parameter (it is
off by default).  When using ioeventfd/irqfd the only interrupt value that can
be passed to another guest is 1 despite what value is written to a guest's
Doorbell.

Sample programs, init scripts and the shared memory server are available in a
git repo here:

www.gitorious.org/nahanni

Cam Macdonell (2):
  Support adding a file to qemu's ram allocation
  Inter-VM shared memory PCI device

 Makefile.target |3 +
 cpu-common.h|1 +
 exec.c  |   33 +++
 hw/ivshmem.c|  622 +++
 qemu-char.c |6 +
 qemu-char.h |3 +
 6 files changed, 668 insertions(+), 0 deletions(-)
 create mode 100644 hw/ivshmem.c

[Qemu-devel] [PATCH v3 1/2] Support adding a file to qemu's ram allocation

2010-03-24 Thread Cam Macdonell

This avoids the need of using qemu_ram_alloc and mmap with MAP_FIXED to map a
host file into guest RAM.  This function mmaps the opened file anywhere and adds
the memory to the ram blocks.

Usage is

qemu_ram_mmap(fd, size, MAP_SHARED, offset);
---
 cpu-common.h |1 +
 exec.c   |   33 +
 2 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/cpu-common.h b/cpu-common.h
index 6cae15b..dffe4e7 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -32,6 +32,7 @@ static inline void 
cpu_register_physical_memory(target_phys_addr_t start_addr,
 }
 
 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr);
+ram_addr_t qemu_ram_mmap(int, ram_addr_t, int, int);
 ram_addr_t qemu_ram_alloc(ram_addr_t);
 void qemu_ram_free(ram_addr_t addr);
 /* This should only be used for ram local to a device.  */
diff --git a/exec.c b/exec.c
index 3b4426e..6f4e747 100644
--- a/exec.c
+++ b/exec.c
@@ -2727,6 +2727,39 @@ static void *file_ram_alloc(ram_addr_t memory, const 
char *path)
 }
 #endif
 
+ram_addr_t qemu_ram_mmap(int fd, ram_addr_t size, int flags, int offset)
+{
+RAMBlock *new_block;
+
+size = TARGET_PAGE_ALIGN(size);
+new_block = qemu_malloc(sizeof(*new_block));
+
+// map the file passed as a parameter to be this part of memory
+new_block->host = mmap(0, size, PROT_READ|PROT_WRITE, flags, fd, offset);
+
+#ifdef MADV_MERGEABLE
+madvise(new_block->host, size, MADV_MERGEABLE);
+#endif
+
+new_block->offset = last_ram_offset;
+new_block->length = size;
+
+new_block->next = ram_blocks;
+ram_blocks = new_block;
+
+phys_ram_dirty = qemu_realloc(phys_ram_dirty,
+(last_ram_offset + size) >> TARGET_PAGE_BITS);
+memset(phys_ram_dirty + (last_ram_offset >> TARGET_PAGE_BITS),
+   0xff, size >> TARGET_PAGE_BITS);
+
+last_ram_offset += size;
+
+if (kvm_enabled())
+kvm_setup_guest_memory(new_block->host, size);
+
+return new_block->offset;
+}
+
 ram_addr_t qemu_ram_alloc(ram_addr_t size)
 {
 RAMBlock *new_block;
-- 
1.6.6.1

[Qemu-devel] [PATCH 4/4] Add virtio disk identification support

2010-03-24 Thread john cooper

Return serial string to the guest application via
ioctl driver call.

Note this form of interface to the guest userland
was the consensus when the prior version using
the ATA_IDENTIFY came under dispute.

Signed-off-by: john cooper 
---

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index cd66806..0954193 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -225,6 +225,21 @@ static int virtblk_ioctl(struct block_device *bdev, 
fmode_t mode,
struct gendisk *disk = bdev->bd_disk;
struct virtio_blk *vblk = disk->private_data;
 
+   if (cmd == 'VBID') {
+   void *usr_data = (void __user *)data;
+   char *id_str;
+   int err;
+
+   if (!(id_str = kmalloc(VIRTIO_BLK_ID_BYTES, GFP_KERNEL)))
+   err = -ENOMEM;
+   else if ((err = virtblk_get_id(disk, id_str)))
+   ;
+   else if (copy_to_user(usr_data, id_str, VIRTIO_BLK_ID_BYTES))
+   err = -EFAULT;
+   if (id_str)
+   kfree(id_str);
+   return err;
+   }
/*
 * Only allow the generic SCSI ioctls if the host can support it.
 */
-- 
john.coo...@redhat.com

[Qemu-devel] [PATCH 3/4] Add virtio disk identification support

2010-03-24 Thread john cooper

Add virtio-blk device id (s/n) support via virtio request.

Signed-off-by: john cooper 
---

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 3c64af0..cd66806 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -69,6 +69,8 @@ static void blk_done(struct virtqueue *vq)
vbr->req->sense_len = vbr->in_hdr.sense_len;
vbr->req->errors = vbr->in_hdr.errors;
}
+   if (blk_special_request(vbr->req))
+   vbr->req->errors = (error != 0);
 
__blk_end_request_all(vbr->req, error);
list_del(&vbr->list);
@@ -102,6 +104,11 @@ static bool do_req(struct request_queue *q, struct 
virtio_blk *vblk,
vbr->out_hdr.sector = 0;
vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
break;
+   case REQ_TYPE_SPECIAL:
+   vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
+   vbr->out_hdr.sector = 0;
+   vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+   break;
case REQ_TYPE_LINUX_BLOCK:
if (req->cmd[0] == REQ_LB_OP_FLUSH) {
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
@@ -188,6 +195,30 @@ static void virtblk_prepare_flush(struct request_queue *q, 
struct request *req)
req->cmd[0] = REQ_LB_OP_FLUSH;
 }
 
+/* return id (s/n) string for *disk to *id_str
+ */
+static int virtblk_get_id(struct gendisk *disk, char *id_str)
+{
+   struct virtio_blk *vblk = disk->private_data;
+   struct request *req;
+   struct bio *bio;
+
+   bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
+  GFP_KERNEL);
+   if (IS_ERR(bio)) {
+   return PTR_ERR(bio);
+   }
+
+   req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
+   if (IS_ERR(req)) {
+   bio_put(bio);
+   return PTR_ERR(req);
+   }
+
+   req->cmd_type = REQ_TYPE_SPECIAL;
+   return blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
+}
+
 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 unsigned cmd, unsigned long data)
 {
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index e52029e..167720d 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -17,6 +17,8 @@
 #define VIRTIO_BLK_F_FLUSH 9   /* Cache flush command support */
 #define VIRTIO_BLK_F_TOPOLOGY  10  /* Topology information is available */
 
+#define VIRTIO_BLK_ID_BYTES20  /* ID string length */
+
 struct virtio_blk_config {
/* The capacity (in 512-byte sectors). */
__u64 capacity;
@@ -67,6 +69,9 @@ struct virtio_blk_config {
 /* Cache flush command */
 #define VIRTIO_BLK_T_FLUSH 4
 
+/* Get device ID command */
+#define VIRTIO_BLK_T_GET_ID8
+
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER   0x8000

-- 
john.coo...@redhat.com

[Qemu-devel] [PATCH 2/4] Add virtio disk identification support

2010-03-24 Thread john cooper

Fix bug which truncated serial string to 8 bytes, nul terminate.

Signed-off-by: john cooper 
---

diff --git a/vl.c b/vl.c
index d69250c..b74cbba 100644
--- a/vl.c
+++ b/vl.c
@@ -1162,7 +1162,7 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque,
 dinfo->on_write_error = on_write_error;
 dinfo->opts = opts;
 if (serial)
-strncpy(dinfo->serial, serial, sizeof(serial));
+strncpy(dinfo->serial, serial, sizeof(dinfo->serial) - 1);
 QTAILQ_INSERT_TAIL(&drives, dinfo, next);
 
 switch(type) {

-- 
john.coo...@redhat.com

[Qemu-devel] [PATCH 1/4] Add virtio disk identification support

2010-03-24 Thread john cooper

Add virtio-blk device id (s/n) support via virtio request.
Remove artifacts of pci and ATA_IDENTIFY implementation
relative to prior versions.

Signed-off-by: john cooper 
---

diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 9915840..358b0af 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -19,6 +19,8 @@
 # include 
 #endif
 
+#define min(a,b) ((a) < (b) ? (a) : (b))
+
 typedef struct VirtIOBlock
 {
 VirtIODevice vdev;
@@ -28,6 +30,7 @@ typedef struct VirtIOBlock
 QEMUBH *bh;
 BlockConf *conf;
 unsigned short sector_mask;
+char sn[BLOCK_SERIAL_STRLEN];
 } VirtIOBlock;
 
 static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
@@ -317,6 +320,12 @@ static void virtio_blk_handle_request(VirtIOBlockReq *req,
 virtio_blk_handle_flush(req);
 } else if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
 virtio_blk_handle_scsi(req);
+} else if (req->out->type & VIRTIO_BLK_T_GET_ID) {
+VirtIOBlock *s = req->dev;
+
+memcpy(req->elem.in_sg[0].iov_base, s->sn,
+   min(req->elem.in_sg[0].iov_len, sizeof(s->sn)));
+virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
 } else if (req->out->type & VIRTIO_BLK_T_OUT) {
 qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
  req->elem.out_num - 1);
@@ -496,6 +505,8 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf 
*conf)
 bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
 bdrv_set_geometry_hint(s->bs, cylinders, heads, secs);
 
+strncpy(s->sn, drive_get_serial(s->bs), sizeof (s->sn));
+
 s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
 
 qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
index 7a7ece3..fff46da 100644
--- a/hw/virtio-blk.h
+++ b/hw/virtio-blk.h
@@ -59,6 +59,9 @@ struct virtio_blk_config
 /* Flush the volatile write cache */
 #define VIRTIO_BLK_T_FLUSH  4
 
+/* return the device ID string */
+#define VIRTIO_BLK_T_GET_ID 8
+
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER0x8000

-- 
john.coo...@redhat.com

[Qemu-devel] [PATCH 0/4] Add virtio disk identification support

2010-03-24 Thread john cooper

This series adds the minimal support to qemu and virtio_blk
to support passing of a virtio_blk serial id string from qemu
through the guest driver and to the guest userland.

This is derived in part from a patch set posted by Rusty some
time ago, but has been minimized to remove support for prior
versions which attempted to provide the same functionality via
pci config/io space.  This version rather uses a virtio request
as proposed in Rusty's example.

Also removed is the packaging of the serial/id string within
the glorious bag of bits returned by the ATA_IDENTIFY command.
Here we transfer only the 20 bytes of serial/id string from
qemu to the guest userland.  In the proposed interface, this
is made available by an ioctl() into the virtio_blk driver
however other interfaces (eg: /sys) have also been proposed.
A code snippet is attached below as an example of ioctl usage.

The resulting code is quite minimal and I believe it addresses
all concerns raised in prior versions.

-john



#include 
#include 
#include 
#include 
#include 

#define IOCTL_CMD   'VBID'

main()
{
int fd, rv;
char buf[512];

bzero(buf, sizeof (buf));
if ((fd = open("/dev/vda", O_RDONLY)) < 0)
perror("open");
else if (ioctl(fd, IOCTL_CMD, buf) < 0)
perror("ioctl");
else
printf("[%s]\n", buf);
}

-- 
john.coo...@redhat.com

Re: [Qemu-devel] virtio block device and sysfs

2010-03-24 Thread john cooper

john cooper wrote:
> Marc Haber wrote:
>> Hi,
>>
>> On Mon, Mar 22, 2010 at 02:26:11AM -0400, john cooper wrote:
>>> I could have swore I sent out a guest-driver-app-interface-less
>>> version of the patch using virtio to pass the S/N but didn't find it in
>>> the archives.  I did however locate it and can bring it forward as a
>>> reference for the above if interest exists.
>>
>> If it brings the issue forward and gives me hope to be able to do what
>> I want to do in a reasonable time frame, why not.
> 
> Just time.  But I'll resurrect the patch so we don't have to go through
> all of this again.

I found the old patch, tossed out the remaining pci and
ATA_IDENTIFY artifacts and brought it forward.  The result
is downright diminutive.

Patches and details follow.

-john
  
-- 
john.coo...@redhat.com

[Qemu-devel] Re: [PATCH 10/15] virtio-serial: Add QMP events for failed port/device add

2010-03-24 Thread Amit Shah

On (Wed) Mar 24 2010 [17:34:15], Luiz Capitulino wrote:
> On Wed, 24 Mar 2010 20:19:28 +0530
> Amit Shah  wrote:
> 
> > When adding a port or a device to the guest fails, management software
> > might be interested in knowing and then cleaning up the host-side of the
> > port. Introduce QMP events to signal such errors.
> 
>  I'm completely unfamiliar with virtio-serial, so let me ask: how are ports
> added? I'd expect the command performing this operation to fail in this case.

If adding the port fails in qemu, then the command will fail. However if
adding the port in the guest fails, we raise this QMP event. Adding in
the guest could fail because of several reasons, like ENOMEM. In this
case, the mgmt might want to hot-unplug the port from qemu so that
resources are freed and also apps are notified of guest side not ready.

Amit

[Qemu-devel] about cpu_register_physical_memory_offset

2010-03-24 Thread Michael Qiu

Hi all,
  I'm really confused by the code in this function.
Can anyone tell me the exact meaning for the arguments used for this function?
target_phys_addr_t start_addr
ram_addr_t size
ram_addr_t phys_offset
ram_addr_t region_offset

Best regards

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Jamie Lokier

Anthony Liguori wrote:
> It's a statement of correctness really.  Devices should never deal with 
> target_phys_addr_t's.

Shouldn't they?  On real hardware, 64-bit PCI devices switch to being
32-bit PCI when plugged into a 32-bit motherboard slot.

> The question is, should a pci_addr_t or a sysbus_addr_t be 64 bit or 
> should it be 32-bit on 32-bit platforms.

Depends what you want to emulate.  It's not an accurate emulation if
all the old PCI devices provide 64-bit BARs; that could conceivably
bite some old OS, which expects NE2000-PCI to be a 32-bit device, for example.

And it's not a repeatable emulation if switching beteen 32-bit and
64-bit hosts means the guest sees a change in the PCI devices.  It
should be possible to change hosts with qemu willy nilly with zero
change seen be the guest.

So perhaps the width of pci_addr_t should be a per-device property,
not a host property?

> Honestly, I am extremely sceptical that there would be any
> measurable performance difference.

You may be right, but surely the way to find out is to have a way to
build either, and then compare them.  Not have it dictated by the build system.

64-bit ops on 32-bit hosts, especially x86 due to register pressure,
are noticably more expensive than 32-bit ops.  The question is whether
they are sparse enough among all the other logic that it doesn't matter.

With a bit of make cleverness it should be quite easy to support a mix
of build-once files and build-few-times files according to the minimal
compile time variations those files depend on - and express those
dependencies is a simple, one-liner way.  GNU Make is good for that
sort of thing.

- Jamie

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Jamie Lokier

Juan Quintela wrote:
> Blue Swirl  wrote:
> > Hi,
> >
> > Here's some planning for getting most files compiled as few times as
> > possible. Comments and suggestions are welcome.
> 
> I took some thought about this at some point.  Problems here start from
> "Recursive Makefile condered Harmful (tm)".

Anyone who remembers the old Linux kernel recursive make compared
with the current "dancing makefile" magic will appreciate the big leap
forward in usability.

-- Jamie

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Jamie Lokier

Anthony Liguori wrote:
> But the advantage is that if libvirt provided an API for a QMP transport 
> encapsulated in their secure protocol, then provided the plumbed that 
> API through their Python interface, you could use it for free in Python 
> without having to reinvent the wheel.

It's not free if the only "free" way to access all qemu's capabilities
from Python requires you to switch all your config files to libvirt's
format and libvirt's way of doing things. There's quite a big jump
from the qemu/kvm way of doing things and the libvirt way, and the
latter isn't well matched to all uses of qemu.

But if libvirt exposes the same QMP as direct to qemu, or something
very similar (it could wrap it, and add it's own libvirt events,
commands and properties), that would be great for scripts that could
then work with either with minimal change.

-- Jamie

RE: [Qemu-devel] Re: KVM call agenda for Mar 23

2010-03-24 Thread Zhang, Xiantao

Jes Sorensen wrote:
> On 03/23/10 13:45, Anthony Liguori wrote:
>> I don't think we can pull in:
>> 
>> - extboot
>> - ia64
>> - in-kernel pit[1]
>> - associated command line options
>> - device passthrough
>> 
>> The question is, if we dropped those things, would people actually
>> use qemu.git instead of qemu-kvm.git. If the answer is "no", what
>> set of things do we need in order for people to focus on qemu.git
>> instead of qemu-kvm.git.
> 
> I am not sure if anyone is still actively working on ia64. According
> to the qemu-kvm.git logs, there hasn't been any real ia64 changes to
> the code since my last commit in June of last year and then a couple
> of minor configure bits.
> 
> IMHO we can just let it rot - not sure if Xiantao is still interested?
For ia64 part, maybe we can keep the current qemu-kvm.git for the users. And it 
is not a must to push it into Qemu upstream. 
Xiantao

[Qemu-devel] [PATCH 08/10] target-alpha: Emit goto_tb opcodes.

2010-03-24 Thread Richard Henderson

Use an ExitStatus enumeration instead of magic numbers as the return
value from translate_one.  Emit goto_tb opcodes when ending a TB via
a direct branch.

Signed-off-by: Richard Henderson 
---
 target-alpha/translate.c |  339 ++
 1 files changed, 193 insertions(+), 146 deletions(-)

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 8e98b15..fe693b3 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -43,12 +43,13 @@
 
 typedef struct DisasContext DisasContext;
 struct DisasContext {
+struct TranslationBlock *tb;
+CPUAlphaState *env;
 uint64_t pc;
 int mem_idx;
 #if !defined (CONFIG_USER_ONLY)
 int pal_mode;
 #endif
-CPUAlphaState *env;
 uint32_t amask;
 
 /* Current rounding mode for this TB.  */
@@ -57,6 +58,25 @@ struct DisasContext {
 int tb_ftz;
 };
 
+/* Return values from translate_one, indicating the state of the TB.
+   Note that zero indicates that we are not exiting the TB.  */
+
+typedef enum {
+NO_EXIT,
+
+/* We have emitted one or more goto_tb.  No fixup required.  */
+EXIT_GOTO_TB,
+
+/* We are not using a goto_tb (for whatever reason), but have updated
+   the PC (for whatever reason), so there's no need to do it again on
+   exiting the TB.  */
+EXIT_PC_UPDATED,
+
+/* We are exiting the TB, but have neither emitted a goto_tb, nor
+   updated the PC for the next instruction to be executed.  */
+EXIT_PC_STALE
+} ExitStatus;
+
 /* global register indexes */
 static TCGv_ptr cpu_env;
 static TCGv cpu_ir[31];
@@ -300,77 +320,126 @@ static inline void gen_store_mem(DisasContext *ctx,
 tcg_temp_free(addr);
 }
 
-static void gen_bcond_pcload(DisasContext *ctx, int32_t disp, int lab_true)
+static int use_goto_tb(DisasContext *ctx, uint64_t dest)
 {
-int lab_over = gen_new_label();
+/* Check for the dest on the same page as the start of the TB.  We
+   also want to suppress goto_tb in the case of single-steping and IO.  */
+return (((ctx->tb->pc ^ dest) & TARGET_PAGE_MASK) == 0
+&& !ctx->env->singlestep_enabled
+&& !(ctx->tb->cflags & CF_LAST_IO));
+}
 
-tcg_gen_movi_i64(cpu_pc, ctx->pc);
-tcg_gen_br(lab_over);
-gen_set_label(lab_true);
-tcg_gen_movi_i64(cpu_pc, ctx->pc + (int64_t)(disp << 2));
-gen_set_label(lab_over);
+static ExitStatus gen_bdirect(DisasContext *ctx, int ra, int32_t disp)
+{
+uint64_t dest = ctx->pc + (disp << 2);
+
+if (ra != 31) {
+tcg_gen_movi_i64(cpu_ir[ra], ctx->pc);
+}
+
+/* Notice branch-to-next; used to initialize RA with the PC.  */
+if (disp == 0) {
+return 0;
+} else if (use_goto_tb(ctx, dest)) {
+tcg_gen_goto_tb(0);
+tcg_gen_movi_i64(cpu_pc, dest);
+tcg_gen_exit_tb((long)ctx->tb);
+return EXIT_GOTO_TB;
+} else {
+tcg_gen_movi_i64(cpu_pc, dest);
+return EXIT_PC_UPDATED;
+}
 }
 
-static void gen_bcond(DisasContext *ctx, TCGCond cond, int ra,
-  int32_t disp, int mask)
+static ExitStatus gen_bcond_internal(DisasContext *ctx, TCGCond cond,
+ TCGv cmp, int32_t disp)
 {
+uint64_t dest = ctx->pc + (disp << 2);
 int lab_true = gen_new_label();
 
-if (likely(ra != 31)) {
+if (use_goto_tb(ctx, dest)) {
+tcg_gen_brcondi_i64(cond, cmp, 0, lab_true);
+
+tcg_gen_goto_tb(0);
+tcg_gen_movi_i64(cpu_pc, ctx->pc);
+tcg_gen_exit_tb((long)ctx->tb);
+
+gen_set_label(lab_true);
+tcg_gen_goto_tb(1);
+tcg_gen_movi_i64(cpu_pc, dest);
+tcg_gen_exit_tb((long)ctx->tb + 1);
+
+return EXIT_GOTO_TB;
+} else {
+int lab_over = gen_new_label();
+
+/* ??? Consider using either
+ movi pc, next
+ addi tmp, pc, disp
+ movcond pc, cond, 0, tmp, pc
+   or
+ setcond tmp, cond, 0
+ movi pc, next
+ neg tmp, tmp
+ andi tmp, tmp, disp
+ add pc, pc, tmp
+   The current diamond subgraph surely isn't efficient.  */
+
+tcg_gen_brcondi_i64(cond, cmp, 0, lab_true);
+tcg_gen_movi_i64(cpu_pc, ctx->pc);
+tcg_gen_br(lab_over);
+gen_set_label(lab_true);
+tcg_gen_movi_i64(cpu_pc, dest);
+gen_set_label(lab_over);
+
+return EXIT_PC_UPDATED;
+}
+}
+
+static ExitStatus gen_bcond(DisasContext *ctx, TCGCond cond, int ra,
+int32_t disp, int mask)
+{
+TCGv cmp_tmp;
+
+if (unlikely(ra == 31)) {
+cmp_tmp = tcg_const_i64(0);
+} else {
+cmp_tmp = tcg_temp_new();
 if (mask) {
-TCGv tmp = tcg_temp_new();
-tcg_gen_andi_i64(tmp, cpu_ir[ra], 1);
-tcg_gen_brcondi_i64(cond, tmp, 0, lab_true);
-tcg_temp_free(tmp);
+tcg_gen_andi_i64(cmp_tmp, cpu_ir[ra], 1);
 } else {
-

[Qemu-devel] [PATCH 10/10] target-alpha: Enable NPTL.

2010-03-24 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 configure|1 +
 linux-user/syscall.c |2 +-
 target-alpha/cpu.h   |   28 +---
 3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/configure b/configure
index 6bc40a3..6b50b6a 100755
--- a/configure
+++ b/configure
@@ -2368,6 +2368,7 @@ case "$target_arch2" in
   ;;
   alpha)
 target_phys_bits=64
+target_nptl="yes"
   ;;
   arm|armeb)
 TARGET_ARCH=arm
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 80d8633..b921076 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -5755,7 +5755,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 ret = get_errno(fsync(arg1));
 break;
 case TARGET_NR_clone:
-#if defined(TARGET_SH4)
+#if defined(TARGET_SH4) || defined(TARGET_ALPHA)
 ret = get_errno(do_fork(cpu_env, arg1, arg2, arg3, arg5, arg4));
 #elif defined(TARGET_CRIS)
 ret = get_errno(do_fork(cpu_env, arg2, arg1, arg3, arg4, arg5));
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index cf2b587..bbc5ac2 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -412,15 +412,6 @@ static inline int cpu_mmu_index (CPUState *env)
 return (env->ps >> 3) & 3;
 }
 
-#if defined(CONFIG_USER_ONLY)
-static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
-{
-if (newsp)
-env->ir[30] = newsp;
-/* FIXME: Zero syscall return value.  */
-}
-#endif
-
 #include "cpu-all.h"
 #include "exec-all.h"
 
@@ -478,7 +469,7 @@ enum {
 IR_S4   = 13,
 IR_S5   = 14,
 IR_S6   = 15,
-#define IR_FP IR_S6
+IR_FP   = IR_S6,
 IR_A0   = 16,
 IR_A1   = 17,
 IR_A2   = 18,
@@ -491,7 +482,7 @@ enum {
 IR_T11  = 25,
 IR_RA   = 26,
 IR_T12  = 27,
-#define IR_PV IR_T12
+IR_PV   = IR_T12,
 IR_AT   = 28,
 IR_GP   = 29,
 IR_SP   = 30,
@@ -532,4 +523,19 @@ static inline void cpu_get_tb_cpu_state(CPUState *env, 
target_ulong *pc,
 *flags = env->ps;
 }
 
+#if defined(CONFIG_USER_ONLY)
+static inline void cpu_clone_regs(CPUState *env, target_ulong newsp)
+{
+if (newsp)
+env->ir[IR_SP] = newsp;
+env->ir[IR_V0] = 0;
+env->ir[IR_A3] = 0;
+}
+
+static inline void cpu_set_tls(CPUState *env, target_ulong newtls)
+{
+env->unique = newtls;
+}
+#endif
+
 #endif /* !defined (__CPU_ALPHA_H__) */
-- 
1.6.6.1

[Qemu-devel] [PATCH 09/10] target-alpha: Implement load-locked/store-conditional properly.

2010-03-24 Thread Richard Henderson

Use __sync_bool_compare_and_swap to yield correctly atomic results.
As yet, this assumes running on an strict-memory-ordering host (i.e. x86),
since we're still "implementing" the memory-barrier instructions as nops.

Rename the "lock" cpu field to "lock_addr" and add a "lock_value" field
to be used as the "old" value for the compare-and-swap.  Use -1 in the
lock_addr field to indicate no lock held.  Break the lock when processing
any sort of exception.

Signed-off-by: Richard Henderson 
---
 linux-user/main.c|   11 
 target-alpha/cpu.h   |3 +-
 target-alpha/helper.c|6 +-
 target-alpha/helper.h|3 +
 target-alpha/op_helper.c |   70 ++
 target-alpha/translate.c |  146 -
 6 files changed, 168 insertions(+), 71 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index d4a29cb..cbff027 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2361,6 +2361,14 @@ void cpu_loop (CPUState *env)
   that the intr_flag should be cleared.  */
env->intr_flag = 0;
 
+/* Similarly with the lock_flag, where "clear" is -1 here.
+   ??? Note that it's not possible to single-step through
+   a ldl_l/stl_c sequence on real hardware, but let's see
+   if we can do better than that in the emulator.  */
+if (trapnr != EXCP_DEBUG) {
+env->lock_addr = -1;
+}
+
 switch (trapnr) {
 case EXCP_RESET:
 fprintf(stderr, "Reset requested. Exit\n");
@@ -2512,6 +2520,9 @@ void cpu_loop (CPUState *env)
 case EXCP_DEBUG:
 info.si_signo = gdb_handlesig (env, TARGET_SIGTRAP);
 if (info.si_signo) {
+/* ??? See above re single-stepping and ldl_l.  If we're
+   actually going to deliver a signal, break the lock.  */
+env->lock_addr = -1;
 info.si_errno = 0;
 info.si_code = TARGET_TRAP_BRKPT;
 queue_signal(env, info.si_signo, &info);
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 8afe16d..cf2b587 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -355,11 +355,12 @@ struct CPUAlphaState {
 uint64_t ir[31];
 float64 fir[31];
 uint64_t pc;
-uint64_t lock;
 uint32_t pcc[2];
 uint64_t ipr[IPR_LAST];
 uint64_t ps;
 uint64_t unique;
+uint64_t lock_addr;
+uint64_t lock_value;
 float_status fp_status;
 /* The following fields make up the FPCR, but in FP_STATUS format.  */
 uint8_t fpcr_exc_status;
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index 46335cd..e4dd124 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -556,12 +556,14 @@ void cpu_dump_state (CPUState *env, FILE *f,
 if ((i % 3) == 2)
 cpu_fprintf(f, "\n");
 }
-cpu_fprintf(f, "\n");
+
+cpu_fprintf(f, "lock_a   " TARGET_FMT_lx " lock_v   " TARGET_FMT_lx "\n",
+env->lock_addr, env->lock_value);
+
 for (i = 0; i < 31; i++) {
 cpu_fprintf(f, "FIR%02d" TARGET_FMT_lx " ", i,
 *((uint64_t *)(&env->fir[i])));
 if ((i % 3) == 2)
 cpu_fprintf(f, "\n");
 }
-cpu_fprintf(f, "\nlock " TARGET_FMT_lx "\n", env->lock);
 }
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index ccf6a2a..a540eeb 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -99,6 +99,9 @@ DEF_HELPER_1(ieee_input, i64, i64)
 DEF_HELPER_1(ieee_input_cmp, i64, i64)
 DEF_HELPER_1(ieee_input_s, i64, i64)
 
+DEF_HELPER_2(stl_c, i64, i64, i64)
+DEF_HELPER_2(stq_c, i64, i64, i64)
+
 #if !defined (CONFIG_USER_ONLY)
 DEF_HELPER_0(hw_rei, void)
 DEF_HELPER_1(hw_ret, void, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index a209130..ea68222 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -1152,6 +1152,74 @@ uint64_t helper_cvtqg (uint64_t a)
 return float64_to_g(fr);
 }
 
+uint64_t helper_stl_c (uint64_t addr, uint64_t val)
+{
+uint64_t ret = 0;
+
+/* The resultant virtual and physical address must specify a location
+   within the same natually aligned 16-byte block as the preceeding
+   load instruction.  Note that (1) we're only checking the physical
+   address here, since ADDR has already been through virt_to_phys,
+   and (2) LOCK_ADDR has already been masked with -16.  We do this
+   ahead of time so that we can assign LOCK_ADDR = -1 to force a 
+   failure here.  */
+/* ??? The "16" in the spec is no doubt the minimum architectural
+   cacheline size.  If we are attempting to model the real hardware
+   implementation, we should probably expand this to the real cache
+   line size.  But this is certainly good enough for now.  */
+if ((addr & -16) == env->lock_addr) {
+int32_t *host_addr;
+
+#if defined(CONFIG_USER_ONLY)
+host_addr = (int32_t *)addr;
+#el

[Qemu-devel] [PATCH 00/10, v3] target-alpha improvements

2010-03-24 Thread Richard Henderson

Changes from v1->v2:
  * Use setcond and goto_tb.

Changes from v2->v3:
  * Enable NPTL.

I don't see how any sort of emulation of cmpxchg/load-locked is working
for any currently enabled nptl target.  I think how I've approached
handling load-locked for alpha is probably the easiest way.  Slightly
better would be if TCG had a (set of) cmpxchg opcodes, which would have
the benefit of getting the virt->phys->host address (and segfault handling)
more correct.  I've sort of totally ignored the faulting for now.

This is just good enough to not immediately fail the glibc testsuite for
alpha, as the default glibc test skeleton uses clone.  I'm concurrently
debuging the glibc-alpha port and the qemu-alpha syscall emulation, so
it's not always clear which is at fault.  


r~


Richard Henderson (10):
  target-alpha: Add flags markups to helpers.h.
  target-alpha: Implement cpys{,n,e} inline.
  target-alpha: Implement rs/rc properly.
  target-alpha: Implement cvtql inline.
  target-alpha: Implement cvtlq inline.
  target-alpha: Use setcond for int comparisons.
  target-alpha: Use non-inverted arguments to gen_{f}cmov.
  target-alpha: Emit goto_tb opcodes.
  target-alpha: Implement load-locked/store-conditional properly.
  target-alpha: Enable NPTL.

 configure|1 +
 linux-user/main.c|   16 +
 linux-user/syscall.c |2 +-
 target-alpha/cpu.h   |   31 ++-
 target-alpha/helper.c|6 +-
 target-alpha/helper.h|  182 ++--
 target-alpha/op_helper.c |  131 +
 target-alpha/translate.c |  722 +-
 8 files changed, 651 insertions(+), 440 deletions(-)

[Qemu-devel] [PATCH 03/10] target-alpha: Implement rs/rc properly.

2010-03-24 Thread Richard Henderson

This is a per-cpu flag; there's no need for a spinlock of any kind.

We were also failing to manipulate the flag with $31 as a target reg
and failing to clear the flag on execution of a return-from-interrupt
instruction.

Signed-off-by: Richard Henderson 
---
 linux-user/main.c|5 +
 target-alpha/helper.h|2 --
 target-alpha/op_helper.c |   28 ++--
 target-alpha/translate.c |   19 +++
 4 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 4614e3c..d4a29cb 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2356,6 +2356,11 @@ void cpu_loop (CPUState *env)
 while (1) {
 trapnr = cpu_alpha_exec (env);
 
+   /* All of the traps imply a transition through PALcode, which
+  implies an REI instruction has been executed.  Which means
+  that the intr_flag should be cleared.  */
+   env->intr_flag = 0;
+
 switch (trapnr) {
 case EXCP_RESET:
 fprintf(stderr, "Reset requested. Exit\n");
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index 8e11304..c378195 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -2,8 +2,6 @@
 
 DEF_HELPER_2(excp, void, int, int)
 DEF_HELPER_FLAGS_0(load_pcc, TCG_CALL_CONST | TCG_CALL_PURE, i64)
-DEF_HELPER_FLAGS_0(rc, TCG_CALL_CONST, i64)
-DEF_HELPER_FLAGS_0(rs, TCG_CALL_CONST, i64)
 
 DEF_HELPER_2(addqv, i64, i64, i64)
 DEF_HELPER_2(addlv, i64, i64, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index 2419dc4..84867b8 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -47,32 +47,6 @@ void helper_store_fpcr (uint64_t val)
 cpu_alpha_store_fpcr (env, val);
 }
 
-static spinlock_t intr_cpu_lock = SPIN_LOCK_UNLOCKED;
-
-uint64_t helper_rs(void)
-{
-uint64_t tmp;
-
-spin_lock(&intr_cpu_lock);
-tmp = env->intr_flag;
-env->intr_flag = 1;
-spin_unlock(&intr_cpu_lock);
-
-return tmp;
-}
-
-uint64_t helper_rc(void)
-{
-uint64_t tmp;
-
-spin_lock(&intr_cpu_lock);
-tmp = env->intr_flag;
-env->intr_flag = 0;
-spin_unlock(&intr_cpu_lock);
-
-return tmp;
-}
-
 uint64_t helper_addqv (uint64_t op1, uint64_t op2)
 {
 uint64_t tmp = op1;
@@ -1211,6 +1185,7 @@ void helper_hw_rei (void)
 {
 env->pc = env->ipr[IPR_EXC_ADDR] & ~3;
 env->ipr[IPR_EXC_ADDR] = env->ipr[IPR_EXC_ADDR] & 1;
+env->intr_flag = 0;
 /* XXX: re-enable interrupts and memory mapping */
 }
 
@@ -1218,6 +1193,7 @@ void helper_hw_ret (uint64_t a)
 {
 env->pc = a & ~3;
 env->ipr[IPR_EXC_ADDR] = a & 1;
+env->intr_flag = 0;
 /* XXX: re-enable interrupts and memory mapping */
 }
 
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index b677378..188e76c 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -1266,6 +1266,19 @@ static inline void gen_cmp(TCGCond cond, int ra, int rb, 
int rc, int islit,
 gen_set_label(l2);
 }
 
+static void gen_rx(int ra, int set)
+{
+TCGv_i32 tmp;
+
+if (ra != 31) {
+tcg_gen_ld8u_i64(cpu_ir[ra], cpu_env, offsetof(CPUState, intr_flag));
+}
+
+tmp = tcg_const_i32(set);
+tcg_gen_st8_i32(tmp, cpu_env, offsetof(CPUState, intr_flag));
+tcg_temp_free_i32(tmp);
+}
+
 static inline int translate_one(DisasContext *ctx, uint32_t insn)
 {
 uint32_t palcode;
@@ -2359,16 +2372,14 @@ static inline int translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0xE000:
 /* RC */
-if (ra != 31)
-gen_helper_rc(cpu_ir[ra]);
+gen_rx(ra, 0);
 break;
 case 0xE800:
 /* ECB */
 break;
 case 0xF000:
 /* RS */
-if (ra != 31)
-gen_helper_rs(cpu_ir[ra]);
+gen_rx(ra, 1);
 break;
 case 0xF800:
 /* WH64 */
-- 
1.6.6.1

[Qemu-devel] [PATCH 07/10] target-alpha: Use non-inverted arguments to gen_{f}cmov.

2010-03-24 Thread Richard Henderson

The inverted conditions as argument to the function looks wrong
at a glance inside translate_one.  Since we have an easy function
to produce the inversion now, use it.

Signed-off-by: Richard Henderson 
---
 target-alpha/translate.c |   37 +++--
 1 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 80b5f52..8e98b15 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -394,9 +394,10 @@ static void gen_fbcond(DisasContext *ctx, TCGCond cond, 
int ra, int32_t disp)
 gen_bcond_pcload(ctx, disp, lab_true);
 }
 
-static inline void gen_cmov(TCGCond inv_cond, int ra, int rb, int rc,
-int islit, uint8_t lit, int mask)
+static void gen_cmov(TCGCond cond, int ra, int rb, int rc,
+int islit, uint8_t lit, int mask)
 {
+TCGCond inv_cond = tcg_invert_cond(cond);
 int l1;
 
 if (unlikely(rc == 31))
@@ -426,7 +427,7 @@ static inline void gen_cmov(TCGCond inv_cond, int ra, int 
rb, int rc,
 gen_set_label(l1);
 }
 
-static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int rc)
+static void gen_fcmov(TCGCond cond, int ra, int rb, int rc)
 {
 TCGv va = cpu_fir[ra];
 int l1;
@@ -439,7 +440,7 @@ static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int 
rc)
 }
 
 l1 = gen_new_label();
-gen_fbcond_internal(inv_cond, va, l1);
+gen_fbcond_internal(tcg_invert_cond(cond), va, l1);
 
 if (rb != 31)
 tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[rb]);
@@ -1765,11 +1766,11 @@ static inline int translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0x14:
 /* CMOVLBS */
-gen_cmov(TCG_COND_EQ, ra, rb, rc, islit, lit, 1);
+gen_cmov(TCG_COND_NE, ra, rb, rc, islit, lit, 1);
 break;
 case 0x16:
 /* CMOVLBC */
-gen_cmov(TCG_COND_NE, ra, rb, rc, islit, lit, 1);
+gen_cmov(TCG_COND_EQ, ra, rb, rc, islit, lit, 1);
 break;
 case 0x20:
 /* BIS */
@@ -1789,11 +1790,11 @@ static inline int translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0x24:
 /* CMOVEQ */
-gen_cmov(TCG_COND_NE, ra, rb, rc, islit, lit, 0);
+gen_cmov(TCG_COND_EQ, ra, rb, rc, islit, lit, 0);
 break;
 case 0x26:
 /* CMOVNE */
-gen_cmov(TCG_COND_EQ, ra, rb, rc, islit, lit, 0);
+gen_cmov(TCG_COND_NE, ra, rb, rc, islit, lit, 0);
 break;
 case 0x28:
 /* ORNOT */
@@ -1829,11 +1830,11 @@ static inline int translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0x44:
 /* CMOVLT */
-gen_cmov(TCG_COND_GE, ra, rb, rc, islit, lit, 0);
+gen_cmov(TCG_COND_LT, ra, rb, rc, islit, lit, 0);
 break;
 case 0x46:
 /* CMOVGE */
-gen_cmov(TCG_COND_LT, ra, rb, rc, islit, lit, 0);
+gen_cmov(TCG_COND_GE, ra, rb, rc, islit, lit, 0);
 break;
 case 0x48:
 /* EQV */
@@ -1873,11 +1874,11 @@ static inline int translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0x64:
 /* CMOVLE */
-gen_cmov(TCG_COND_GT, ra, rb, rc, islit, lit, 0);
+gen_cmov(TCG_COND_LE, ra, rb, rc, islit, lit, 0);
 break;
 case 0x66:
 /* CMOVGT */
-gen_cmov(TCG_COND_LE, ra, rb, rc, islit, lit, 0);
+gen_cmov(TCG_COND_GT, ra, rb, rc, islit, lit, 0);
 break;
 case 0x6C:
 /* IMPLVER */
@@ -2351,27 +2352,27 @@ static inline int translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0x02A:
 /* FCMOVEQ */
-gen_fcmov(TCG_COND_NE, ra, rb, rc);
+gen_fcmov(TCG_COND_EQ, ra, rb, rc);
 break;
 case 0x02B:
 /* FCMOVNE */
-gen_fcmov(TCG_COND_EQ, ra, rb, rc);
+gen_fcmov(TCG_COND_NE, ra, rb, rc);
 break;
 case 0x02C:
 /* FCMOVLT */
-gen_fcmov(TCG_COND_GE, ra, rb, rc);
+gen_fcmov(TCG_COND_LT, ra, rb, rc);
 break;
 case 0x02D:
 /* FCMOVGE */
-gen_fcmov(TCG_COND_LT, ra, rb, rc);
+gen_fcmov(TCG_COND_GE, ra, rb, rc);
 break;
 case 0x02E:
 /* FCMOVLE */
-gen_fcmov(TCG_COND_GT, ra, rb, rc);
+gen_fcmov(TCG_COND_LE, ra, rb, rc);
 break;
 case 0x02F:
 /* FCMOVGT */
-gen_fcmov(TCG_COND_LE, ra, rb, rc);
+gen_fcmov(TCG_COND_GT, ra, rb, rc);
 break;
 case 0x030:
 /* CVTQL */
-- 
1.6.6.1

[Qemu-devel] [PATCH 06/10] target-alpha: Use setcond for int comparisons.

2010-03-24 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target-alpha/translate.c |   43 ++-
 1 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 90a14f5..80b5f52 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -1288,33 +1288,34 @@ MVIOP2(pkwb)
 MVIOP2(unpkbl)
 MVIOP2(unpkbw)
 
-static inline void gen_cmp(TCGCond cond, int ra, int rb, int rc, int islit,
-   uint8_t lit)
+static void gen_cmp(TCGCond cond, int ra, int rb, int rc,
+int islit, uint8_t lit)
 {
-int l1, l2;
-TCGv tmp;
+TCGv va, vb;
 
-if (unlikely(rc == 31))
+if (unlikely(rc == 31)) {
 return;
+}
 
-l1 = gen_new_label();
-l2 = gen_new_label();
+if (ra == 31) {
+va = tcg_const_i64(0);
+} else {
+va = cpu_ir[ra];
+}
+if (islit) {
+vb = tcg_const_i64(lit);
+} else {
+vb = cpu_ir[rb];
+}
 
-if (ra != 31) {
-tmp = tcg_temp_new();
-tcg_gen_mov_i64(tmp, cpu_ir[ra]);
-} else
-tmp = tcg_const_i64(0);
-if (islit)
-tcg_gen_brcondi_i64(cond, tmp, lit, l1);
-else
-tcg_gen_brcond_i64(cond, tmp, cpu_ir[rb], l1);
+tcg_gen_setcond_i64(cond, cpu_ir[rc], va, vb);
 
-tcg_gen_movi_i64(cpu_ir[rc], 0);
-tcg_gen_br(l2);
-gen_set_label(l1);
-tcg_gen_movi_i64(cpu_ir[rc], 1);
-gen_set_label(l2);
+if (ra == 31) {
+tcg_temp_free(va);
+}
+if (islit) {
+tcg_temp_free(vb);
+}
 }
 
 static void gen_rx(int ra, int set)
-- 
1.6.6.1

[Qemu-devel] [PATCH 05/10] target-alpha: Implement cvtlq inline.

2010-03-24 Thread Richard Henderson

It's a simple shift and mask sequence.

Signed-off-by: Richard Henderson 
---
 target-alpha/helper.h|1 -
 target-alpha/op_helper.c |7 ---
 target-alpha/translate.c |   21 -
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index 10c78d0..ccf6a2a 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -83,7 +83,6 @@ DEF_HELPER_FLAGS_1(cvtqf, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvtgf, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvtgq, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvtqg, TCG_CALL_CONST, i64, i64)
-DEF_HELPER_FLAGS_1(cvtlq, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
 
 DEF_HELPER_FLAGS_1(cvttq, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvttq_c, TCG_CALL_CONST, i64, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index f9cd07a..a209130 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -1152,13 +1152,6 @@ uint64_t helper_cvtqg (uint64_t a)
 return float64_to_g(fr);
 }
 
-uint64_t helper_cvtlq (uint64_t a)
-{
-int32_t lo = a >> 29;
-int32_t hi = a >> 32;
-return (lo & 0x3FFF) | (hi & 0xc000);
-}
-
 /* PALcode support special instructions */
 #if !defined (CONFIG_USER_ONLY)
 void helper_hw_rei (void)
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 44ce830..90a14f5 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -597,6 +597,26 @@ static inline void gen_fp_exc_raise(int rc, int fn11)
 gen_fp_exc_raise_ignore(rc, fn11, fn11 & QUAL_I ? 0 : float_flag_inexact);
 }
 
+static void gen_fcvtlq(int rb, int rc)
+{
+if (unlikely(rc == 31)) {
+return;
+}
+if (unlikely(rb == 31)) {
+tcg_gen_movi_i64(cpu_fir[rc], 0);
+} else {
+TCGv tmp = tcg_temp_new();
+
+tcg_gen_shri_i64(tmp, cpu_fir[rb], 32);
+tcg_gen_shri_i64(cpu_fir[rc], cpu_fir[rb], 29);
+tcg_gen_andi_i64(tmp, tmp, 0xc000);
+tcg_gen_andi_i64(cpu_fir[rc], cpu_fir[rc], 0x3FFF);
+tcg_gen_or_i64(cpu_fir[rc], cpu_fir[rc], tmp);
+
+tcg_temp_free(tmp);
+}
+}
+
 static void gen_fcvtql(int rb, int rc)
 {
 if (unlikely(rc == 31)) {
@@ -646,7 +666,6 @@ static inline void glue(gen_f, name)(int rb, int rc)\
 tcg_temp_free(tmp); \
 }   \
 }
-FARITH2(cvtlq)
 
 /* ??? VAX instruction qualifiers ignored.  */
 FARITH2(sqrtf)
-- 
1.6.6.1

[Qemu-devel] [PATCH 02/10] target-alpha: Implement cpys{, n, e} inline.

2010-03-24 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target-alpha/helper.h|4 --
 target-alpha/op_helper.c |   18 --
 target-alpha/translate.c |   78 +++--
 3 files changed, 74 insertions(+), 26 deletions(-)

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index a508077..8e11304 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -77,10 +77,6 @@ DEF_HELPER_FLAGS_2(cmpgeq, TCG_CALL_CONST | TCG_CALL_PURE, 
i64, i64, i64)
 DEF_HELPER_FLAGS_2(cmpgle, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(cmpglt, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
 
-DEF_HELPER_FLAGS_2(cpys, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(cpysn, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(cpyse, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-
 DEF_HELPER_FLAGS_1(cvtts, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvtst, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvtqs, TCG_CALL_CONST, i64, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index 4d2c2ee..2419dc4 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -921,24 +921,6 @@ uint64_t helper_sqrtt (uint64_t a)
 return float64_to_t(fr);
 }
 
-
-/* Sign copy */
-uint64_t helper_cpys(uint64_t a, uint64_t b)
-{
-return (a & 0x8000ULL) | (b & ~0x8000ULL);
-}
-
-uint64_t helper_cpysn(uint64_t a, uint64_t b)
-{
-return ((~a) & 0x8000ULL) | (b & ~0x8000ULL);
-}
-
-uint64_t helper_cpyse(uint64_t a, uint64_t b)
-{
-return (a & 0xFFF0ULL) | (b & ~0xFFF0ULL);
-}
-
-
 /* Comparisons */
 uint64_t helper_cmptun (uint64_t a, uint64_t b)
 {
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 719b423..b677378 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -741,6 +741,80 @@ static inline void glue(gen_f, name)(DisasContext *ctx,
 \
 IEEE_INTCVT(cvtqs)
 IEEE_INTCVT(cvtqt)
 
+static void gen_cpys_internal(int ra, int rb, int rc, int inv_a, uint64_t mask)
+{
+TCGv va, vb, vmask;
+int za = 0, zb = 0;
+
+if (unlikely(rc == 31)) {
+return;
+}
+
+vmask = tcg_const_i64(mask);
+
+TCGV_UNUSED_I64(va);
+if (ra == 31) {
+if (inv_a) {
+va = vmask;
+} else {
+za = 1;
+}
+} else {
+va = tcg_temp_new_i64();
+tcg_gen_mov_i64(va, cpu_fir[ra]);
+if (inv_a) {
+tcg_gen_not_i64(va, va);
+}
+tcg_gen_and_i64(va, va, vmask);
+}
+
+TCGV_UNUSED_I64(vb);
+if (rb == 31) {
+zb = 1;
+} else {
+vb = tcg_temp_new_i64();
+tcg_gen_andc_i64(vb, cpu_fir[rb], vmask);
+}
+
+switch (za * 2 + zb) {
+case 0:
+tcg_gen_or_i64(cpu_fir[rc], va, vb);
+break;
+case 1:
+tcg_gen_mov_i64(cpu_fir[rc], va);
+break;
+case 2:
+tcg_gen_mov_i64(cpu_fir[rc], vb);
+break;
+case 3:
+tcg_gen_movi_i64(cpu_fir[rc], 0);
+break;
+}
+
+tcg_temp_free(vmask);
+if (ra != 31) {
+tcg_temp_free(va);
+}
+if (rb != 31) {
+tcg_temp_free(vb);
+}
+}
+
+static inline void gen_fcpys(int ra, int rb, int rc)
+{
+gen_cpys_internal(ra, rb, rc, 0, 0x8000ULL);
+}
+
+static inline void gen_fcpysn(int ra, int rb, int rc)
+{
+gen_cpys_internal(ra, rb, rc, 1, 0x8000ULL);
+}
+
+static inline void gen_fcpyse(int ra, int rb, int rc)
+{
+gen_cpys_internal(ra, rb, rc, 0, 0xFFF0ULL);
+}
+
 #define FARITH3(name)   \
 static inline void glue(gen_f, name)(int ra, int rb, int rc)\
 {   \
@@ -769,10 +843,6 @@ static inline void glue(gen_f, name)(int ra, int rb, int 
rc)\
 tcg_temp_free(vb);  \
 }   \
 }
-/* ??? Ought to expand these inline; simple masking operations.  */
-FARITH3(cpys)
-FARITH3(cpysn)
-FARITH3(cpyse)
 
 /* ??? VAX instruction qualifiers ignored.  */
 FARITH3(addf)
-- 
1.6.6.1

[Qemu-devel] [PATCH 01/10] target-alpha: Add flags markups to helpers.h.

2010-03-24 Thread Richard Henderson

Almost all alpha helpers are at least TCG_CALL_CONST
and a fair few are also TCG_CALL_PURE.

Signed-off-by: Richard Henderson 
---
 target-alpha/helper.h |  184 
 1 files changed, 92 insertions(+), 92 deletions(-)

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index 79cf375..a508077 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -1,9 +1,9 @@
 #include "def-helper.h"
 
 DEF_HELPER_2(excp, void, int, int)
-DEF_HELPER_0(load_pcc, i64)
-DEF_HELPER_0(rc, i64)
-DEF_HELPER_0(rs, i64)
+DEF_HELPER_FLAGS_0(load_pcc, TCG_CALL_CONST | TCG_CALL_PURE, i64)
+DEF_HELPER_FLAGS_0(rc, TCG_CALL_CONST, i64)
+DEF_HELPER_FLAGS_0(rs, TCG_CALL_CONST, i64)
 
 DEF_HELPER_2(addqv, i64, i64, i64)
 DEF_HELPER_2(addlv, i64, i64, i64)
@@ -11,98 +11,98 @@ DEF_HELPER_2(subqv, i64, i64, i64)
 DEF_HELPER_2(sublv, i64, i64, i64)
 DEF_HELPER_2(mullv, i64, i64, i64)
 DEF_HELPER_2(mulqv, i64, i64, i64)
-DEF_HELPER_2(umulh, i64, i64, i64)
-
-DEF_HELPER_1(ctpop, i64, i64)
-DEF_HELPER_1(ctlz, i64, i64)
-DEF_HELPER_1(cttz, i64, i64)
-
-DEF_HELPER_2(zap, i64, i64, i64)
-DEF_HELPER_2(zapnot, i64, i64, i64)
-
-DEF_HELPER_2(cmpbge, i64, i64, i64)
-
-DEF_HELPER_2(minub8, i64, i64, i64)
-DEF_HELPER_2(minsb8, i64, i64, i64)
-DEF_HELPER_2(minuw4, i64, i64, i64)
-DEF_HELPER_2(minsw4, i64, i64, i64)
-DEF_HELPER_2(maxub8, i64, i64, i64)
-DEF_HELPER_2(maxsb8, i64, i64, i64)
-DEF_HELPER_2(maxuw4, i64, i64, i64)
-DEF_HELPER_2(maxsw4, i64, i64, i64)
-DEF_HELPER_2(perr, i64, i64, i64)
-DEF_HELPER_1(pklb, i64, i64)
-DEF_HELPER_1(pkwb, i64, i64)
-DEF_HELPER_1(unpkbl, i64, i64)
-DEF_HELPER_1(unpkbw, i64, i64)
-
-DEF_HELPER_0(load_fpcr, i64)
-DEF_HELPER_1(store_fpcr, void, i64)
-
-DEF_HELPER_1(f_to_memory, i32, i64)
-DEF_HELPER_1(memory_to_f, i64, i32)
-DEF_HELPER_2(addf, i64, i64, i64)
-DEF_HELPER_2(subf, i64, i64, i64)
-DEF_HELPER_2(mulf, i64, i64, i64)
-DEF_HELPER_2(divf, i64, i64, i64)
-DEF_HELPER_1(sqrtf, i64, i64)
-
-DEF_HELPER_1(g_to_memory, i64, i64)
-DEF_HELPER_1(memory_to_g, i64, i64)
-DEF_HELPER_2(addg, i64, i64, i64)
-DEF_HELPER_2(subg, i64, i64, i64)
-DEF_HELPER_2(mulg, i64, i64, i64)
-DEF_HELPER_2(divg, i64, i64, i64)
-DEF_HELPER_1(sqrtg, i64, i64)
-
-DEF_HELPER_1(s_to_memory, i32, i64)
-DEF_HELPER_1(memory_to_s, i64, i32)
-DEF_HELPER_2(adds, i64, i64, i64)
-DEF_HELPER_2(subs, i64, i64, i64)
-DEF_HELPER_2(muls, i64, i64, i64)
-DEF_HELPER_2(divs, i64, i64, i64)
-DEF_HELPER_1(sqrts, i64, i64)
-
-DEF_HELPER_2(addt, i64, i64, i64)
-DEF_HELPER_2(subt, i64, i64, i64)
-DEF_HELPER_2(mult, i64, i64, i64)
-DEF_HELPER_2(divt, i64, i64, i64)
-DEF_HELPER_1(sqrtt, i64, i64)
-
-DEF_HELPER_2(cmptun, i64, i64, i64)
-DEF_HELPER_2(cmpteq, i64, i64, i64)
-DEF_HELPER_2(cmptle, i64, i64, i64)
-DEF_HELPER_2(cmptlt, i64, i64, i64)
-DEF_HELPER_2(cmpgeq, i64, i64, i64)
-DEF_HELPER_2(cmpgle, i64, i64, i64)
-DEF_HELPER_2(cmpglt, i64, i64, i64)
-
-DEF_HELPER_2(cpys, i64, i64, i64)
-DEF_HELPER_2(cpysn, i64, i64, i64)
-DEF_HELPER_2(cpyse, i64, i64, i64)
-
-DEF_HELPER_1(cvtts, i64, i64)
-DEF_HELPER_1(cvtst, i64, i64)
-DEF_HELPER_1(cvtqs, i64, i64)
-DEF_HELPER_1(cvtqt, i64, i64)
-DEF_HELPER_1(cvtqf, i64, i64)
-DEF_HELPER_1(cvtgf, i64, i64)
-DEF_HELPER_1(cvtgq, i64, i64)
-DEF_HELPER_1(cvtqg, i64, i64)
-DEF_HELPER_1(cvtlq, i64, i64)
-
-DEF_HELPER_1(cvttq, i64, i64)
-DEF_HELPER_1(cvttq_c, i64, i64)
-DEF_HELPER_1(cvttq_svic, i64, i64)
-
-DEF_HELPER_1(cvtql, i64, i64)
+DEF_HELPER_FLAGS_2(umulh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_1(ctpop, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(ctlz, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(cttz, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+
+DEF_HELPER_FLAGS_2(zap, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(zapnot, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(cmpbge, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(minub8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(minsb8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(minuw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(minsw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxub8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxsb8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxuw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxsw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(perr, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(pklb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(pkwb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(unpkbl, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(unpkbw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+
+DEF_HELPER_FLAGS_0(load_fpcr, TCG_CALL_CONST | TCG_CALL_PURE, i64)
+DEF_HELPER_FLAGS_1(store_fpcr, TCG_CALL_CONST,

[Qemu-devel] [PATCH 04/10] target-alpha: Implement cvtql inline.

2010-03-24 Thread Richard Henderson

It's a simple mask and shift sequence.
Also, fix a typo in the actual masks used.

Signed-off-by: Richard Henderson 
---
 target-alpha/helper.h|4 
 target-alpha/op_helper.c |   20 
 target-alpha/translate.c |   45 +++--
 3 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index c378195..10c78d0 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -89,10 +89,6 @@ DEF_HELPER_FLAGS_1(cvttq, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvttq_c, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvttq_svic, TCG_CALL_CONST, i64, i64)
 
-DEF_HELPER_FLAGS_1(cvtql, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-DEF_HELPER_1(cvtql_v, i64, i64)
-DEF_HELPER_1(cvtql_sv, i64, i64)
-
 DEF_HELPER_FLAGS_1(setroundmode, TCG_CALL_CONST, void, i32)
 DEF_HELPER_FLAGS_1(setflushzero, TCG_CALL_CONST, void, i32)
 DEF_HELPER_FLAGS_0(fp_exc_clear, TCG_CALL_CONST, void)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index 84867b8..f9cd07a 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -1159,26 +1159,6 @@ uint64_t helper_cvtlq (uint64_t a)
 return (lo & 0x3FFF) | (hi & 0xc000);
 }
 
-uint64_t helper_cvtql (uint64_t a)
-{
-return ((a & 0xC000) << 32) | ((a & 0x7FFF) << 29);
-}
-
-uint64_t helper_cvtql_v (uint64_t a)
-{
-if ((int32_t)a != (int64_t)a)
-helper_excp(EXCP_ARITH, EXC_M_IOV);
-return helper_cvtql(a);
-}
-
-uint64_t helper_cvtql_sv (uint64_t a)
-{
-/* ??? I'm pretty sure there's nothing that /sv needs to do that /v
-   doesn't do.  The only thing I can think is that /sv is a valid
-   instruction merely for completeness in the ISA.  */
-return helper_cvtql_v(a);
-}
-
 /* PALcode support special instructions */
 #if !defined (CONFIG_USER_ONLY)
 void helper_hw_rei (void)
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 188e76c..44ce830 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -597,6 +597,41 @@ static inline void gen_fp_exc_raise(int rc, int fn11)
 gen_fp_exc_raise_ignore(rc, fn11, fn11 & QUAL_I ? 0 : float_flag_inexact);
 }
 
+static void gen_fcvtql(int rb, int rc)
+{
+if (unlikely(rc == 31)) {
+return;
+}
+if (unlikely(rb == 31)) {
+tcg_gen_movi_i64(cpu_fir[rc], 0);
+} else {
+TCGv tmp = tcg_temp_new();
+
+tcg_gen_andi_i64(tmp, cpu_fir[rb], 0xC000);
+tcg_gen_andi_i64(cpu_fir[rc], cpu_fir[rb], 0x3FFF);
+tcg_gen_shli_i64(tmp, tmp, 32);
+tcg_gen_shli_i64(cpu_fir[rc], cpu_fir[rc], 29);
+tcg_gen_or_i64(cpu_fir[rc], cpu_fir[rc], tmp);
+
+tcg_temp_free(tmp);
+}
+}
+
+static void gen_fcvtql_v(DisasContext *ctx, int rb, int rc)
+{
+if (rb != 31) {
+int lab = gen_new_label();
+TCGv tmp = tcg_temp_new();
+
+tcg_gen_ext32s_i64(tmp, cpu_fir[rb]);
+tcg_gen_brcond_i64(TCG_COND_EQ, tmp, cpu_fir[rb], lab);
+gen_excp(ctx, EXCP_ARITH, EXC_M_IOV);
+
+gen_set_label(lab);
+}
+gen_fcvtql(rb, rc);
+}
+
 #define FARITH2(name)   \
 static inline void glue(gen_f, name)(int rb, int rc)\
 {   \
@@ -612,9 +647,6 @@ static inline void glue(gen_f, name)(int rb, int rc)\
 }   \
 }
 FARITH2(cvtlq)
-FARITH2(cvtql)
-FARITH2(cvtql_v)
-FARITH2(cvtql_sv)
 
 /* ??? VAX instruction qualifiers ignored.  */
 FARITH2(sqrtf)
@@ -2327,11 +2359,12 @@ static inline int translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0x130:
 /* CVTQL/V */
-gen_fcvtql_v(rb, rc);
-break;
 case 0x530:
 /* CVTQL/SV */
-gen_fcvtql_sv(rb, rc);
+/* ??? I'm pretty sure there's nothing that /sv needs to do that
+   /v doesn't do.  The only thing I can think is that /sv is a
+   valid instruction merely for completeness in the ISA.  */
+gen_fcvtql_v(ctx, rb, rc);
 break;
 default:
 goto invalid_opc;
-- 
1.6.6.1

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Jamie Lokier

Avi Kivity wrote:
> On 03/24/2010 02:32 PM, Anthony Liguori wrote:
> >>You don't get a directory filled with a zillion socket files pointing 
> >>at dead guests.  Agree that's a poor return on investment.
> >
> >
> >Deleting it on atexit combined with flushing the whole directory at 
> >startup is a pretty reasonable solution to this (which is ultimately 
> >how the entirety of /var/run behaves).
> >
> >If you're really paranoid, you can fork() a helper with a shared pipe 
> >to implement unlink on close.
> 
> My paranoia comes nowhere near to my dislike of forked helpers.

Use clone() then, it's cheaper ;-)

Anyway, Linux at least *does* have unlink-on-exit unix sockets: use
the abstract unix namespace.

Enumeration is a different problem from being able to connect to an
instance, and there are several approaches to enumerating multiple
running instances.

One of the most well known at the moment is mDNS service discovery,
and each instance registering with freedesktop's Avahi enumeration
service.

-- Jamie

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Anthony Liguori


On 03/24/2010 06:05 PM, Paul Brook wrote:

On 03/24/2010 05:33 PM, Paul Brook wrote:
 

But now there is a bigger problem, how to pass the property to the
device. It's not fair to require the user to remember to set it.
 

It should not be a property of the device. All devices have a native
endianness (for PCI this is little-endian), and the intermediate
busses/interconnects should determine whether byteswapping occurs.
   

Right, the byte swapping needs to happen at the bus level which requires
that the PCI regions use a different callback mechanism (and don't
register directly with the cpu).
 

Not necessarily a different callback mechanism, but probably a different
registration mechanism.
   


Problem with the current scheme is that it's tied to 
target_phys_addr_t.  It's not a target_phys_addr_t and cannot be used 
with functions that take target_phys_addr_ts.


We can either introduce a generic address type or we can introduce bus 
specific addresses and have different callbacks.  The later has better 
type safety and since this isn't an obvious issue to most developers, 
that's probably an important feature.


Regards,

Anthony Liguori


Paul

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Paul Brook

> On 03/24/2010 05:33 PM, Paul Brook wrote:
> >> But now there is a bigger problem, how to pass the property to the
> >> device. It's not fair to require the user to remember to set it.
> >
> > It should not be a property of the device. All devices have a native
> > endianness (for PCI this is little-endian), and the intermediate
> > busses/interconnects should determine whether byteswapping occurs.
> 
> Right, the byte swapping needs to happen at the bus level which requires
> that the PCI regions use a different callback mechanism (and don't
> register directly with the cpu).

Not necessarily a different callback mechanism, but probably a different 
registration mechanism.

Paul

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Anthony Liguori


On 03/24/2010 05:47 PM, Paul Brook wrote:

Actually, Anthony suggested at some point to just use 64 bits for
TARGET_PHYS_ADDR_BITS and remove the need for hw32/64.

I think that people emulationg 32bits on 32bits would suffer, but have
no clue how much.  Anthony, what was the idea?
 

Sacrificing runtime performance to avoid rebuilding a few files is not
acceptable. I consider the fact that TARGET_PHYS_ADDR_BITS is always 64 on 64-
bit hosts is a bug. It's just hard to fix, and probably even less of a
performance hit, so I haven't bothered yet.
   


It's a statement of correctness really.  Devices should never deal with 
target_phys_addr_t's.


The question is, should a pci_addr_t or a sysbus_addr_t be 64 bit or 
should it be 32-bit on 32-bit platforms.  Honestly, I am extremely 
sceptical that there would be any measurable performance difference.


Regards,

Anthony Liguori


Paul

[Qemu-devel] Re: hi, may I ask some help on the paravirtualization of KVM?

2010-03-24 Thread Charles Duffy


Liang YANG wrote:

Maybe I solove the problem.

I use the qemu-img make a new GustOS img. And install the debian5 on
the image file with option -net nic, model=virtio.
./x86_64-softmmu/qemu-system-x86_64 -hda debian.img -cdrom debian.iso
-net nic, model=virtio


This email reads as:

  -net nic, model=virtio

If this is in fact what you're running with, the whitespace is broken; 
correct would be to have no space after the comma, as so:


  -net nic,model=virtio

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Anthony Liguori


On 03/24/2010 05:33 PM, Paul Brook wrote:

But now there is a bigger problem, how to pass the property to the
device. It's not fair to require the user to remember to set it.
 

It should not be a property of the device. All devices have a native
endianness (for PCI this is little-endian), and the intermediate
busses/interconnects should determine whether byteswapping occurs.
   


Right, the byte swapping needs to happen at the bus level which requires 
that the PCI regions use a different callback mechanism (and don't 
register directly with the cpu).


Regards,

Anthony Liguori


Paul

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Paul Brook

> Actually, Anthony suggested at some point to just use 64 bits for
> TARGET_PHYS_ADDR_BITS and remove the need for hw32/64.
> 
> I think that people emulationg 32bits on 32bits would suffer, but have
> no clue how much.  Anthony, what was the idea?

Sacrificing runtime performance to avoid rebuilding a few files is not 
acceptable. I consider the fact that TARGET_PHYS_ADDR_BITS is always 64 on 64-
bit hosts is a bug. It's just hard to fix, and probably even less of a 
performance hit, so I haven't bothered yet.

Paul

[Qemu-devel] Significant performance regression in qemu-system-mips.

2010-03-24 Thread Rob Landley

I have a native build under qemu that gets killed if it doesn't produce a line 
of output for 60 seconds (hang detection enforced by the host monitoring 
qemu's stdout with --nographic, not from within qemu).

In the most recent release version, it never came close to triggering on mips 
with a 30 second timeout.  In the current -git version (well, as of Thursday 
anyway), it triggers frequently (about 90% of the time) even with a 60 second 
timeout.

I bisected it to this:

commit 1828be316f6637d43dd4c4f5f32925b17fb8107f
Author: Paolo Bonzini 
Date:   Wed Mar 10 11:38:41 2010 +0100

more alarm timer cleanup

The timer_alarm_pending variable is related to the alarm timer but not
placed in the struct.  Also, in qemu_mod_timer the wrong flag was being
tested: the timer is rearmed in the alarm timer "bottom half", so the
right flag to test there is the "pending" flag.

Finally, I hoisted the NULL checks from alarm_has_dynticks to
host_alarm_handler.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Anthony Liguori 

Reverting that patch fixed it (git show HEAD | patch -R -p1), by which I mean 
three consecutive runs with 30 second timeout didn't trigger the hang 
detection.

Unfortunately, I can't revert that patch in current origin/master because most 
of the hunks fail...

Help?

Rob
-- 
Latency is more important than throughput. It's that simple. - Linus Torvalds

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Paul Brook

> But now there is a bigger problem, how to pass the property to the
> device. It's not fair to require the user to remember to set it.

It should not be a property of the device. All devices have a native 
endianness (for PCI this is little-endian), and the intermediate 
busses/interconnects should determine whether byteswapping occurs.

Paul

[Qemu-devel] Re: [PATCH] update bochs vbe interface

2010-03-24 Thread Juan Quintela

Gerd Hoffmann  wrote:
> On 03/24/10 18:04, Juan Quintela wrote:
>> Gerd Hoffmann  wrote:
>>> The bochs vbe interface got a new register a while back, which specifies
>>> the linear framebuffer size in 64k units.  This patch adds support for
>>> the new register to qemu.  With this patch applied vgabios 0.6c works
>>> with qemu.
>>>
>>> Signed-off-by: Gerd Hoffmann
>>
>> It breaks migration.
>>
>> vga.c:2218:VMSTATE_UINT16_ARRAY(vbe_regs, VGACommonState, 
>> VBE_DISPI_INDEX_NB),
>
>> 2218 is the interesting line.  Can we freeze this patch until the
>> subsections stuff is done?
>
> Yea, I see.  Well, the new register doesn't carry any state, it is
> read-only information for the guest.  So the easy way out is to simply
> not save it as we don't have to.

Thinking a bit more about it.

Humm, I think it means.  Can you migrate from a "new" vga to an old one,
and maintain it working?

-s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID0;
+s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID5;
+s->vbe_regs[VBE_DISPI_INDEX_VIDEO_MEMORY_64K] = s->vram_size / (64 * 1024);

After migration, vbe_regs[VBE_DISPI_INDEX_ID] would have the value
VBE_DISPI_ID5, but vbe_regs[VBE_DISPI_INDEX_VIDEO_MEMORY_64K] will have
any random value, no?

>
> cheers,
>   Gerd

[Qemu-devel] Re: [PATCH v2] update bochs vbe interface

2010-03-24 Thread Juan Quintela

Gerd Hoffmann  wrote:
> The bochs vbe interface got a new register a while back, which specifies
> the linear framebuffer size in 64k units.  This patch adds support for
> the new register to qemu.  With this patch applied vgabios 0.6c works
> with qemu.
>
> [ v2:  Don't savevm the new register.  Doing so breaks migration,
>and as it carries read-only information for the guest there
>is no need to save it. ]


It don't compile (as expected).  VMSTATE_UINT16_ARRAY() checks that you
sent the whole array.


/mnt/kvm/qemu/qemu-negotiate/hw/vga.c:2219: error: invalid operands to binary - 
(have ‘uint16_t (*)[10]’ and ‘uint16_t (*)[11]’)
make[1]: *** [vga.o] Error 1
make[1]: *** Waiting for unfinished jobs
^Cmake[1]: *** [translate.o] Interrupt
make[1]: *** [op_helper.o] Interrupt
make: *** [subdir-x86_64-softmmu] Interrupt

> ---
>  hw/vga.c |5 +++--
>  hw/vga_int.h |6 +-
>  2 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/hw/vga.c b/hw/vga.c
> index 6a1a059..f9e07cf 100644
> --- a/hw/vga.c
> +++ b/hw/vga.c
> @@ -1955,7 +1955,8 @@ void vga_common_reset(VGACommonState *s)
>  #ifdef CONFIG_BOCHS_VBE
>  s->vbe_index = 0;
>  memset(s->vbe_regs, '\0', sizeof(s->vbe_regs));
> -s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID0;
> +s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID5;

Now, to show my ignorance, what does this change means?

I can't understand it looking at the whole file (but I don't understand
vga.c too well anyways).

Later, Juan.

Re: [Qemu-devel] TBL register permissions for PPC

2010-03-24 Thread Dmitry Ilyevsky

Hello All,

Please review patch for TBL SPR read access for generic PPC.

*Description:*

POWER specification docs define TBL/TBU SPRs as readable in user
and privileged modes. Therefore SPRs permissions were changed in gen_tbl
function in target-ppc/translate_init.c file.

*Testing:*

Tested with vxworks-6.2 bsp and OS on custom qemu board that includes ppc405
emulated core


BR,
Dmitry Ilyevsky

On Wed, Dec 2, 2009 at 2:23 AM, Alexander Graf  wrote:

>
> On 01.12.2009, at 19:33, Dima Ilyevsky wrote:
>
> > Hello All,
> >
> > I have a question about read permissions of TBL SPR for all ppc
> processors:
> > I have discovered that my application, compiled by WindRiver diab
> compiler and running in vxworks OS on ppc405 architecture bumps into
> exception generated when trying to read TBL or TBU registers:
>
> Unless Linux does something funky, mftlb, mftbu (and mftb on 64 bit) are
> readable from PR=1.
>
> int main()
> {
>long tbu=0, tbl=0;
>
>asm("mftbu %0" : "=r" (tbu));
>asm("mftbl %0" : "=r" (tbl));
>
>printf("TB: %#x %#x\n", tbl, tbu);
> }
>
> ag...@lychee:/tmp> ./mftb
> TB: 0xc0397180 0x603
>
> However it can't be written to:
>
> asm("mttbl %0" : : "r" (tbl));
>
> ag...@lychee:/tmp> ./mftb
> Illegal instruction
>
>
> So yes, I'd suspect a bug in qemu here. Feel free to send a patch.
>
> Alex
>
From 141bf29f5355f163205c57e98590730ed15bfb86 Mon Sep 17 00:00:00 2001
From: n/a 
Date: Thu, 25 Mar 2010 00:22:25 +0300
Subject: [PATCH] Generic PowerPC time base SPR should be accessible in user/priv modes for reading

---
 target-ppc/translate_init.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index db4dc17..e8eadf4 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -777,16 +777,16 @@ static void gen_tbl (CPUPPCState *env)
  &spr_read_tbl, SPR_NOACCESS,
  0x);
 spr_register(env, SPR_TBL,   "TBL",
- SPR_NOACCESS, SPR_NOACCESS,
- SPR_NOACCESS, &spr_write_tbl,
+ &spr_read_tbl, SPR_NOACCESS,
+ &spr_read_tbl, &spr_write_tbl,
  0x);
 spr_register(env, SPR_VTBU,  "TBU",
  &spr_read_tbu, SPR_NOACCESS,
  &spr_read_tbu, SPR_NOACCESS,
  0x);
 spr_register(env, SPR_TBU,   "TBU",
- SPR_NOACCESS, SPR_NOACCESS,
- SPR_NOACCESS, &spr_write_tbu,
+ &spr_read_tbu, SPR_NOACCESS,
+ &spr_read_tbu, &spr_write_tbu,
  0x);
 }
 
-- 
1.7.0

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Anthony Liguori


On 03/24/2010 04:25 PM, Luiz Capitulino wrote:


  I see it as a related problem, because what seems to be under discussion
is the quality of our interfaces with humans and tools.

  Also, when we were discussing the usuability problems I remember that
you

*WARNING: I might be wrong here, please correct me if so*

  you said that you don't push users to libvirt because it's out of sync with
our features.


Yes.


  The point is that, even if this true and even if we solve that,
I don't think it will solve the problem of a good experience for a
'single VM user', because libvirt is more than that and people will likely
be annoyed as much as they are today.

  I believe this problem is up to us to solve.
   


With my qemu hat on, I'm happy to ignore libvirt and say we need to own 
our interfaces and to compete with libvirt for users.


But with my Linux virtualization hat on, I want to see a single 
management interface that users can use without having to make a choice 
between libvirt features or libqemu features.



   Then we make virt-manager optional and this is good because we can sync
features way faster and we don't have to care about _managing_ several
VMs, our world in terms of usability and maintainability is about one VM.

   IMVHO, everything else should be done by third-party tools like libvirt,
we just provide the means for it.

   

We need to have a common management interface for third party tools.
 

  QMP? :-)


Only if QMP is compatible with libvirt.  I don't want a user to have to 
choose between QMP and libvirt.

So far, a libqemu.so with a flexible transport that could be used
directly by a libvirt user (ala cairo/gdk type interactions) seems like
the best solution to me.
 

  I tend to disagree.

  First, I think we should invest our time and effort on the text protocol
business, which is QMP. Having yet another public interface will likely split
efforts a bit and will make clients' life harder (which one should I choose?
What if they get out of sync?). Not to mention that I think Paul has a point,
if QMP is not useful here, why do we have it in the first place (vs. a C library
from the beginning)?

  You mentioned dynamic dispatch, but this is useful only for C clients right?
If so, what C clients you expected beyond libvirt?


Users want a C API.  I don't agree that libvirt is the only C interface 
consumer out there.



  Note that libvirt has added
a new events API recently.

  The second most important point for me is: why do you believe that
libqemu.so is going to improve things? Do you expect that libvirt will
sync faster?


With GDK and Cairo, when Cairo adds a new feature, GDK doesn't have to 
do anything to support it.  Users just get a cairo context from GDK and 
use the cairo API directly.


GDK provides a higher level interface for 2d operations that is more 
platform agnostic, and users can choice to use that or write directly to 
the cairo API.



  If this is the case, I think it will be as slower as it's
currently, as the problem is not the availability of interfaces, but
most likely community integration.

  I like the idea of having a transient qemu-specific API in libvirt,
as suggested by someone in this thread.
   


I really think what we want is for a libvirt user to be able to call 
libqemu functions directly.  There shouldn't have to be libvirt specific 
functions for every operation we expose.


Regards,

Anthony Liguori

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Luiz Capitulino

On Wed, 24 Mar 2010 21:54:09 +0100
Alexander Graf  wrote:

> 
> On 24.03.2010, at 21:32, Anthony Liguori wrote:
> 
> > On 03/24/2010 03:12 PM, Luiz Capitulino wrote:
> >> On Wed, 24 Mar 2010 21:49:45 +0200
> >> Avi Kivity  wrote:
> >> 
> >>   
> >>> On 03/24/2010 06:42 PM, Luiz Capitulino wrote:
> >>> 
>  On Wed, 24 Mar 2010 12:42:16 +0200
>  Avi Kivity   wrote:
>  
>  
>    
> > So, at best qemud is a toy for people who are annoyed by libvirt.
> > 
> > 
>    Is the reason for doing this in qemu because libvirt is annoying?
>    
> >>> Mostly.
> >>> 
> >>> 
>  I don't see
>  how adding yet another layer/daemon is going to improve ours and user's 
>  life
>  (the same applies for libqemu).
>  
>    
> >>> libvirt becomes optional.
> >>> 
> >>  I think it should only be optional if all you want is to run a single VM
> >> in this case what seems to be missing on our side is a _real_ GUI, bundled
> >> with QEMU potentially written in a high-level language.
> >>   
> > 
> > That's a separate problem.
> > 
> >>  Then we make virt-manager optional and this is good because we can sync
> >> features way faster and we don't have to care about _managing_ several
> >> VMs, our world in terms of usability and maintainability is about one VM.
> >> 
> >>  IMVHO, everything else should be done by third-party tools like libvirt,
> >> we just provide the means for it.
> >>   
> > 
> > We need to have a common management interface for third party tools.  
> > libvirt cannot be that today because of the fact that it doesn't support 
> > all of our features.  What we need to figure out is how we can work with 
> > the libvirt team to fix this.
> 
> The feature problem isn't the only one. It's also about ease of use. I 
> personally find the qemu command line easier to use than anything 
> libvirt-derived.

 Because your a developer and it does make sense to have a good CLI,
on the other hand we also have use cases for a GUI bundled in QEMU
and libvirt-derived things, which know how to deal with several
VMs and integrates well with lots of other things.

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Luiz Capitulino

On Wed, 24 Mar 2010 15:32:42 -0500
Anthony Liguori  wrote:

> On 03/24/2010 03:12 PM, Luiz Capitulino wrote:
> > On Wed, 24 Mar 2010 21:49:45 +0200
> > Avi Kivity  wrote:
> >
> >
> >> On 03/24/2010 06:42 PM, Luiz Capitulino wrote:
> >>  
> >>> On Wed, 24 Mar 2010 12:42:16 +0200
> >>> Avi Kivity   wrote:
> >>>
> >>>
> >>>
>  So, at best qemud is a toy for people who are annoyed by libvirt.
> 
>   
> >>>Is the reason for doing this in qemu because libvirt is annoying?
> >>>
> >> Mostly.
> >>
> >>  
> >>> I don't see
> >>> how adding yet another layer/daemon is going to improve ours and user's 
> >>> life
> >>> (the same applies for libqemu).
> >>>
> >>>
> >> libvirt becomes optional.
> >>  
> >   I think it should only be optional if all you want is to run a single VM
> > in this case what seems to be missing on our side is a _real_ GUI, bundled
> > with QEMU potentially written in a high-level language.
> >
> 
> That's a separate problem.

 I see it as a related problem, because what seems to be under discussion
is the quality of our interfaces with humans and tools.

 Also, when we were discussing the usuability problems I remember that
you

*WARNING: I might be wrong here, please correct me if so*

 you said that you don't push users to libvirt because it's out of sync with
our features. The point is that, even if this true and even if we solve that,
I don't think it will solve the problem of a good experience for a
'single VM user', because libvirt is more than that and people will likely
be annoyed as much as they are today.

 I believe this problem is up to us to solve.

> >   Then we make virt-manager optional and this is good because we can sync
> > features way faster and we don't have to care about _managing_ several
> > VMs, our world in terms of usability and maintainability is about one VM.
> >
> >   IMVHO, everything else should be done by third-party tools like libvirt,
> > we just provide the means for it.
> >
> 
> We need to have a common management interface for third party tools.

 QMP? :-)

> libvirt cannot be that today because of the fact that it doesn't support 
> all of our features.  What we need to figure out is how we can work with 
> the libvirt team to fix this.

 Agreed.

> So far, a libqemu.so with a flexible transport that could be used 
> directly by a libvirt user (ala cairo/gdk type interactions) seems like 
> the best solution to me.

 I tend to disagree.

 First, I think we should invest our time and effort on the text protocol
business, which is QMP. Having yet another public interface will likely split
efforts a bit and will make clients' life harder (which one should I choose?
What if they get out of sync?). Not to mention that I think Paul has a point,
if QMP is not useful here, why do we have it in the first place (vs. a C library
from the beginning)?

 You mentioned dynamic dispatch, but this is useful only for C clients right?
If so, what C clients you expected beyond libvirt? Note that libvirt has added
a new events API recently.

 The second most important point for me is: why do you believe that
libqemu.so is going to improve things? Do you expect that libvirt will
sync faster? If this is the case, I think it will be as slower as it's
currently, as the problem is not the availability of interfaces, but
most likely community integration.

 I like the idea of having a transient qemu-specific API in libvirt,
as suggested by someone in this thread.

Re: [Qemu-devel] [PATCH 2/2] machine opts framework

2010-03-24 Thread Anthony Liguori


On 03/24/2010 03:58 PM, Glauber Costa wrote:

On Wed, Mar 24, 2010 at 02:43:35PM -0500, Anthony Liguori wrote:
   

On 03/24/2010 02:26 PM, Glauber Costa wrote:
 

This patch adds initial support for the -machine option, that allows
command line specification of machine attributes (always relying on safe
defaults). Besides its value per-se, it is the saner way we found to
allow for enabling/disabling of kvm's in-kernel irqchip.

A machine with in-kernel-irqchip could be specified as:
-machine irqchip=apic-kvm
And one without it:
-machine irqchip=apic

To demonstrate how it'd work, this patch introduces a choice between
"pic" and "apic", pic being the old-style isa thing.
   

I started from a different place.  See machine-qemuopts in my staging tree.

I think we should combine efforts.

Regards,

 

Absolutely. I see little overlap between what we did. Just a comment on yours:

-static void an5206_init(ram_addr_t ram_size,
- const char *boot_device,
- const char *kernel_filename, const char *kernel_cmdline,
- const char *initrd_filename, const char *cpu_model)
+static void an5206_init(QemuOpts *opts)
  {

Since we're changing init functions anyway, I believe we should also pass
a pointer to the machine structure. With that, we can avoing using the global
current_machine.
   


Yes, I had the same thought.  For instance, with isa-pc is just pc_init 
with an extra parameter.  If we had a structure like:


typedef struct QEMUPCMachine
{
   QEMUMachine parent;
   int pci_enabled;
} QEMUPCMachine;

Then you wouldn't need those dispatch functions.  Not a huge win for x86 
but for sparc and some arm boards, it's pretty significant.


Regards,

Anthony Liguori

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Aurelien Jarno

On Wed, Mar 24, 2010 at 10:24:12PM +0200, Blue Swirl wrote:
> On 3/24/10, Michael S. Tsirkin  wrote:
> > On Wed, Mar 24, 2010 at 10:05:10PM +0200, Blue Swirl wrote:
> >  > On 3/24/10, Michael S. Tsirkin  wrote:
> >  > > On Tue, Mar 23, 2010 at 11:43:51PM +0200, Blue Swirl wrote:
> >  > >  > rtl8139.c, e1000.c: need to convert ldl/stl to 
> > cpu_physical_memory_read/write.
> >  > >
> >  > >
> >  > > I don't see how it would help. These still get target_phys_addr_t which
> >  > >  is per-target. Further, a ton of devices do
> >  > >  cpu_register_physical_memory/qemu_register_coalesced_mmio.
> >  > >  These are also per target.
> >  >
> >  > I don't know what I was eating yesterday: there are no references to
> >  > ldl/stl in either rtl8139 or e1000. In fact, the conversion is simple
> >  > for the device itself, just add a property "be". The attached patch
> >  > performs this part.
> >  >
> >  > But now there is a bigger problem, how to pass the property to the
> >  > device. It's not fair to require the user to remember to set it.
> >
> >
> > I still don't fully understand how come e1000 cares about
> >  target endianness.
> 
> It shouldn't. Maybe the real fix is to remove the byte swapping.
> 

The real fix is actually to add a layer handling bus byte swapping
depending on how bus are connected.

Currently it only works because all big endian boards QEMU emulates
need to byteswap bus access, and none of the little endian boards 
need to do that.

-- 
Aurelien Jarno  GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net

Re: [Qemu-devel] [PATCH 2/2] machine opts framework

2010-03-24 Thread Glauber Costa

On Wed, Mar 24, 2010 at 02:43:35PM -0500, Anthony Liguori wrote:
> On 03/24/2010 02:26 PM, Glauber Costa wrote:
> >This patch adds initial support for the -machine option, that allows
> >command line specification of machine attributes (always relying on safe
> >defaults). Besides its value per-se, it is the saner way we found to
> >allow for enabling/disabling of kvm's in-kernel irqchip.
> >
> >A machine with in-kernel-irqchip could be specified as:
> > -machine irqchip=apic-kvm
> >And one without it:
> > -machine irqchip=apic
> >
> >To demonstrate how it'd work, this patch introduces a choice between
> >"pic" and "apic", pic being the old-style isa thing.
> 
> I started from a different place.  See machine-qemuopts in my staging tree.
> 
> I think we should combine efforts.
> 
> Regards,
> 

Absolutely. I see little overlap between what we did. Just a comment on yours:

-static void an5206_init(ram_addr_t ram_size,
- const char *boot_device,
- const char *kernel_filename, const char *kernel_cmdline,
- const char *initrd_filename, const char *cpu_model)
+static void an5206_init(QemuOpts *opts)
 {

Since we're changing init functions anyway, I believe we should also pass
a pointer to the machine structure. With that, we can avoing using the global
current_machine.

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Alexander Graf


On 24.03.2010, at 21:32, Anthony Liguori wrote:

> On 03/24/2010 03:12 PM, Luiz Capitulino wrote:
>> On Wed, 24 Mar 2010 21:49:45 +0200
>> Avi Kivity  wrote:
>> 
>>   
>>> On 03/24/2010 06:42 PM, Luiz Capitulino wrote:
>>> 
 On Wed, 24 Mar 2010 12:42:16 +0200
 Avi Kivity   wrote:
 
 
   
> So, at best qemud is a toy for people who are annoyed by libvirt.
> 
> 
   Is the reason for doing this in qemu because libvirt is annoying?
   
>>> Mostly.
>>> 
>>> 
 I don't see
 how adding yet another layer/daemon is going to improve ours and user's 
 life
 (the same applies for libqemu).
 
   
>>> libvirt becomes optional.
>>> 
>>  I think it should only be optional if all you want is to run a single VM
>> in this case what seems to be missing on our side is a _real_ GUI, bundled
>> with QEMU potentially written in a high-level language.
>>   
> 
> That's a separate problem.
> 
>>  Then we make virt-manager optional and this is good because we can sync
>> features way faster and we don't have to care about _managing_ several
>> VMs, our world in terms of usability and maintainability is about one VM.
>> 
>>  IMVHO, everything else should be done by third-party tools like libvirt,
>> we just provide the means for it.
>>   
> 
> We need to have a common management interface for third party tools.  libvirt 
> cannot be that today because of the fact that it doesn't support all of our 
> features.  What we need to figure out is how we can work with the libvirt 
> team to fix this.

The feature problem isn't the only one. It's also about ease of use. I 
personally find the qemu command line easier to use than anything 
libvirt-derived.

> So far, a libqemu.so with a flexible transport that could be used directly by 
> a libvirt user (ala cairo/gdk type interactions) seems like the best solution 
> to me.

ACK.

One thing I was thinking is that it might be a good idea to split the qemu 
command line version off as well. The qemu backend would then only speak QMP 
with an empty device case and the actual "qemu" command would run that, connect 
to it via QMP and instanciate everything.

That way we'd get a consistent interface for management apps while keeping an 
easy to use CLI.


Alex

[Qemu-devel] [PATCH] Compile ide/core only once

2010-03-24 Thread Blue Swirl

Make win2k install hack unconditional as it is still restricted to
x86 only in vl.c.

Replace TARGET_PAGE_SIZE with 4096 because that figure is already
used later.

Signed-off-by: Blue Swirl 
---
 Makefile.objs|1 +
 Makefile.target  |   11 ---
 default-configs/arm-softmmu.mak  |1 +
 default-configs/i386-softmmu.mak |1 +
 default-configs/mips-softmmu.mak |1 +
 default-configs/mips64-softmmu.mak   |1 +
 default-configs/mips64el-softmmu.mak |1 +
 default-configs/mipsel-softmmu.mak   |1 +
 default-configs/ppc-softmmu.mak  |1 +
 default-configs/ppc64-softmmu.mak|1 +
 default-configs/ppcemb-softmmu.mak   |1 +
 default-configs/sh4-softmmu.mak  |1 +
 default-configs/sh4eb-softmmu.mak|1 +
 default-configs/sparc64-softmmu.mak  |1 +
 default-configs/x86_64-softmmu.mak   |1 +
 hw/ide/core.c|   10 +++---
 vl.c |2 +-
 17 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/Makefile.objs b/Makefile.objs
index 281f7a6..fe81f6c 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -161,6 +161,7 @@ hw-obj-$(CONFIG_LAN9118) += lan9118.o
 hw-obj-$(CONFIG_NE2000_ISA) += ne2000-isa.o

 # IDE
+hw-obj-$(CONFIG_IDE_CORE) += ide/core.o
 hw-obj-$(CONFIG_IDE_QDEV) += ide/qdev.o
 hw-obj-$(CONFIG_IDE_PCI) += ide/pci.o
 hw-obj-$(CONFIG_IDE_ISA) += ide/isa.o
diff --git a/Makefile.target b/Makefile.target
index eb4d010..a17de90 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -181,8 +181,7 @@ obj-y += rtl8139.o
 obj-y += e1000.o

 # Hardware support
-obj-i386-y = ide/core.o
-obj-i386-y += pckbd.o dma.o
+obj-i386-y = pckbd.o dma.o
 obj-i386-y += vga.o
 obj-i386-y += mc146818rtc.o i8259.o pc.o
 obj-i386-y += cirrus_vga.o apic.o ioapic.o acpi.o piix_pci.o
@@ -191,7 +190,7 @@ obj-i386-y += device-hotplug.o pci-hotplug.o
smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o

 # shared objects
-obj-ppc-y = ppc.o ide/core.o ide/macio.o
+obj-ppc-y = ppc.o ide/macio.o
 obj-ppc-y += vga.o dma.o openpic.o
 # PREP target
 obj-ppc-y += pckbd.o i8259.o mc146818rtc.o
@@ -215,7 +214,6 @@ obj-mips-y += mips_addr.o mips_timer.o mips_int.o
 obj-mips-y += dma.o vga.o i8259.o rc4030.o
 obj-mips-y += vga-isa-mm.o
 obj-mips-y += g364fb.o jazz_led.o dp8393x.o
-obj-mips-y += ide/core.o
 obj-mips-y += gt64xxx.o pckbd.o mc146818rtc.o acpi.o ds1225y.o
 obj-mips-y += piix4.o cirrus_vga.o
 obj-mips-y += mipsnet.o
@@ -248,7 +246,6 @@ obj-cris-y += pflash_cfi02.o

 ifeq ($(TARGET_ARCH), sparc64)
 obj-sparc-y = sun4u.o pckbd.o apb_pci.o
-obj-sparc-y += ide/core.o
 obj-sparc-y += vga.o
 obj-sparc-y += mc146818rtc.o
 obj-sparc-y += cirrus_vga.o
@@ -268,7 +265,7 @@ obj-arm-y += arm-semi.o
 obj-arm-y += pxa2xx.o pxa2xx_pic.o pxa2xx_gpio.o pxa2xx_timer.o pxa2xx_dma.o
 obj-arm-y += pxa2xx_lcd.o pxa2xx_mmci.o pxa2xx_pcmcia.o pxa2xx_keypad.o
 obj-arm-y += pflash_cfi01.o gumstix.o
-obj-arm-y += zaurus.o ide/core.o ide/microdrive.o spitz.o tosa.o tc6393xb.o
+obj-arm-y += zaurus.o ide/microdrive.o spitz.o tosa.o tc6393xb.o
 obj-arm-y += omap1.o omap_lcdc.o omap_dma.o omap_clk.o omap_mmc.o omap_i2c.o
 obj-arm-y += omap2.o omap_dss.o soc_dma.o
 obj-arm-y += omap_sx1.o palm.o tsc210x.o
@@ -282,7 +279,7 @@ obj-arm-y += syborg_virtio.o

 obj-sh4-y = shix.o r2d.o sh7750.o sh7750_regnames.o tc58128.o
 obj-sh4-y += sh_timer.o sh_serial.o sh_intc.o sh_pci.o sm501.o
-obj-sh4-y += ide/core.o ide/mmio.o
+obj-sh4-y += ide/mmio.o

 obj-m68k-y = an5206.o mcf5206.o mcf_uart.o mcf_intc.o mcf5208.o mcf_fec.o
 obj-m68k-y += m68k-semi.o dummy_m68k.o
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index 02ad192..ea878a4 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -8,6 +8,7 @@ CONFIG_ECC=y
 CONFIG_SERIAL=y
 CONFIG_PTIMER=y
 CONFIG_SD=y
+CONFIG_IDE_CORE=y
 CONFIG_MAX7310=y
 CONFIG_WM8750=y
 CONFIG_TWL92230=y
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 4dbf656..59eb670 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -9,6 +9,7 @@ CONFIG_I8254=y
 CONFIG_PCSPK=y
 CONFIG_USB_UHCI=y
 CONFIG_FDC=y
+CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
 CONFIG_IDE_ISA=y
diff --git a/default-configs/mips-softmmu.mak b/default-configs/mips-softmmu.mak
index 345a093..cb48ed1 100644
--- a/default-configs/mips-softmmu.mak
+++ b/default-configs/mips-softmmu.mak
@@ -10,6 +10,7 @@ CONFIG_I8254=y
 CONFIG_PCSPK=y
 CONFIG_USB_UHCI=y
 CONFIG_FDC=y
+CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
 CONFIG_IDE_ISA=y
diff --git a/default-configs/mips64-softmmu.mak
b/default-configs/mips64-softmmu.mak
index 5900ee6..585d6bb 100644
--- a/default-configs/mips64-softmmu.mak
+++ b/default-configs/mips64-softmmu.mak
@@ -10,6 +10,7 @@ CONFIG_I8254=y
 CONFIG_PCSPK=y
 CONFIG_USB_UHCI=y
 CONFIG_FDC=y
+CONFIG_IDE_CORE=y
 CONFIG_IDE_QDEV=y
 CONFIG_IDE_PCI=y
 CONFIG_I

[Qemu-devel] x86_64: iret in long mode resets %fs and %gs base (doesn't on real CPUs)

2010-03-24 Thread Vegard Nossum

Hi,

I've been investigating why some of my code failed on qemu, but
succeeded in bochs and on real hardware. In particular, it turns out
that qemu would reset the FS/GS_BASE_MSR whenever I did iret from ring
0 to 3.

I traced it down to this bit of code (in target-i386/op_helper.c):

static inline void validate_seg(int seg_reg, int cpl)
{
int dpl;
uint32_t e2;

/* XXX: on x86_64, we do not want to nullify FS and GS because
   they may still contain a valid base. I would be interested to
   know how a real x86_64 CPU behaves */
if ((seg_reg == R_FS || seg_reg == R_GS) &&
(env->segs[seg_reg].selector & 0xfffc) == 0)
return;

So the reason why this didn't work in qemu for me was that I was
loading the selector as 8 -- which fails the above test and
validate_seg() proceeds to clear the segment base value. Changing my
own code to only load 0 into %gs from the start fixed the problem for
me.

However, qemu is clearly doing something differently from the real
hardware. I tested both versions (loading 0 or 8 into %gs) on my Intel
P4, and GS_BASE_MSR is preserved in both cases. Perhaps the condition
on the selector value should be removed? (Or perhaps the calls to
validate_seg() for R_FS/R_GS should be removed from
helper_ret_protected()?)

Just a heads up.


Vegard

Re: [Qemu-devel] [PATCH] qemu: jaso-parser: Output the content of invalid keyword

2010-03-24 Thread Luiz Capitulino

On Wed, 24 Mar 2010 17:00:14 +0100
Markus Armbruster  wrote:

> Amos Kong  writes:
> 
> > When input some invialid word 'unknowcmd' through QMP port, qemu outputs 
> > this
> > error message:
> > "parse error: invalid keyword `%s'"
> > This patch makes qemu output the content of invalid keyword, like:
> > "parse error: invalid keyword `unknowcmd'"
> >
> > Signed-off-by: Amos Kong 
> 
> Looks good to me.
> 
> Hint: it's best to put a version in the subject when you respin, like
> [PATCH v2] ...

 Yes, and maintainers may miss a patch down a thread (and it's a good
opportunity to fix the subject).

[Qemu-devel] Re: [PATCH 10/15] virtio-serial: Add QMP events for failed port/device add

2010-03-24 Thread Luiz Capitulino

On Wed, 24 Mar 2010 20:19:28 +0530
Amit Shah  wrote:

> When adding a port or a device to the guest fails, management software
> might be interested in knowing and then cleaning up the host-side of the
> port. Introduce QMP events to signal such errors.

 I'm completely unfamiliar with virtio-serial, so let me ask: how are ports
added? I'd expect the command performing this operation to fail in this case.

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Anthony Liguori


On 03/24/2010 03:12 PM, Luiz Capitulino wrote:

On Wed, 24 Mar 2010 21:49:45 +0200
Avi Kivity  wrote:

   

On 03/24/2010 06:42 PM, Luiz Capitulino wrote:
 

On Wed, 24 Mar 2010 12:42:16 +0200
Avi Kivity   wrote:


   

So, at best qemud is a toy for people who are annoyed by libvirt.

 

   Is the reason for doing this in qemu because libvirt is annoying?
   

Mostly.

 

I don't see
how adding yet another layer/daemon is going to improve ours and user's life
(the same applies for libqemu).

   

libvirt becomes optional.
 

  I think it should only be optional if all you want is to run a single VM
in this case what seems to be missing on our side is a _real_ GUI, bundled
with QEMU potentially written in a high-level language.
   


That's a separate problem.


  Then we make virt-manager optional and this is good because we can sync
features way faster and we don't have to care about _managing_ several
VMs, our world in terms of usability and maintainability is about one VM.

  IMVHO, everything else should be done by third-party tools like libvirt,
we just provide the means for it.
   


We need to have a common management interface for third party tools.  
libvirt cannot be that today because of the fact that it doesn't support 
all of our features.  What we need to figure out is how we can work with 
the libvirt team to fix this.


So far, a libqemu.so with a flexible transport that could be used 
directly by a libvirt user (ala cairo/gdk type interactions) seems like 
the best solution to me.


Regards,

Anthony Liguori

[Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Blue Swirl

On 3/24/10, Michael S. Tsirkin  wrote:
> On Wed, Mar 24, 2010 at 10:24:12PM +0200, Blue Swirl wrote:
>  > On 3/24/10, Michael S. Tsirkin  wrote:
>  > > On Wed, Mar 24, 2010 at 10:05:10PM +0200, Blue Swirl wrote:
>  > >  > On 3/24/10, Michael S. Tsirkin  wrote:
>  > >  > > On Tue, Mar 23, 2010 at 11:43:51PM +0200, Blue Swirl wrote:
>  > >  > >  > rtl8139.c, e1000.c: need to convert ldl/stl to 
> cpu_physical_memory_read/write.
>  > >  > >
>  > >  > >
>  > >  > > I don't see how it would help. These still get target_phys_addr_t 
> which
>  > >  > >  is per-target. Further, a ton of devices do
>  > >  > >  cpu_register_physical_memory/qemu_register_coalesced_mmio.
>  > >  > >  These are also per target.
>  > >  >
>  > >  > I don't know what I was eating yesterday: there are no references to
>  > >  > ldl/stl in either rtl8139 or e1000. In fact, the conversion is simple
>  > >  > for the device itself, just add a property "be". The attached patch
>  > >  > performs this part.
>  > >  >
>  > >  > But now there is a bigger problem, how to pass the property to the
>  > >  > device. It's not fair to require the user to remember to set it.
>  > >
>  > >
>  > > I still don't fully understand how come e1000 cares about
>  > >  target endianness.
>  >
>  > It shouldn't. Maybe the real fix is to remove the byte swapping.
>
>
> Presumably it's there for a reason?
>
>
>  > >  > >  A simple solution would be to change all of cpu_XX functions to
>  > >  > >  get a 64 bit address. This is a lot of churn, if we do this
>  > >  > >  anyway we should also pass length to callbacks, this way
>  > >  > >  rwhandler will get very small or go away completely.
>  > >  >
>  > >  > It's not too much effort to keep the target_phys_addr_t type.
>  > >
>  > >
>  > > I don't understand - target_phys_addr_t is different for different
>  > >  targets to we will need to recompile the code for each target.
>  > >  What am I missing?
>  >
>  > target_phys_addr_t is 64 bit on a 64 bit host, on a 32 bit host it's
>  > size will be either 64 or 32 bits depending on the target. So the
>  > files are compiled once on 64 bit host, twice on 32 bit host if both
>  > 32 and 64 bits targets are selected.
>
>
> How about just making it 64 bit unconditionally?
>  How much do we save by using a 32 bit address value?

On a 32 bit host, probably a lot because of register pressure. And
it's not too much effort to keep the target_phys_addr_t type logic.

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Anthony Liguori

On 03/24/2010 03:24 PM, Blue Swirl wrote:

On 3/24/10, Michael S. Tsirkin  wrote:

On Wed, Mar 24, 2010 at 10:05:10PM +0200, Blue Swirl wrote:
  >  On 3/24/10, Michael S. Tsirkin  wrote:
  >  >  On Tue, Mar 23, 2010 at 11:43:51PM +0200, Blue Swirl wrote:
  >  >   >  rtl8139.c, e1000.c: need to convert ldl/stl to 
cpu_physical_memory_read/write.
  >  >
  >  >
  >  >  I don't see how it would help. These still get target_phys_addr_t which
  >  >   is per-target. Further, a ton of devices do
  >  >   cpu_register_physical_memory/qemu_register_coalesced_mmio.
  >  >   These are also per target.
  >
  >  I don't know what I was eating yesterday: there are no references to
  >  ldl/stl in either rtl8139 or e1000. In fact, the conversion is simple
  >  for the device itself, just add a property "be". The attached patch
  >  performs this part.
  >
  >  But now there is a bigger problem, how to pass the property to the
  >  device. It's not fair to require the user to remember to set it.

I still don't fully understand how come e1000 cares about
  target endianness.

It shouldn't. Maybe the real fix is to remove the byte swapping.

My previous pci memory functions patches removed the byte swapping.

The problem is that PCI devices are going to operate in little endian 
mode (usually) whereas the CPU may be acting in big endian mode.  We 
need to do a byte swap somewhere but the better place to do it is in the 
PCI bus layer.

Regards,

Anthony Liguori

[Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Michael S. Tsirkin

On Wed, Mar 24, 2010 at 10:24:12PM +0200, Blue Swirl wrote:
> On 3/24/10, Michael S. Tsirkin  wrote:
> > On Wed, Mar 24, 2010 at 10:05:10PM +0200, Blue Swirl wrote:
> >  > On 3/24/10, Michael S. Tsirkin  wrote:
> >  > > On Tue, Mar 23, 2010 at 11:43:51PM +0200, Blue Swirl wrote:
> >  > >  > rtl8139.c, e1000.c: need to convert ldl/stl to 
> > cpu_physical_memory_read/write.
> >  > >
> >  > >
> >  > > I don't see how it would help. These still get target_phys_addr_t which
> >  > >  is per-target. Further, a ton of devices do
> >  > >  cpu_register_physical_memory/qemu_register_coalesced_mmio.
> >  > >  These are also per target.
> >  >
> >  > I don't know what I was eating yesterday: there are no references to
> >  > ldl/stl in either rtl8139 or e1000. In fact, the conversion is simple
> >  > for the device itself, just add a property "be". The attached patch
> >  > performs this part.
> >  >
> >  > But now there is a bigger problem, how to pass the property to the
> >  > device. It's not fair to require the user to remember to set it.
> >
> >
> > I still don't fully understand how come e1000 cares about
> >  target endianness.
> 
> It shouldn't. Maybe the real fix is to remove the byte swapping.

Presumably it's there for a reason?

> >  > >  A simple solution would be to change all of cpu_XX functions to
> >  > >  get a 64 bit address. This is a lot of churn, if we do this
> >  > >  anyway we should also pass length to callbacks, this way
> >  > >  rwhandler will get very small or go away completely.
> >  >
> >  > It's not too much effort to keep the target_phys_addr_t type.
> >
> >
> > I don't understand - target_phys_addr_t is different for different
> >  targets to we will need to recompile the code for each target.
> >  What am I missing?
> 
> target_phys_addr_t is 64 bit on a 64 bit host, on a 32 bit host it's
> size will be either 64 or 32 bits depending on the target. So the
> files are compiled once on 64 bit host, twice on 32 bit host if both
> 32 and 64 bits targets are selected.

How about just making it 64 bit unconditionally?
How much do we save by using a 32 bit address value?

-- 
MST

[Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Blue Swirl

On 3/24/10, Michael S. Tsirkin  wrote:
> On Wed, Mar 24, 2010 at 10:05:10PM +0200, Blue Swirl wrote:
>  > On 3/24/10, Michael S. Tsirkin  wrote:
>  > > On Tue, Mar 23, 2010 at 11:43:51PM +0200, Blue Swirl wrote:
>  > >  > rtl8139.c, e1000.c: need to convert ldl/stl to 
> cpu_physical_memory_read/write.
>  > >
>  > >
>  > > I don't see how it would help. These still get target_phys_addr_t which
>  > >  is per-target. Further, a ton of devices do
>  > >  cpu_register_physical_memory/qemu_register_coalesced_mmio.
>  > >  These are also per target.
>  >
>  > I don't know what I was eating yesterday: there are no references to
>  > ldl/stl in either rtl8139 or e1000. In fact, the conversion is simple
>  > for the device itself, just add a property "be". The attached patch
>  > performs this part.
>  >
>  > But now there is a bigger problem, how to pass the property to the
>  > device. It's not fair to require the user to remember to set it.
>
>
> I still don't fully understand how come e1000 cares about
>  target endianness.

It shouldn't. Maybe the real fix is to remove the byte swapping.

>  > >  A simple solution would be to change all of cpu_XX functions to
>  > >  get a 64 bit address. This is a lot of churn, if we do this
>  > >  anyway we should also pass length to callbacks, this way
>  > >  rwhandler will get very small or go away completely.
>  >
>  > It's not too much effort to keep the target_phys_addr_t type.
>
>
> I don't understand - target_phys_addr_t is different for different
>  targets to we will need to recompile the code for each target.
>  What am I missing?

target_phys_addr_t is 64 bit on a 64 bit host, on a 32 bit host it's
size will be either 64 or 32 bits depending on the target. So the
files are compiled once on 64 bit host, twice on 32 bit host if both
32 and 64 bits targets are selected.

>  > From e0ab5cc41c68207be558ccb330f4fb83fba4ee6f Mon Sep 17 00:00:00 2001
>  > From: Blue Swirl 
>  > Date: Wed, 24 Mar 2010 19:54:05 +
>  > Subject: [PATCH] Compile rtl8139 and e1000 only once
>  >
>  > WIP
>  >
>  > Signed-off-by: Blue Swirl 
>  > ---
>  >  Makefile.objs   |2 +
>  >  Makefile.target |4 --
>  >  hw/e1000.c  |  108 
> ++
>  >  hw/rtl8139.c|   82 +++---
>  >  4 files changed, 147 insertions(+), 49 deletions(-)
>  >
>  > diff --git a/Makefile.objs b/Makefile.objs
>  > index 281f7a6..54895f8 100644
>  > --- a/Makefile.objs
>  > +++ b/Makefile.objs
>  > @@ -155,6 +155,8 @@ hw-obj-y += msix.o
>  >  hw-obj-y += ne2000.o
>  >  hw-obj-y += eepro100.o
>  >  hw-obj-y += pcnet.o
>  > +hw-obj-y += rtl8139.o
>  > +hw-obj-y += e1000.o
>  >
>  >  hw-obj-$(CONFIG_SMC91C111) += smc91c111.o
>  >  hw-obj-$(CONFIG_LAN9118) += lan9118.o
>  > diff --git a/Makefile.target b/Makefile.target
>  > index eb4d010..1a86fc4 100644
>  > --- a/Makefile.target
>  > +++ b/Makefile.target
>  > @@ -176,10 +176,6 @@ QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
>  >  # xen backend driver support
>  >  obj-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o
>  >
>  > -# PCI network cards
>  > -obj-y += rtl8139.o
>  > -obj-y += e1000.o
>  > -
>  >  # Hardware support
>  >  obj-i386-y = ide/core.o
>  >  obj-i386-y += pckbd.o dma.o
>  > diff --git a/hw/e1000.c b/hw/e1000.c
>  > index fd3059a..0f72db8 100644
>  > --- a/hw/e1000.c
>  > +++ b/hw/e1000.c
>  > @@ -121,6 +121,7 @@ typedef struct E1000State_st {
>  >  uint16_t reading;
>  >  uint32_t old_eecd;
>  >  } eecd_state;
>  > +uint32_t be;
>  >  } E1000State;
>  >
>  >  #define  defreg(x)   x = (E1000_##x>>2)
>  > @@ -825,14 +826,11 @@ static void (*macreg_writeops[])(E1000State *, int, 
> uint32_t) = {
>  >  enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
>  >
>  >  static void
>  > -e1000_mmio_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>  > +e1000_mmio_writel_le(void *opaque, target_phys_addr_t addr, uint32_t val)
>  >  {
>  >  E1000State *s = opaque;
>  >  unsigned int index = (addr & 0x1) >> 2;
>  >
>  > -#ifdef TARGET_WORDS_BIGENDIAN
>  > -val = bswap32(val);
>  > -#endif
>  >  if (index < NWRITEOPS && macreg_writeops[index])
>  >  macreg_writeops[index](s, index, val);
>  >  else if (index < NREADOPS && macreg_readops[index])
>  > @@ -841,25 +839,47 @@ e1000_mmio_writel(void *opaque, target_phys_addr_t 
> addr, uint32_t val)
>  >  DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08x\n",
>  > index<<2, val);
>  >  }
>  > +static void
>  > +e1000_mmio_writel_be(void *opaque, target_phys_addr_t addr, uint32_t val)
>  > +{
>  > +val = bswap32(val);
>  > +e1000_mmio_writel_le(opaque, addr, val);
>  > +}
>  >
>  >  static void
>  > -e1000_mmio_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
>  > +e1000_mmio_writew_le(void *opaque, target_phys_addr_t addr, uint32_t val)
>  >  {
>  >  // emulate hw without byte enables: no RMW
>  > -e10

[Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Michael S. Tsirkin

On Wed, Mar 24, 2010 at 10:05:10PM +0200, Blue Swirl wrote:
> On 3/24/10, Michael S. Tsirkin  wrote:
> > On Tue, Mar 23, 2010 at 11:43:51PM +0200, Blue Swirl wrote:
> >  > rtl8139.c, e1000.c: need to convert ldl/stl to 
> > cpu_physical_memory_read/write.
> >
> >
> > I don't see how it would help. These still get target_phys_addr_t which
> >  is per-target. Further, a ton of devices do
> >  cpu_register_physical_memory/qemu_register_coalesced_mmio.
> >  These are also per target.
> 
> I don't know what I was eating yesterday: there are no references to
> ldl/stl in either rtl8139 or e1000. In fact, the conversion is simple
> for the device itself, just add a property "be". The attached patch
> performs this part.
> 
> But now there is a bigger problem, how to pass the property to the
> device. It's not fair to require the user to remember to set it.

I still don't fully understand how come e1000 cares about
target endianness.

> >  A simple solution would be to change all of cpu_XX functions to
> >  get a 64 bit address. This is a lot of churn, if we do this
> >  anyway we should also pass length to callbacks, this way
> >  rwhandler will get very small or go away completely.
> 
> It's not too much effort to keep the target_phys_addr_t type.

I don't understand - target_phys_addr_t is different for different
targets to we will need to recompile the code for each target.
What am I missing?


> From e0ab5cc41c68207be558ccb330f4fb83fba4ee6f Mon Sep 17 00:00:00 2001
> From: Blue Swirl 
> Date: Wed, 24 Mar 2010 19:54:05 +
> Subject: [PATCH] Compile rtl8139 and e1000 only once
> 
> WIP
> 
> Signed-off-by: Blue Swirl 
> ---
>  Makefile.objs   |2 +
>  Makefile.target |4 --
>  hw/e1000.c  |  108 ++
>  hw/rtl8139.c|   82 +++---
>  4 files changed, 147 insertions(+), 49 deletions(-)
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index 281f7a6..54895f8 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -155,6 +155,8 @@ hw-obj-y += msix.o
>  hw-obj-y += ne2000.o
>  hw-obj-y += eepro100.o
>  hw-obj-y += pcnet.o
> +hw-obj-y += rtl8139.o
> +hw-obj-y += e1000.o
>  
>  hw-obj-$(CONFIG_SMC91C111) += smc91c111.o
>  hw-obj-$(CONFIG_LAN9118) += lan9118.o
> diff --git a/Makefile.target b/Makefile.target
> index eb4d010..1a86fc4 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -176,10 +176,6 @@ QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
>  # xen backend driver support
>  obj-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o
>  
> -# PCI network cards
> -obj-y += rtl8139.o
> -obj-y += e1000.o
> -
>  # Hardware support
>  obj-i386-y = ide/core.o
>  obj-i386-y += pckbd.o dma.o
> diff --git a/hw/e1000.c b/hw/e1000.c
> index fd3059a..0f72db8 100644
> --- a/hw/e1000.c
> +++ b/hw/e1000.c
> @@ -121,6 +121,7 @@ typedef struct E1000State_st {
>  uint16_t reading;
>  uint32_t old_eecd;
>  } eecd_state;
> +uint32_t be;
>  } E1000State;
>  
>  #define  defreg(x)   x = (E1000_##x>>2)
> @@ -825,14 +826,11 @@ static void (*macreg_writeops[])(E1000State *, int, 
> uint32_t) = {
>  enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
>  
>  static void
> -e1000_mmio_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> +e1000_mmio_writel_le(void *opaque, target_phys_addr_t addr, uint32_t val)
>  {
>  E1000State *s = opaque;
>  unsigned int index = (addr & 0x1) >> 2;
>  
> -#ifdef TARGET_WORDS_BIGENDIAN
> -val = bswap32(val);
> -#endif
>  if (index < NWRITEOPS && macreg_writeops[index])
>  macreg_writeops[index](s, index, val);
>  else if (index < NREADOPS && macreg_readops[index])
> @@ -841,25 +839,47 @@ e1000_mmio_writel(void *opaque, target_phys_addr_t 
> addr, uint32_t val)
>  DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08x\n",
> index<<2, val);
>  }
> +static void
> +e1000_mmio_writel_be(void *opaque, target_phys_addr_t addr, uint32_t val)
> +{
> +val = bswap32(val);
> +e1000_mmio_writel_le(opaque, addr, val);
> +}
>  
>  static void
> -e1000_mmio_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
> +e1000_mmio_writew_le(void *opaque, target_phys_addr_t addr, uint32_t val)
>  {
>  // emulate hw without byte enables: no RMW
> -e1000_mmio_writel(opaque, addr & ~3,
> -  (val & 0x) << (8*(addr & 3)));
> +e1000_mmio_writel_le(opaque, addr & ~3,
> + (val & 0x) << (8*(addr & 3)));
>  }
>  
>  static void
> -e1000_mmio_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
> +e1000_mmio_writew_be(void *opaque, target_phys_addr_t addr, uint32_t val)
>  {
>  // emulate hw without byte enables: no RMW
> -e1000_mmio_writel(opaque, addr & ~3,
> -  (val & 0xff) << (8*(addr & 3)));
> +e1000_mmio_writel_be(opaque, addr & ~3,
> + (val & 0x) << (8*(addr & 3)));
> +}
> +
> +static void
> +e1000_mmio

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Richard Henderson

On 03/24/2010 10:07 AM, Richard Henderson wrote:
> struct CPUSmallCommonState
> {
> // most of the stuff from CPU_COMMON.
> // sorted for some thought of padding elimination.  ;-)
> };
> 
> struct CPULargeCommonState
> {
> CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];
> target_phys_addr_t iotlb[NB_MMU_MODES][CPU_TLB_SIZE];
> struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
> jmp_buf jmp_env;
> };
...
> Now.  If you're compiling a file for which cpu-specific code is ok:
> 
> register CPUXYZLargeState *env __asm__(AREG0);
> #define ENV_SMALL_COMMON_STATE(&env->s.common_s)
> #define ENV_LARGE_COMMON_STATE(&env->common_l)
> 
> If you're compiling a file which is supposed to be independant of cpu:
> 
> register CPUSmallCommonState *env __asm__(AREG0);
> #define ENV_SMALL_COMMON_STATE(env)
> #define ENV_LARGE_COMMON_STATE((CPULargeCommonState *)((char *)env + 
> cpu_large_state_offset))

On 03/24/2010 11:00 AM, Blue Swirl wrote:
> One trick is to define a fixed offset (about half CPUState size) and
> make env point to CPUState + offset. Then the negative part of the
> offset space can be used efficiently. But this just doubles the space
> that can be accessed fast, so it's not a big win.

A good idea.  We can eliminate the cpu_large_state_offset from above via:

struct CPUSmallCommonState
{
// most of the stuff from CPU_COMMON.
} __attribute__((aligned));

struct CPUXYZPrivateState
{
// all the cpu-specific stuff
};

struct CPUXYZSmallState
{
CPUXYZPrivateState p;
CPUSmallCommonState s;
};

struct CPUXYZAllState
{
CPUXYZSmallState s;
CPULargeCommonState l;  // ARG0 register points here.
};

register void *biased_env __asm__(AREG0);

static inline CPUXYZPrivateState *get_env_cpu_private(void)
{
return &((CPUXYZSmallState *)biased_env - 1)->p;
}

static inline CPUSmallCommonState *get_env_common_small(void)
{
return (CPUSmallCommonState *)biased_env - 1;
}

static inline CPULargeCommonState *get_env_common_large(void)
{
return (CPULargeCommonState *)biased_env;
}


r~

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Luiz Capitulino

On Wed, 24 Mar 2010 21:49:45 +0200
Avi Kivity  wrote:

> On 03/24/2010 06:42 PM, Luiz Capitulino wrote:
> > On Wed, 24 Mar 2010 12:42:16 +0200
> > Avi Kivity  wrote:
> >
> >
> >> So, at best qemud is a toy for people who are annoyed by libvirt.
> >>  
> >   Is the reason for doing this in qemu because libvirt is annoying?
> 
> Mostly.
> 
> > I don't see
> > how adding yet another layer/daemon is going to improve ours and user's life
> > (the same applies for libqemu).
> >
> 
> libvirt becomes optional.

 I think it should only be optional if all you want is to run a single VM
in this case what seems to be missing on our side is a _real_ GUI, bundled
with QEMU potentially written in a high-level language.

 Then we make virt-manager optional and this is good because we can sync
features way faster and we don't have to care about _managing_ several
VMs, our world in terms of usability and maintainability is about one VM.

 IMVHO, everything else should be done by third-party tools like libvirt,
we just provide the means for it.

[Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Blue Swirl

On 3/24/10, Michael S. Tsirkin  wrote:
> On Tue, Mar 23, 2010 at 11:43:51PM +0200, Blue Swirl wrote:
>  > rtl8139.c, e1000.c: need to convert ldl/stl to 
> cpu_physical_memory_read/write.
>
>
> I don't see how it would help. These still get target_phys_addr_t which
>  is per-target. Further, a ton of devices do
>  cpu_register_physical_memory/qemu_register_coalesced_mmio.
>  These are also per target.

I don't know what I was eating yesterday: there are no references to
ldl/stl in either rtl8139 or e1000. In fact, the conversion is simple
for the device itself, just add a property "be". The attached patch
performs this part.

But now there is a bigger problem, how to pass the property to the
device. It's not fair to require the user to remember to set it.

>  A simple solution would be to change all of cpu_XX functions to
>  get a 64 bit address. This is a lot of churn, if we do this
>  anyway we should also pass length to callbacks, this way
>  rwhandler will get very small or go away completely.

It's not too much effort to keep the target_phys_addr_t type.
From e0ab5cc41c68207be558ccb330f4fb83fba4ee6f Mon Sep 17 00:00:00 2001
From: Blue Swirl 
Date: Wed, 24 Mar 2010 19:54:05 +
Subject: [PATCH] Compile rtl8139 and e1000 only once

WIP

Signed-off-by: Blue Swirl 
---
 Makefile.objs   |2 +
 Makefile.target |4 --
 hw/e1000.c  |  108 ++
 hw/rtl8139.c|   82 +++---
 4 files changed, 147 insertions(+), 49 deletions(-)

diff --git a/Makefile.objs b/Makefile.objs
index 281f7a6..54895f8 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -155,6 +155,8 @@ hw-obj-y += msix.o
 hw-obj-y += ne2000.o
 hw-obj-y += eepro100.o
 hw-obj-y += pcnet.o
+hw-obj-y += rtl8139.o
+hw-obj-y += e1000.o
 
 hw-obj-$(CONFIG_SMC91C111) += smc91c111.o
 hw-obj-$(CONFIG_LAN9118) += lan9118.o
diff --git a/Makefile.target b/Makefile.target
index eb4d010..1a86fc4 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -176,10 +176,6 @@ QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
 # xen backend driver support
 obj-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o
 
-# PCI network cards
-obj-y += rtl8139.o
-obj-y += e1000.o
-
 # Hardware support
 obj-i386-y = ide/core.o
 obj-i386-y += pckbd.o dma.o
diff --git a/hw/e1000.c b/hw/e1000.c
index fd3059a..0f72db8 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -121,6 +121,7 @@ typedef struct E1000State_st {
 uint16_t reading;
 uint32_t old_eecd;
 } eecd_state;
+uint32_t be;
 } E1000State;
 
 #define	defreg(x)	x = (E1000_##x>>2)
@@ -825,14 +826,11 @@ static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
 
 static void
-e1000_mmio_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+e1000_mmio_writel_le(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
 E1000State *s = opaque;
 unsigned int index = (addr & 0x1) >> 2;
 
-#ifdef TARGET_WORDS_BIGENDIAN
-val = bswap32(val);
-#endif
 if (index < NWRITEOPS && macreg_writeops[index])
 macreg_writeops[index](s, index, val);
 else if (index < NREADOPS && macreg_readops[index])
@@ -841,25 +839,47 @@ e1000_mmio_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08x\n",
index<<2, val);
 }
+static void
+e1000_mmio_writel_be(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+val = bswap32(val);
+e1000_mmio_writel_le(opaque, addr, val);
+}
 
 static void
-e1000_mmio_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+e1000_mmio_writew_le(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
 // emulate hw without byte enables: no RMW
-e1000_mmio_writel(opaque, addr & ~3,
-  (val & 0x) << (8*(addr & 3)));
+e1000_mmio_writel_le(opaque, addr & ~3,
+ (val & 0x) << (8*(addr & 3)));
 }
 
 static void
-e1000_mmio_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+e1000_mmio_writew_be(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
 // emulate hw without byte enables: no RMW
-e1000_mmio_writel(opaque, addr & ~3,
-  (val & 0xff) << (8*(addr & 3)));
+e1000_mmio_writel_be(opaque, addr & ~3,
+ (val & 0x) << (8*(addr & 3)));
+}
+
+static void
+e1000_mmio_writeb_be(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+// emulate hw without byte enables: no RMW
+e1000_mmio_writel_be(opaque, addr & ~3,
+ (val & 0xff) << (8*(addr & 3)));
+}
+
+static void
+e1000_mmio_writeb_le(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+// emulate hw without byte enables: no RMW
+e1000_mmio_writel_le(opaque, addr & ~3,
+ (val & 0xff) << (8*(addr & 3)));
 }
 
 static uint32_t
-e1000_mmio_readl(void *opaque, target_phys_addr_t addr)
+e1000_mmio_readl_le(

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Avi Kivity


On 03/24/2010 06:42 PM, Luiz Capitulino wrote:

On Wed, 24 Mar 2010 12:42:16 +0200
Avi Kivity  wrote:

   

So, at best qemud is a toy for people who are annoyed by libvirt.
 

  Is the reason for doing this in qemu because libvirt is annoying?


Mostly.


I don't see
how adding yet another layer/daemon is going to improve ours and user's life
(the same applies for libqemu).
   


libvirt becomes optional.


  If I got it right, there were two complaints from the kvm-devel flamewar:

1. Qemu has usability problems
2. There's no way an external tool can get /proc/kallsyms info from Qemu

  I don't see how libqemu can help with 1) and having qemud doesn't seem
the best solution for 2) either.

  Still talking about 2), what's wrong in getting the PID or having a QMP
connection in a well known location as suggested by Anthony?
   


I now believe that's the best option.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

[Qemu-devel] [PATCH v2] update bochs vbe interface

2010-03-24 Thread Gerd Hoffmann

The bochs vbe interface got a new register a while back, which specifies
the linear framebuffer size in 64k units.  This patch adds support for
the new register to qemu.  With this patch applied vgabios 0.6c works
with qemu.

[ v2:  Don't savevm the new register.  Doing so breaks migration,
   and as it carries read-only information for the guest there
   is no need to save it. ]
---
 hw/vga.c |5 +++--
 hw/vga_int.h |6 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/hw/vga.c b/hw/vga.c
index 6a1a059..f9e07cf 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -1955,7 +1955,8 @@ void vga_common_reset(VGACommonState *s)
 #ifdef CONFIG_BOCHS_VBE
 s->vbe_index = 0;
 memset(s->vbe_regs, '\0', sizeof(s->vbe_regs));
-s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID0;
+s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID5;
+s->vbe_regs[VBE_DISPI_INDEX_VIDEO_MEMORY_64K] = s->vram_size / (64 * 1024);
 s->vbe_start_addr = 0;
 s->vbe_line_offset = 0;
 s->vbe_bank_mask = (s->vram_size >> 16) - 1;
@@ -2215,7 +2216,7 @@ const VMStateDescription vmstate_vga_common = {
 VMSTATE_UINT8_EQUAL(is_vbe_vmstate, VGACommonState),
 #ifdef CONFIG_BOCHS_VBE
 VMSTATE_UINT16(vbe_index, VGACommonState),
-VMSTATE_UINT16_ARRAY(vbe_regs, VGACommonState, VBE_DISPI_INDEX_NB),
+VMSTATE_UINT16_ARRAY(vbe_regs, VGACommonState, 
VBE_DISPI_INDEX_NB_VMSTATE),
 VMSTATE_UINT32(vbe_start_addr, VGACommonState),
 VMSTATE_UINT32(vbe_line_offset, VGACommonState),
 VMSTATE_UINT32(vbe_bank_mask, VGACommonState),
diff --git a/hw/vga_int.h b/hw/vga_int.h
index 23a42ef..c3c5e21 100644
--- a/hw/vga_int.h
+++ b/hw/vga_int.h
@@ -47,13 +47,17 @@
 #define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7
 #define VBE_DISPI_INDEX_X_OFFSET0x8
 #define VBE_DISPI_INDEX_Y_OFFSET0x9
-#define VBE_DISPI_INDEX_NB  0xa
+#define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa
+
+#define VBE_DISPI_INDEX_NB_VMSTATE  0xa
+#define VBE_DISPI_INDEX_NB  0xb
 
 #define VBE_DISPI_ID0   0xB0C0
 #define VBE_DISPI_ID1   0xB0C1
 #define VBE_DISPI_ID2   0xB0C2
 #define VBE_DISPI_ID3   0xB0C3
 #define VBE_DISPI_ID4   0xB0C4
+#define VBE_DISPI_ID5   0xB0C5
 
 #define VBE_DISPI_DISABLED  0x00
 #define VBE_DISPI_ENABLED   0x01
-- 
1.6.6.1

[Qemu-devel] Re: [PATCH] update bochs vbe interface

2010-03-24 Thread Gerd Hoffmann


On 03/24/10 18:04, Juan Quintela wrote:

Gerd Hoffmann  wrote:

The bochs vbe interface got a new register a while back, which specifies
the linear framebuffer size in 64k units.  This patch adds support for
the new register to qemu.  With this patch applied vgabios 0.6c works
with qemu.

Signed-off-by: Gerd Hoffmann


It breaks migration.

vga.c:2218:VMSTATE_UINT16_ARRAY(vbe_regs, VGACommonState, 
VBE_DISPI_INDEX_NB),



2218 is the interesting line.  Can we freeze this patch until the
subsections stuff is done?


Yea, I see.  Well, the new register doesn't carry any state, it is 
read-only information for the guest.  So the easy way out is to simply 
not save it as we don't have to.


cheers,
  Gerd

Re: [Qemu-devel] [PATCH 2/2] machine opts framework

2010-03-24 Thread Anthony Liguori


On 03/24/2010 02:26 PM, Glauber Costa wrote:

This patch adds initial support for the -machine option, that allows
command line specification of machine attributes (always relying on safe
defaults). Besides its value per-se, it is the saner way we found to
allow for enabling/disabling of kvm's in-kernel irqchip.

A machine with in-kernel-irqchip could be specified as:
-machine irqchip=apic-kvm
And one without it:
-machine irqchip=apic

To demonstrate how it'd work, this patch introduces a choice between
"pic" and "apic", pic being the old-style isa thing.
   


I started from a different place.  See machine-qemuopts in my staging tree.

I think we should combine efforts.

Regards,

Anthony Liguori


---
  hw/boards.h |   10 ++
  hw/pc.c |   45 +++--
  qemu-config.c   |   16 
  qemu-config.h   |1 +
  qemu-options.hx |9 +
  vl.c|   54 ++
  6 files changed, 129 insertions(+), 6 deletions(-)

diff --git a/hw/boards.h b/hw/boards.h
index 6f0f0d7..831728c 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -12,6 +12,15 @@ typedef void QEMUMachineInitFunc(ram_addr_t ram_size,
   const char *initrd_filename,
   const char *cpu_model);

+typedef void (QEMUIrqchipFunc)(void *opaque);
+
+typedef struct QEMUIrqchip {
+const char *name;
+QEMUIrqchipFunc *init;
+int used;
+int is_default;
+} QEMUIrqchip;
+
  typedef struct QEMUMachine {
  const char *name;
  const char *alias;
@@ -28,6 +37,7 @@ typedef struct QEMUMachine {
  no_sdcard:1;
  int is_default;
  GlobalProperty *compat_props;
+QEMUIrqchip *irqchip;
  struct QEMUMachine *next;
  } QEMUMachine;

diff --git a/hw/pc.c b/hw/pc.c
index ba14df0..43ec022 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -752,21 +752,43 @@ int cpu_is_bsp(CPUState *env)
  return env->cpu_index == 0;
  }

+static void qemu_apic_init(void *opaque)
+{
+CPUState *env = opaque;
+if (!(env->cpuid_features&  CPUID_APIC)) {
+fprintf(stderr, "CPU lacks APIC cpuid flag\n");
+exit(1);
+}
+env->cpuid_apic_id = env->cpu_index;
+/* APIC reset callback resets cpu */
+apic_init(env);
+}
+
+static void qemu_pic_init(void *opaque)
+{
+CPUState *env = opaque;
+
+if (smp_cpus>  1) {
+fprintf(stderr, "PIC can't support smp systems\n");
+exit(1);
+}
+qemu_register_reset((QEMUResetHandler*)cpu_reset, env);
+}
+
  static CPUState *pc_new_cpu(const char *cpu_model)
  {
  CPUState *env;
+QEMUIrqchip *ic;

  env = cpu_init(cpu_model);
  if (!env) {
  fprintf(stderr, "Unable to find x86 CPU definition\n");
  exit(1);
  }
-if ((env->cpuid_features&  CPUID_APIC) || smp_cpus>  1) {
-env->cpuid_apic_id = env->cpu_index;
-/* APIC reset callback resets cpu */
-apic_init(env);
-} else {
-qemu_register_reset((QEMUResetHandler*)cpu_reset, env);
+
+for (ic = current_machine->irqchip; ic->name != NULL; ic++) {
+if (ic->used)
+ic->init(env);
  }
  return env;
  }
@@ -1074,6 +1096,17 @@ static QEMUMachine pc_machine = {
  .desc = "Standard PC",
  .init = pc_init_pci,
  .max_cpus = 255,
+.irqchip = (QEMUIrqchip[]){
+{
+.name = "apic",
+.init = qemu_apic_init,
+.is_default = 1,
+},{
+.name = "pic",
+.init = qemu_pic_init,
+},
+{ /* end of list */ },
+},
  .is_default = 1,
  };

diff --git a/qemu-config.c b/qemu-config.c
index 150157c..2b985a9 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -296,6 +296,21 @@ QemuOptsList qemu_cpudef_opts = {
  },
  };

+QemuOptsList qemu_machine_opts = {
+.name = "M",
+.head = QTAILQ_HEAD_INITIALIZER(qemu_machine_opts.head),
+.desc = {
+{
+.name = "mach",
+.type = QEMU_OPT_STRING,
+},{
+.name = "irqchip",
+.type = QEMU_OPT_STRING,
+},
+{ /* end of list */ }
+},
+};
+
  static QemuOptsList *lists[] = {
  &qemu_drive_opts,
  &qemu_chardev_opts,
@@ -306,6 +321,7 @@ static QemuOptsList *lists[] = {
  &qemu_global_opts,
  &qemu_mon_opts,
  &qemu_cpudef_opts,
+&qemu_machine_opts,
  NULL,
  };

diff --git a/qemu-config.h b/qemu-config.h
index f217c58..ea302f0 100644
--- a/qemu-config.h
+++ b/qemu-config.h
@@ -10,6 +10,7 @@ extern QemuOptsList qemu_rtc_opts;
  extern QemuOptsList qemu_global_opts;
  extern QemuOptsList qemu_mon_opts;
  extern QemuOptsList qemu_cpudef_opts;
+extern QemuOptsList qemu_machine_opts;

  QemuOptsList *qemu_find_opts(const char *group);
  int qemu_set_option(const char *str);
diff --git a/qemu-options.hx b/qemu-options.hx
index 8450b45..585ecd2 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@

[Qemu-devel] Re: [PATCH 3/3] monitor: Convert do_migrate() to QError

2010-03-24 Thread Luiz Capitulino

On Tue, 23 Mar 2010 19:07:21 +0100
Markus Armbruster  wrote:

> Human monitor error message changes from "unknown migration protocol:
> FOO" to "Invalid parameter uri".
> 
> The conversion is shallow: the FOO_start_outgoing_migration() aren't
> converted.  Converting them is a big job for relatively little
> practical benefit, so leave it for later.
> 
> Signed-off-by: Markus Armbruster 
> ---
>  migration.c |9 +
>  1 files changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/migration.c b/migration.c
> index 05f6cc5..47d2ab5 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -56,14 +56,14 @@ void qemu_start_incoming_migration(const char *uri)
>  
>  int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)
>  {
> -MigrationState *s = NULL;
> +MigrationState *s;
>  const char *p;
>  int detach = qdict_get_int(qdict, "detach");
>  const char *uri = qdict_get_str(qdict, "uri");
>  
>  if (current_migration &&
>  current_migration->get_status(current_migration) == 
> MIG_STATE_ACTIVE) {
> -monitor_printf(mon, "migration already in progress\n");
> +qerror_report(QERR_MIGRATION_IN_PROGRESS);
>  return -1;
>  }

 What about QERR_OPERATION_IN_PROGRESS? So that we have:

"Operation already in progress: migration".

>  
> @@ -86,12 +86,13 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject 
> **ret_data)
>  (int)qdict_get_int(qdict, "inc"));
>  #endif
>  } else {
> -monitor_printf(mon, "unknown migration protocol: %s\n", uri);
> +qerror_report(QERR_INVALID_PARAMETER, "uri");
>  return -1;
>  }
>  
>  if (s == NULL) {
> -monitor_printf(mon, "migration failed\n");
> +/* TODO push error reporting into the FOO_start_outgoing_migration() 
> */
> +qerror_report(QERR_MIGRATION_FAILED);
>  return -1;
>  }

 I think this one is no better than the automatic UndefinedError
which is going to be triggered. I would only touch this when/if
we get the migration functions converted.

[Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Michael S. Tsirkin

On Tue, Mar 23, 2010 at 11:43:51PM +0200, Blue Swirl wrote:
> rtl8139.c, e1000.c: need to convert ldl/stl to cpu_physical_memory_read/write.

I don't see how it would help. These still get target_phys_addr_t which
is per-target. Further, a ton of devices do
cpu_register_physical_memory/qemu_register_coalesced_mmio.
These are also per target.

A simple solution would be to change all of cpu_XX functions to
get a 64 bit address. This is a lot of churn, if we do this
anyway we should also pass length to callbacks, this way
rwhandler will get very small or go away completely.

-- 
MST

[Qemu-devel] Re: [PATCH 2/4] monitor: New argument type 'b'

2010-03-24 Thread Luiz Capitulino

On Tue, 23 Mar 2010 11:27:56 +0100
Markus Armbruster  wrote:

> This is a boolean value.  Human monitor accepts "on" or "off".
> Consistent with option parsing (see parse_option_bool()).
> 
> Signed-off-by: Markus Armbruster 
> ---
>  monitor.c |   31 +++
>  1 files changed, 31 insertions(+), 0 deletions(-)
> 
> diff --git a/monitor.c b/monitor.c
> index 3ce9a4e..47b68a2 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -85,6 +85,8 @@
>   *
>   * '?'  optional type (for all types, except '/')
>   * '.'  other form of optional type (for 'i' and 'l')
> + * 'b'  boolean
> + *  user mode accepts "on" or "off"
>   * '-'  optional parameter (eg. '-f')
>   *
>   */
> @@ -3841,6 +3843,29 @@ static const mon_cmd_t *monitor_parse_command(Monitor 
> *mon,
>  qdict_put(qdict, key, qfloat_from_double(val));
>  }
>  break;
> +case 'b':
> +{
> +const char *beg;
> +int val;
> +
> +while (qemu_isspace(*p)) {
> +p++;
> +}
> +beg = p;
> +while (qemu_isgraph(*p)) {
> +p++;
> +}
> +if (!strncmp(beg, "on", p - beg)) {
> +val = 1;
> +} else if (!strncmp(beg, "off", p - beg)) {
> +val = 0;
> +} else {
> +monitor_printf(mon, "Expected 'on' or 'off'\n");
> +goto fail;
> +}

 This will make 'on' be the default when no on/off is specified, is that
your intention? I'm wondering if this can cause problems when you add
optional support for it and mixes it with other arguments.

> +qdict_put(qdict, key, qbool_from_int(val));
> +}
> +break;
>  case '-':
>  {
>  const char *tmp = p;
> @@ -4322,6 +4347,12 @@ static int check_arg(const CmdArgs *cmd_args, QDict 
> *args)
>  return -1;
>  }
>  break;
> +case 'b':
> +if (qobject_type(value) != QTYPE_QBOOL) {
> +qerror_report(QERR_INVALID_PARAMETER_TYPE, name, "bool");
> +return -1;
> +}
> +break;
>  case '-':
>  if (qobject_type(value) != QTYPE_QINT &&
>  qobject_type(value) != QTYPE_QBOOL) {

[Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Juan Quintela

Blue Swirl  wrote:
> On 3/24/10, Juan Quintela  wrote:
>> Blue Swirl  wrote:
>>  > Hi,
>>  >
>>  > Here's some planning for getting most files compiled as few times as
>>  > possible. Comments and suggestions are welcome.
>>
>>
>> I took some thought about this at some point.  Problems here start from
>>  "Recursive Makefile condered Harmful (tm)".
>>
>>  Look at how we jump through hops to be able to compile things in
>>  one/other side.
>>
>>  We have:
>>  Makefile
>>  Makefile.target (really lots of them, one for target)
>>  Makefile.hw
>>  Makefile.user
>>
>>  If we had only a single Makefile, things in this department would be
>>  much, much easier. And no, convert to a single Makefile is not trivial
>>  either, but it would make things easier.
>>
>>  Why do we have several Makefiles?  Because we want to compile each file
>>  with different options.
>>
>>  Why do we need to abuse so much VPATH?  Because we need to bring files
>>  randomly from $(ROOT), $(ROOT)/hw  $(ROOT)/$(TARGET).
>
> Would it help if the Makefiles were scattered to each directory, for
> example instead of Makefile.hw we had hw/Makefile?

The interesting one is Makefile.target, hw/Makefile could help, but that
is far away from where the action is.

If you look at Makefile.target from far enough, you will see that it
basically has:

libobj-$(CONFIG_FOO) = ...

ifdef CONFIG_LINUX_USER

endif

ifdef CONFIG_DARWIN_USER
...
endif

ifdef CONFIG_BSD_USER
...
endif

ifdef CONFIG_SOFTMMU
...
endif


The shared bits are very small (out of the libobj-y stuff).
Spliting the others in different Makefiles (or whatever is easy).  How
to get this ones compiled only once per BASE_ARCH/whatever should put us
near the goal of a single Makefile (and compiling each thing just the
number of times required).  Some thought is needed to know how to work here.

Actually, Anthony suggested at some point to just use 64 bits for
TARGET_PHYS_ADDR_BITS and remove the need for hw32/64.

I think that people emulationg 32bits on 32bits would suffer, but have
no clue how much.  Anthony, what was the idea?

>>  Problem with this proposal is that it is not trivial to do in little
>>  steps, and the real big advantages appear when you switch to a single
>>  Makefile at the end.
>
> I may have missed something, but the compile process doesn't care
> about boards, because all boards for some architecture (and therefore
> all devices used by all boards) are linked to a single
> per-architecture executable. So why introduce the boards concept to
> Makefiles?

I missunderstood this bit of your previous message:

> The target dependent cases should be next. On full build, each MIPS
> device file gets compiled four times, PPC files three times and x86
> twice. The devices for architectures that are compiled only once (ARM,
> Cris, Sparc32 etc.) do not need to be touched.

I was refering to this ones, but somehow got confused with boards :(

>
>>  > vl.c: a lot of work. Maybe the CPUState stuff should be separated to a 
>> new file.
>>
>>
>> That should just be a rule in Documentation.  You can't but anything
>>  else in vl.c.  If you move anything out of vl.c (see timers work from
>>  bonzini for example), you get a wild card for free commit bypassing
>>  maintainers or some similar price :)
>
> Cleaning up vl.c would be great, but just for purpose of single
> compilation, it's enough to put the CPUState pieces to a target
> dependent file (cpu-common.c?) and compile the rest once by making
> TARGET_xxx conditional code unconditional. This may still be doable.

I haven't looked at detail at this :(

>>  
>>
>>  I haven't really looked at them at depth.
>>
>>  I looked when I cleaned up the build system, I thought how to do the
>>  next step (outlined before), but got sidetracked by other more urgent
>>  things.
>
> Thanks for the comments.

You are welcome.

Later, Juan.

[Qemu-devel] [PATCH 1/2] early set current_machine

2010-03-24 Thread Glauber Costa

this way, the machine_init function itself can know which machine is current
in use, not only the late init code.
---
 vl.c |5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/vl.c b/vl.c
index d69250c..ceddeac 100644
--- a/vl.c
+++ b/vl.c
@@ -4841,6 +4841,9 @@ int main(int argc, char **argv, char **envp)
 }
 qemu_add_globals();
 
+
+current_machine = machine;
+
 machine->init(ram_size, boot_devices,
   kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
 
@@ -4859,8 +4862,6 @@ int main(int argc, char **argv, char **envp)
 }
 }
 
-current_machine = machine;
-
 /* init USB devices */
 if (usb_enabled) {
 if (foreach_device_config(DEV_USB, usb_parse) < 0)
-- 
1.6.6

[Qemu-devel] [PATCH 0/2] machine opts framework

2010-03-24 Thread Glauber Costa

This patch adds initial support for the -machine option, that allows
command line specification of machine attributes (always relying on safe
defaults). Besides its value per-se, it is the saner way we found to
allow for enabling/disabling of kvm's in-kernel irqchip.

A machine with in-kernel-irqchip could be specified as:
-machine irqchip=apic-kvm
And one without it:
-machine irqchip=apic

To demonstrate how it'd work, this patch introduces a choice between
"pic" and "apic", pic being the old-style isa thing. It does introduce
a behavioral change, however: In old code, pic would always be a fallback.
Now, it will have to be explicitly selected. I believe it is better behaviour,
but this is not the most important part of it, so I can easily go back
if people want it out.

Let the flames begin!

Glauber Costa (2):
  early set current_machine
  machine opts framework

 hw/boards.h |   10 +
 hw/pc.c |   45 -
 qemu-config.c   |   16 ++
 qemu-config.h   |1 +
 qemu-options.hx |9 
 vl.c|   59 +-
 6 files changed, 132 insertions(+), 8 deletions(-)

[Qemu-devel] [PATCH 2/2] machine opts framework

2010-03-24 Thread Glauber Costa

This patch adds initial support for the -machine option, that allows
command line specification of machine attributes (always relying on safe
defaults). Besides its value per-se, it is the saner way we found to
allow for enabling/disabling of kvm's in-kernel irqchip.

A machine with in-kernel-irqchip could be specified as:
-machine irqchip=apic-kvm
And one without it:
-machine irqchip=apic

To demonstrate how it'd work, this patch introduces a choice between
"pic" and "apic", pic being the old-style isa thing. 

---
 hw/boards.h |   10 ++
 hw/pc.c |   45 +++--
 qemu-config.c   |   16 
 qemu-config.h   |1 +
 qemu-options.hx |9 +
 vl.c|   54 ++
 6 files changed, 129 insertions(+), 6 deletions(-)

diff --git a/hw/boards.h b/hw/boards.h
index 6f0f0d7..831728c 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -12,6 +12,15 @@ typedef void QEMUMachineInitFunc(ram_addr_t ram_size,
  const char *initrd_filename,
  const char *cpu_model);
 
+typedef void (QEMUIrqchipFunc)(void *opaque);
+
+typedef struct QEMUIrqchip {
+const char *name;
+QEMUIrqchipFunc *init; 
+int used;
+int is_default;
+} QEMUIrqchip;
+
 typedef struct QEMUMachine {
 const char *name;
 const char *alias;
@@ -28,6 +37,7 @@ typedef struct QEMUMachine {
 no_sdcard:1;
 int is_default;
 GlobalProperty *compat_props;
+QEMUIrqchip *irqchip;
 struct QEMUMachine *next;
 } QEMUMachine;
 
diff --git a/hw/pc.c b/hw/pc.c
index ba14df0..43ec022 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -752,21 +752,43 @@ int cpu_is_bsp(CPUState *env)
 return env->cpu_index == 0;
 }
 
+static void qemu_apic_init(void *opaque)
+{
+CPUState *env = opaque;
+if (!(env->cpuid_features & CPUID_APIC)) {
+fprintf(stderr, "CPU lacks APIC cpuid flag\n");
+exit(1);
+}
+env->cpuid_apic_id = env->cpu_index;
+/* APIC reset callback resets cpu */
+apic_init(env);
+}
+
+static void qemu_pic_init(void *opaque)
+{
+CPUState *env = opaque;
+
+if (smp_cpus > 1) {
+fprintf(stderr, "PIC can't support smp systems\n");
+exit(1);
+}
+qemu_register_reset((QEMUResetHandler*)cpu_reset, env);
+}
+
 static CPUState *pc_new_cpu(const char *cpu_model)
 {
 CPUState *env;
+QEMUIrqchip *ic;
 
 env = cpu_init(cpu_model);
 if (!env) {
 fprintf(stderr, "Unable to find x86 CPU definition\n");
 exit(1);
 }
-if ((env->cpuid_features & CPUID_APIC) || smp_cpus > 1) {
-env->cpuid_apic_id = env->cpu_index;
-/* APIC reset callback resets cpu */
-apic_init(env);
-} else {
-qemu_register_reset((QEMUResetHandler*)cpu_reset, env);
+
+for (ic = current_machine->irqchip; ic->name != NULL; ic++) {
+if (ic->used)
+ic->init(env);
 }
 return env;
 }
@@ -1074,6 +1096,17 @@ static QEMUMachine pc_machine = {
 .desc = "Standard PC",
 .init = pc_init_pci,
 .max_cpus = 255,
+.irqchip = (QEMUIrqchip[]){
+{
+.name = "apic",
+.init = qemu_apic_init,
+.is_default = 1,
+},{
+.name = "pic",
+.init = qemu_pic_init,
+},
+{ /* end of list */ },
+},
 .is_default = 1,
 };
 
diff --git a/qemu-config.c b/qemu-config.c
index 150157c..2b985a9 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -296,6 +296,21 @@ QemuOptsList qemu_cpudef_opts = {
 },
 };
 
+QemuOptsList qemu_machine_opts = {
+.name = "M",
+.head = QTAILQ_HEAD_INITIALIZER(qemu_machine_opts.head),
+.desc = {
+{
+.name = "mach",
+.type = QEMU_OPT_STRING,
+},{
+.name = "irqchip",
+.type = QEMU_OPT_STRING,
+},
+{ /* end of list */ }
+},
+};
+
 static QemuOptsList *lists[] = {
 &qemu_drive_opts,
 &qemu_chardev_opts,
@@ -306,6 +321,7 @@ static QemuOptsList *lists[] = {
 &qemu_global_opts,
 &qemu_mon_opts,
 &qemu_cpudef_opts,
+&qemu_machine_opts,
 NULL,
 };
 
diff --git a/qemu-config.h b/qemu-config.h
index f217c58..ea302f0 100644
--- a/qemu-config.h
+++ b/qemu-config.h
@@ -10,6 +10,7 @@ extern QemuOptsList qemu_rtc_opts;
 extern QemuOptsList qemu_global_opts;
 extern QemuOptsList qemu_mon_opts;
 extern QemuOptsList qemu_cpudef_opts;
+extern QemuOptsList qemu_machine_opts;
 
 QemuOptsList *qemu_find_opts(const char *group);
 int qemu_set_option(const char *str);
diff --git a/qemu-options.hx b/qemu-options.hx
index 8450b45..585ecd2 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -34,6 +34,15 @@ STEXI
 Select the emulated @var{machine} (@code{-M ?} for list)
 ETEXI
 
+DEF("machine", HAS_ARG, QEMU_OPTION_machine,
+"-machine mach=m[,irqchip=chip]\n"
+"select emulated machine (-machine ? for lis

[Qemu-devel] Re: [PATCH 0/4] monitor: Convert do_set_link() to QObject, QError

2010-03-24 Thread Luiz Capitulino

On Tue, 23 Mar 2010 11:27:54 +0100
Markus Armbruster  wrote:

> PATCH 3/4 changes syntax of set_link's second argument from up|down to
> on|off.  I feel that the argument needs to be boolean in QMP, and this
> is the simplest way to get it.
> 
> Alternatives I could try if the syntax change is unwanted:
> 
> * Use the old string argument in QMP.  Easy.
> 
> * Don't convert set_link, create a new command with a boolean
>   argument.
> 
> * Create a argument parser for up|down.

 I like your approach. Daniel do you use set_link in libvirt already?
I've grepped around I didn't found any reference for it.

> 
> Markus Armbruster (4):
>   monitor: Rename argument type 'b' to 'f'
>   monitor: New argument type 'b'
>   monitor: Use argument type 'b' for set_link
>   monitor: Convert do_set_link() to QObject, QError
> 
>  monitor.c   |   39 +++
>  net.c   |   17 ++---
>  net.h   |2 +-
>  qemu-monitor.hx |   13 +++--
>  4 files changed, 49 insertions(+), 22 deletions(-)
>

Re: [Qemu-devel] [PATCH 3/3] S390: Don't compile in virtio-pci

2010-03-24 Thread Blue Swirl

On 3/24/10, Alexander Graf  wrote:
> Blue Swirl wrote:
>  > On 3/24/10, Alexander Graf  wrote:
>  >
>  >> As soon as virtio-pci.c gets compiled and used on S390 the internal qdev 
> magic
>  >>  gets confused and tries to give us PCI devices instead of S390 virtio 
> devices.
>  >>
>  >>  Since we don't have PCI on S390, we can safely not compile virtio-pci at 
> all.
>  >>
>  >>  In order to do this I added a new config option "CONFIG_PCI" that I 
> enabled
>  >>  for every platform except S390. Thanks to this the change should be a 
> complete
>  >>  nop for every other platform.
>  >>
>  >
>  > The name should be CONFIG_VIRTIO_PCI, CONFIG_PCI would enable
>  > compilation of pci.c.
>  >
>
>
> My idea was to keep the config option generic and rip out all PCI stuff
>  from s390x-softmmu. We don't even do MMIO :-).

In that case you should also rip out all PCI cards.

[Qemu-devel] Re: [RFC] vhost-blk implementation

2010-03-24 Thread Michael S. Tsirkin

On Wed, Mar 24, 2010 at 10:58:50AM -0700, Badari Pulavarty wrote:
> Michael S. Tsirkin wrote:
>> On Tue, Mar 23, 2010 at 12:55:07PM -0700, Badari Pulavarty wrote:
>>   
>>> Michael S. Tsirkin wrote:
>>> 
 On Tue, Mar 23, 2010 at 10:57:33AM -0700, Badari Pulavarty wrote:
 
> Michael S. Tsirkin wrote:
> 
>> On Mon, Mar 22, 2010 at 05:34:04PM -0700, Badari Pulavarty wrote:
>>   
>>> Write Results:
>>> ==
>>>
>>> I see degraded IO performance when doing sequential IO write
>>> tests with vhost-blk compared to virtio-blk.
>>>
>>> # time dd of=/dev/vda if=/dev/zero bs=2M oflag=direct
>>>
>>> I get ~110MB/sec with virtio-blk, but I get only ~60MB/sec with
>>> vhost-blk. Wondering why ?
>>> 
>> Try to look and number of interrupts and/or number of exits.
>>   
> I checked interrupts and IO exits - there is no major noticeable  
>  difference between
> vhost-blk and virtio-blk scenerios.
> 
>> It could also be that you are overrunning some queue.
>>
>> I don't see any exit mitigation strategy in your patch:
>> when there are already lots of requests in a queue, it's usually
>> a good idea to disable notifications and poll the
>> queue as requests complete. That could help performance.
>>   
> Do you mean poll eventfd for new requests instead of waiting for 
> new  notifications ?
> Where do you do that in vhost-net code ?
> 
 vhost_disable_notify does this.

 
> Unlike network socket, since we are dealing with a file, there is 
> no  ->poll support for it.
> So I can't poll for the data. And also, Issue I am having is on 
> the   write() side.
> 
 Not sure I understand.

 
> I looked at it some more - I see 512K write requests on the
> virtio-queue  in both vhost-blk and virtio-blk cases. Both qemu or
> vhost is doing synchronous  writes to page cache (there is no write
> batching in qemu that is affecting this  case).  I still puzzled on
> why virtio-blk outperforms vhost-blk.
>
> Thanks,
> Badari
> 
 If you say the number of requests is the same, we are left with:
 - requests are smaller for some reason?
 - something is causing retries?
 
>>> No. IO requests sizes are exactly same (512K) in both cases. There 
>>> are  no retries or
>>> errors in both cases. One thing I am not clear is - for some reason   
>>> guest kernel
>>> could push more data into virtio-ring in case of virtio-blk vs   
>>> vhost-blk. Is this possible ?
>>> Does guest gets to run much sooner in virtio-blk case than vhost-blk 
>>> ?  Sorry, if its dumb question -
>>> I don't understand  all the vhost details :(
>>>
>>> Thanks,
>>> Badari
>>> 
>>
>> BTW, did you put the backend in non-blocking mode?
>> As I said, vhost net passes non-blocking flag to
>> socket backend, but vfs_write/read that you use does
>> not have an option to do this.
>>
>> So we'll need to extend the backend to fix that,
>> but just for demo purposes, you could set non-blocking
>> mode on the file from userspace.
>>
>>   
> Michael,
>
> Atleast I understand why the performance difference now.. My debug
> code is changed the behaviour of virtio-blk which confused me.
>
> 1) virtio-blk is able to batch up writes from various requests.
> 2) virtio-blk offloads the writes to different thread
>
> Where as vhost-blk, I do each request syncrhonously. Hence
> the difference. You are right - i have to make backend async.
> I will working on handing off work to in-kernel threads.
> I am not sure about IO completion handling and calling
> vhost_add_used_and_signal() out of context. But let
> me take a stab at it and ask your help if I run into
> issues.
>
> Thanks,
> Badari
>


The way I did it for vhost net, requests are synchronous
but non-blocking. So if it can't be done directly,
I delay it.

-- 
MST

Re: [Qemu-devel] [PATCH 3/3] S390: Don't compile in virtio-pci

2010-03-24 Thread Alexander Graf

Blue Swirl wrote:
> On 3/24/10, Alexander Graf  wrote:
>   
>> As soon as virtio-pci.c gets compiled and used on S390 the internal qdev 
>> magic
>>  gets confused and tries to give us PCI devices instead of S390 virtio 
>> devices.
>>
>>  Since we don't have PCI on S390, we can safely not compile virtio-pci at 
>> all.
>>
>>  In order to do this I added a new config option "CONFIG_PCI" that I enabled
>>  for every platform except S390. Thanks to this the change should be a 
>> complete
>>  nop for every other platform.
>> 
>
> The name should be CONFIG_VIRTIO_PCI, CONFIG_PCI would enable
> compilation of pci.c.
>   

My idea was to keep the config option generic and rip out all PCI stuff
from s390x-softmmu. We don't even do MMIO :-).

Alex

Re: [Qemu-devel] [PATCH 3/3] S390: Don't compile in virtio-pci

2010-03-24 Thread Blue Swirl

On 3/24/10, Alexander Graf  wrote:
> As soon as virtio-pci.c gets compiled and used on S390 the internal qdev magic
>  gets confused and tries to give us PCI devices instead of S390 virtio 
> devices.
>
>  Since we don't have PCI on S390, we can safely not compile virtio-pci at all.
>
>  In order to do this I added a new config option "CONFIG_PCI" that I enabled
>  for every platform except S390. Thanks to this the change should be a 
> complete
>  nop for every other platform.

The name should be CONFIG_VIRTIO_PCI, CONFIG_PCI would enable
compilation of pci.c.

>  If anyone feels like their platform shouldn't support PCI either, just remove
>  the config option. If you think we should build even less when we don't have
>  PCI, feel free to come up with a follow-up patch.

None of the currently supported Sparc32 boards have PCI. There are
some real devices with PCI (JavaStations) though.

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Blue Swirl

On 3/24/10, Richard Henderson  wrote:
> On 03/24/2010 02:47 AM, Paolo Bonzini wrote:
>
> > 1) make CPUState define only common fields. Include CPUState at the
> > beginning of each per-target CPUXYZState.
> >
>
>  Irritatingly, the common fields contain quite big TLBs.  And the
>  offsets from the start of env affect the compactness of the code
>  generated from TCG.  We really really want the general registers
>  to come first to make sure that those offsets fit the host's
>  reg+offset addressing mode.

One trick is to define a fixed offset (about half CPUState size) and
make env point to CPUState + offset. Then the negative part of the
offset space can be used efficiently. But this just doubles the space
that can be accessed fast, so it's not a big win.

[Qemu-devel] Re: [RFC] vhost-blk implementation

2010-03-24 Thread Badari Pulavarty


Michael S. Tsirkin wrote:

On Tue, Mar 23, 2010 at 12:55:07PM -0700, Badari Pulavarty wrote:
  

Michael S. Tsirkin wrote:


On Tue, Mar 23, 2010 at 10:57:33AM -0700, Badari Pulavarty wrote:
  
  

Michael S. Tsirkin wrote:



On Mon, Mar 22, 2010 at 05:34:04PM -0700, Badari Pulavarty wrote:

  

Write Results:
==

I see degraded IO performance when doing sequential IO write
tests with vhost-blk compared to virtio-blk.

# time dd of=/dev/vda if=/dev/zero bs=2M oflag=direct

I get ~110MB/sec with virtio-blk, but I get only ~60MB/sec with
vhost-blk. Wondering why ?



Try to look and number of interrupts and/or number of exits.

  
I checked interrupts and IO exits - there is no major noticeable   
difference between

vhost-blk and virtio-blk scenerios.



It could also be that you are overrunning some queue.

I don't see any exit mitigation strategy in your patch:
when there are already lots of requests in a queue, it's usually
a good idea to disable notifications and poll the
queue as requests complete. That could help performance.

  
Do you mean poll eventfd for new requests instead of waiting for new  
notifications ?

Where do you do that in vhost-net code ?



vhost_disable_notify does this.

  
  
Unlike network socket, since we are dealing with a file, there is no  
->poll support for it.
So I can't poll for the data. And also, Issue I am having is on the   
write() side.



Not sure I understand.

  
  

I looked at it some more - I see 512K write requests on the
virtio-queue  in both vhost-blk and virtio-blk cases. Both qemu or
vhost is doing synchronous  writes to page cache (there is no write
batching in qemu that is affecting this  case).  I still puzzled on
why virtio-blk outperforms vhost-blk.

Thanks,
Badari



If you say the number of requests is the same, we are left with:
- requests are smaller for some reason?
- something is causing retries?
  
  
No. IO requests sizes are exactly same (512K) in both cases. There are  
no retries or
errors in both cases. One thing I am not clear is - for some reason  
guest kernel
could push more data into virtio-ring in case of virtio-blk vs  
vhost-blk. Is this possible ?
Does guest gets to run much sooner in virtio-blk case than vhost-blk ?  
Sorry, if its dumb question -

I don't understand  all the vhost details :(

Thanks,
Badari



BTW, did you put the backend in non-blocking mode?
As I said, vhost net passes non-blocking flag to
socket backend, but vfs_write/read that you use does
not have an option to do this.

So we'll need to extend the backend to fix that,
but just for demo purposes, you could set non-blocking
mode on the file from userspace.

  

Michael,

Atleast I understand why the performance difference now.. My debug
code is changed the behaviour of virtio-blk which confused me.

1) virtio-blk is able to batch up writes from various requests.
2) virtio-blk offloads the writes to different thread

Where as vhost-blk, I do each request syncrhonously. Hence
the difference. You are right - i have to make backend async.
I will working on handing off work to in-kernel threads.
I am not sure about IO completion handling and calling
vhost_add_used_and_signal() out of context. But let
me take a stab at it and ask your help if I run into
issues.

Thanks,
Badari

[Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Blue Swirl

On 3/24/10, Juan Quintela  wrote:
> Blue Swirl  wrote:
>  > Hi,
>  >
>  > Here's some planning for getting most files compiled as few times as
>  > possible. Comments and suggestions are welcome.
>
>
> I took some thought about this at some point.  Problems here start from
>  "Recursive Makefile condered Harmful (tm)".
>
>  Look at how we jump through hops to be able to compile things in
>  one/other side.
>
>  We have:
>  Makefile
>  Makefile.target (really lots of them, one for target)
>  Makefile.hw
>  Makefile.user
>
>  If we had only a single Makefile, things in this department would be
>  much, much easier. And no, convert to a single Makefile is not trivial
>  either, but it would make things easier.
>
>  Why do we have several Makefiles?  Because we want to compile each file
>  with different options.
>
>  Why do we need to abuse so much VPATH?  Because we need to bring files
>  randomly from $(ROOT), $(ROOT)/hw  $(ROOT)/$(TARGET).

Would it help if the Makefiles were scattered to each directory, for
example instead of Makefile.hw we had hw/Makefile?

>  Problem here, there isn't a simple way to compile files for several
>  target just once (no way to put them).
>
>  Our main copmile rule is:
>
>  $(QEMU_PROG): $(obj-y) $(obj-$(TARGET_BASE_ARCH)-y)
> $(call LINK,$(obj-y) $(obj-$(TARGET_BASE_ARCH)-y))
>
>
>  (notice that things compiled in Makefile are trivial, they are already
>  compiled just once by definition, problems are for all the qemu's we
>  compile).
>
>  We could change: $(obj-$(TARGET_BASE_ARCH)-y) to something like:
>
>  OBJ-TARGET=s/.o/.$(TARGET_BASE_ARCH).o/
>
>  (I forgot the subst Makefile syntax), and have the:
>
>  %.$(TARGET_BASE_ARCH).o: %.c
>gcc $(TARGET_BASE_ARCH options)
>
>  From there, as you suggested, we need some files that are not compiled
>  by architecture, they need to be compiled by board, well, we need to add
>  yet another level obj-$(TARGET_BOARD) or whatever.
>
>  Notice that this is a lot of work, but you are needing the audit to be
>  able to compile only once.  Problem just now is that there is not a
>  simple way to describe that information,  with my proposal it gets
>  trivial to express:
>
>  obj-$(CONFIG_FOO) += foo.o  # You need this for everything
>  obj-mips-$(CONFIG_FOO) += foo.o  # You need this for all mips boards
>  obj-malta-$(CONFIG_FOO) += foo.o  # You need this for all mips malta board
>
>  You still need to do some different magic from hw-32/64 but it could be
>  done this way.  Once you did it this way, you now where the files are
>  (hw or target) and you can drop the VPATH tricks.
>
>  Problem with this proposal is that it is not trivial to do in little
>  steps, and the real big advantages appear when you switch to a single
>  Makefile at the end.

I may have missed something, but the compile process doesn't care
about boards, because all boards for some architecture (and therefore
all devices used by all boards) are linked to a single
per-architecture executable. So why introduce the boards concept to
Makefiles?

>  > vl.c: a lot of work. Maybe the CPUState stuff should be separated to a new 
> file.
>
>
> That should just be a rule in Documentation.  You can't but anything
>  else in vl.c.  If you move anything out of vl.c (see timers work from
>  bonzini for example), you get a wild card for free commit bypassing
>  maintainers or some similar price :)

Cleaning up vl.c would be great, but just for purpose of single
compilation, it's enough to put the CPUState pieces to a target
dependent file (cpu-common.c?) and compile the rest once by making
TARGET_xxx conditional code unconditional. This may still be doable.

>  
>
>  I haven't really looked at them at depth.
>
>  I looked when I cleaned up the build system, I thought how to do the
>  next step (outlined before), but got sidetracked by other more urgent
>  things.

Thanks for the comments.

[Qemu-devel] [PATCH 2/3] S390: Tell user why VM creation failed

2010-03-24 Thread Alexander Graf

The KVM kernel module on S390 refuses to create a VM when the switch_amode
kernel parameter is not used.

Since that is not exactly obvious, let's give the user a nice warning.

Signed-off-by: Alexander Graf 
---
 kvm-all.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index 534ead0..acf7e31 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -609,8 +609,13 @@ int kvm_init(int smp_cpus)
 }
 
 s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
-if (s->vmfd < 0)
+if (s->vmfd < 0) {
+#ifdef TARGET_S390X
+fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
+"your host kernel command line\n");
+#endif
 goto err;
+}
 
 /* initially, KVM allocated its own memory and we had to jump through
  * hooks to make phys_ram_base point to this.  Modern versions of KVM
-- 
1.6.0.2

[Qemu-devel] [PATCH 3/3] S390: Don't compile in virtio-pci

2010-03-24 Thread Alexander Graf

As soon as virtio-pci.c gets compiled and used on S390 the internal qdev magic
gets confused and tries to give us PCI devices instead of S390 virtio devices.

Since we don't have PCI on S390, we can safely not compile virtio-pci at all.

In order to do this I added a new config option "CONFIG_PCI" that I enabled
for every platform except S390. Thanks to this the change should be a complete
nop for every other platform.

If anyone feels like their platform shouldn't support PCI either, just remove
the config option. If you think we should build even less when we don't have
PCI, feel free to come up with a follow-up patch.

Signed-off-by: Alexander Graf 
---
 Makefile.objs  |3 ++-
 default-configs/arm-softmmu.mak|1 +
 default-configs/cris-softmmu.mak   |1 +
 default-configs/i386-softmmu.mak   |1 +
 default-configs/m68k-softmmu.mak   |1 +
 default-configs/microblaze-softmmu.mak |1 +
 default-configs/mips-softmmu.mak   |1 +
 default-configs/mips64-softmmu.mak |1 +
 default-configs/mips64el-softmmu.mak   |1 +
 default-configs/mipsel-softmmu.mak |1 +
 default-configs/ppc-softmmu.mak|1 +
 default-configs/ppc64-softmmu.mak  |1 +
 default-configs/ppcemb-softmmu.mak |1 +
 default-configs/sh4-softmmu.mak|1 +
 default-configs/sh4eb-softmmu.mak  |1 +
 default-configs/sparc-softmmu.mak  |1 +
 default-configs/sparc64-softmmu.mak|1 +
 default-configs/x86_64-softmmu.mak |1 +
 18 files changed, 19 insertions(+), 1 deletions(-)

diff --git a/Makefile.objs b/Makefile.objs
index 281f7a6..a6ce4f5 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -128,7 +128,8 @@ user-obj-y += cutils.o cache-utils.o
 
 hw-obj-y =
 hw-obj-y += loader.o
-hw-obj-y += virtio.o virtio-console.o virtio-pci.o
+hw-obj-y += virtio.o virtio-console.o
+hw-obj-$(CONFIG_PCI) += virtio-pci.o
 hw-obj-y += fw_cfg.o pci.o pci_host.o pcie_host.o
 hw-obj-y += watchdog.o
 hw-obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index 02ad192..a3819e1 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -24,3 +24,4 @@ CONFIG_SSI_SD=y
 CONFIG_LAN9118=y
 CONFIG_SMC91C111=y
 CONFIG_DS1338=y
+CONFIG_PCI=y
diff --git a/default-configs/cris-softmmu.mak b/default-configs/cris-softmmu.mak
index 8711402..9377235 100644
--- a/default-configs/cris-softmmu.mak
+++ b/default-configs/cris-softmmu.mak
@@ -2,3 +2,4 @@
 
 CONFIG_NAND=y
 CONFIG_PTIMER=y
+CONFIG_PCI=y
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 4dbf656..192dcb8 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -16,3 +16,4 @@ CONFIG_IDE_PIIX=y
 CONFIG_NE2000_ISA=y
 CONFIG_PIIX_PCI=y
 CONFIG_SOUND=y
+CONFIG_PCI=y
diff --git a/default-configs/m68k-softmmu.mak b/default-configs/m68k-softmmu.mak
index 0a78375..07e02d6 100644
--- a/default-configs/m68k-softmmu.mak
+++ b/default-configs/m68k-softmmu.mak
@@ -2,3 +2,4 @@
 
 CONFIG_GDBSTUB_XML=y
 CONFIG_PTIMER=y
+CONFIG_PCI=y
diff --git a/default-configs/microblaze-softmmu.mak 
b/default-configs/microblaze-softmmu.mak
index c800c16..ddc3c15 100644
--- a/default-configs/microblaze-softmmu.mak
+++ b/default-configs/microblaze-softmmu.mak
@@ -1,3 +1,4 @@
 # Default configuration for microblaze-softmmu
 
 CONFIG_PTIMER=y
+CONFIG_PCI=y
diff --git a/default-configs/mips-softmmu.mak b/default-configs/mips-softmmu.mak
index 345a093..4598a0d 100644
--- a/default-configs/mips-softmmu.mak
+++ b/default-configs/mips-softmmu.mak
@@ -16,3 +16,4 @@ CONFIG_IDE_ISA=y
 CONFIG_IDE_PIIX=y
 CONFIG_NE2000_ISA=y
 CONFIG_SOUND=y
+CONFIG_PCI=y
diff --git a/default-configs/mips64-softmmu.mak 
b/default-configs/mips64-softmmu.mak
index 5900ee6..6c7f3c6 100644
--- a/default-configs/mips64-softmmu.mak
+++ b/default-configs/mips64-softmmu.mak
@@ -16,3 +16,4 @@ CONFIG_IDE_ISA=y
 CONFIG_IDE_PIIX=y
 CONFIG_NE2000_ISA=y
 CONFIG_SOUND=y
+CONFIG_PCI=y
diff --git a/default-configs/mips64el-softmmu.mak 
b/default-configs/mips64el-softmmu.mak
index 3e1ba93..2dcac82 100644
--- a/default-configs/mips64el-softmmu.mak
+++ b/default-configs/mips64el-softmmu.mak
@@ -16,3 +16,4 @@ CONFIG_IDE_ISA=y
 CONFIG_IDE_PIIX=y
 CONFIG_NE2000_ISA=y
 CONFIG_SOUND=y
+CONFIG_PCI=y
diff --git a/default-configs/mipsel-softmmu.mak 
b/default-configs/mipsel-softmmu.mak
index 17b83d0..e3e3878 100644
--- a/default-configs/mipsel-softmmu.mak
+++ b/default-configs/mipsel-softmmu.mak
@@ -16,3 +16,4 @@ CONFIG_IDE_ISA=y
 CONFIG_IDE_PIIX=y
 CONFIG_NE2000_ISA=y
 CONFIG_SOUND=y
+CONFIG_PCI=y
diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak
index 5fe591c..50932fa 100644
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -15,3 +15,4 @@ CONFIG_IDE_ISA=y
 CONFIG_IDE_CMD646=y
 CONFIG_NE2000_ISA=y
 CONFIG_SOUND=y
+CONFIG_PCI=y
diff --git a/default-configs/ppc64-s

[Qemu-devel] [PATCH 1/3] S390: Add stub for cpu_get_phys_page_debug

2010-03-24 Thread Alexander Graf

We don't implement any virtual memory in the S390 target so far, so let's
add a stub for this now mandatory function.

Fixes building of S390 target.

Signed-off-by: Alexander Graf 
---
 target-s390x/helper.c |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index ba0c052..4a5297b 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -58,6 +58,11 @@ void cpu_reset(CPUS390XState *env)
 tlb_flush(env, 1);
 }
 
+target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
+{
+return 0;
+}
+
 #ifndef CONFIG_USER_ONLY
 
 int cpu_s390x_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
-- 
1.6.0.2

[Qemu-devel] [PATCH 0/3] Fix S390x target

2010-03-24 Thread Alexander Graf

We're in a bad situation with the S390 qemu target. It only works with KVM,
so people can't test it when they don't have access to a real S390 machine.

While trying to build and use s390x-softmmu again I stumbled across a couple
of issues, all addressed in this patch set. The patches should all not affect
non-s390 targets at all.

Alexander Graf (3):
  S390: Add stub for cpu_get_phys_page_debug
  S390: Tell user why VM creation failed
  S390: Don't compile in virtio-pci

 Makefile.objs  |3 ++-
 default-configs/arm-softmmu.mak|1 +
 default-configs/cris-softmmu.mak   |1 +
 default-configs/i386-softmmu.mak   |1 +
 default-configs/m68k-softmmu.mak   |1 +
 default-configs/microblaze-softmmu.mak |1 +
 default-configs/mips-softmmu.mak   |1 +
 default-configs/mips64-softmmu.mak |1 +
 default-configs/mips64el-softmmu.mak   |1 +
 default-configs/mipsel-softmmu.mak |1 +
 default-configs/ppc-softmmu.mak|1 +
 default-configs/ppc64-softmmu.mak  |1 +
 default-configs/ppcemb-softmmu.mak |1 +
 default-configs/sh4-softmmu.mak|1 +
 default-configs/sh4eb-softmmu.mak  |1 +
 default-configs/sparc-softmmu.mak  |1 +
 default-configs/sparc64-softmmu.mak|1 +
 default-configs/x86_64-softmmu.mak |1 +
 kvm-all.c  |7 ++-
 target-s390x/helper.c  |5 +
 20 files changed, 30 insertions(+), 2 deletions(-)

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Paul Brook

>  1) make CPUState define only common fields. Include CPUState at the
>  beginning of each per-target CPUXYZState.
> >>>
> >>> Irritatingly, the common fields contain quite big TLBs. And the
> >>> offsets from the start of env affect the compactness of the code
> >>> generated from TCG. We really really want the general registers
> >>> to come first to make sure that those offsets fit the host's
> >>> reg+offset addressing mode.
> >>
> >> What about adding a 512-bytes (or more) block or something like that at
> >> the beginning of CPUState with a union, so you can put the per-target
> >> stuff there?
> >
> > Is it really worth the hassle? Anything touching CPUState is probably
> > going to be CPU specific anyway.
> 
> qemu-timer.c, hw/dma.c is not and these are the first two files I looked
> at.  translate-all.c is the third, and it is except for a trivial cleanup.

The use in hw/dma.c is incorrect.  See previous discussion about how 
qemu_bh_schedule_idle needs to go away.

I'm also unconvinced by your numbers. My i386-softmmu/ directory contains only 
43 object files, most of are device emulation and don't touch CPU state at 
all. arm-softmmu/ contains a good number more, but that's mostly board init 
(which needs to know which CPU it's creating), and devices that are only used 
by one board so noone's bothered to move them into libhw.

Paul

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Richard Henderson

On 03/24/2010 07:45 AM, Paolo Bonzini wrote:
> On 03/24/2010 12:19 PM, Richard Henderson wrote:
>> On 03/24/2010 02:47 AM, Paolo Bonzini wrote:
>>> 1) make CPUState define only common fields. Include CPUState at the
>>> beginning of each per-target CPUXYZState.
>>
>> Irritatingly, the common fields contain quite big TLBs. And the
>> offsets from the start of env affect the compactness of the code
>> generated from TCG. We really really want the general registers
>> to come first to make sure that those offsets fit the host's
>> reg+offset addressing mode.
> 
> What about adding a 512-bytes (or more) block or something like that at
> the beginning of CPUState with a union, so you can put the per-target
> stuff there?

I think that would be confusing.

What might be just as good (although possibly just as confusing)
is to move the big members into a different structure.  E.g.

struct CPUSmallCommonState
{
// most of the stuff from CPU_COMMON.
// sorted for some thought of padding elimination.  ;-)
};

struct CPULargeCommonState
{
CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];
target_phys_addr_t iotlb[NB_MMU_MODES][CPU_TLB_SIZE];
struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
jmp_buf jmp_env;
};

struct CPUXYZSmallState
{
CPUSmallCommonState common_s;
// the rest of the cpu-specific stuff.
};

struct CPUXYZLargeState
{
CPUXYZSmallState s;
CPUBigCommonState common_l;
};

extern int cpu_large_state_offset = offsetof(CPUXYZLargeState, common_l);

Now.  If you're compiling a file for which cpu-specific code is ok:

register CPUXYZLargeState *env __asm__(AREG0);
#define ENV_SMALL_COMMON_STATE(&env->s.common_s)
#define ENV_LARGE_COMMON_STATE(&env->common_l)

If you're compiling a file which is supposed to be independant of cpu:

register CPUSmallCommonState *env __asm__(AREG0);
#define ENV_SMALL_COMMON_STATE(env)
#define ENV_LARGE_COMMON_STATE((CPULargeCommonState *)((char *)env + 
cpu_large_state_offset))

For the gcc-compiled code, the addition of the cpu_large_state_offset
is probably more or less on par in efficiency with indirection.  But
for TCG generated code, the variable read happens at code generation
time, which means we *still* have a constant in the generated code.

r~

[Qemu-devel] Re: [PATCH] update bochs vbe interface

2010-03-24 Thread Juan Quintela

Gerd Hoffmann  wrote:
> The bochs vbe interface got a new register a while back, which specifies
> the linear framebuffer size in 64k units.  This patch adds support for
> the new register to qemu.  With this patch applied vgabios 0.6c works
> with qemu.
>
> Signed-off-by: Gerd Hoffmann 

It breaks migration.

vga.c:525:if (s->vbe_index <= VBE_DISPI_INDEX_NB) {
vga.c:564:if (s->vbe_index <= VBE_DISPI_INDEX_NB) {
vga.c:2218:VMSTATE_UINT16_ARRAY(vbe_regs, VGACommonState, 
VBE_DISPI_INDEX_NB),
vga_int.h:50:#define VBE_DISPI_INDEX_NB  0xa
vga_int.h:71:uint16_t vbe_regs[VBE_DISPI_INDEX_NB];  \


2218 is the interesting line.  Can we freeze this patch until the
subsections stuff is done?

I hope to sent a proposal Friday?  And can use this as guinea pig?

Later, Juan.

> ---
>  hw/vga.c |3 ++-
>  hw/vga_int.h |4 +++-
>  2 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/hw/vga.c b/hw/vga.c
> index 6a1a059..a5c6997 100644
> --- a/hw/vga.c
> +++ b/hw/vga.c
> @@ -1955,7 +1955,8 @@ void vga_common_reset(VGACommonState *s)
>  #ifdef CONFIG_BOCHS_VBE
>  s->vbe_index = 0;
>  memset(s->vbe_regs, '\0', sizeof(s->vbe_regs));
> -s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID0;
> +s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID5;
> +s->vbe_regs[VBE_DISPI_INDEX_VIDEO_MEMORY_64K] = s->vram_size / (64 * 
> 1024);
>  s->vbe_start_addr = 0;
>  s->vbe_line_offset = 0;
>  s->vbe_bank_mask = (s->vram_size >> 16) - 1;
> diff --git a/hw/vga_int.h b/hw/vga_int.h
> index 23a42ef..4b8fc74 100644
> --- a/hw/vga_int.h
> +++ b/hw/vga_int.h
> @@ -47,13 +47,15 @@
>  #define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7
>  #define VBE_DISPI_INDEX_X_OFFSET0x8
>  #define VBE_DISPI_INDEX_Y_OFFSET0x9
> -#define VBE_DISPI_INDEX_NB  0xa
> +#define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa
> +#define VBE_DISPI_INDEX_NB  0xb
>  
>  #define VBE_DISPI_ID0   0xB0C0
>  #define VBE_DISPI_ID1   0xB0C1
>  #define VBE_DISPI_ID2   0xB0C2
>  #define VBE_DISPI_ID3   0xB0C3
>  #define VBE_DISPI_ID4   0xB0C4
> +#define VBE_DISPI_ID5   0xB0C5
>  
>  #define VBE_DISPI_DISABLED  0x00
>  #define VBE_DISPI_ENABLED   0x01

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Luiz Capitulino

On Wed, 24 Mar 2010 12:42:16 +0200
Avi Kivity  wrote:

> So, at best qemud is a toy for people who are annoyed by libvirt.

 Is the reason for doing this in qemu because libvirt is annoying? I don't see
how adding yet another layer/daemon is going to improve ours and user's life
(the same applies for libqemu).

 If I got it right, there were two complaints from the kvm-devel flamewar:

1. Qemu has usability problems
2. There's no way an external tool can get /proc/kallsyms info from Qemu

 I don't see how libqemu can help with 1) and having qemud doesn't seem
the best solution for 2) either.

 Still talking about 2), what's wrong in getting the PID or having a QMP
connection in a well known location as suggested by Anthony?

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Paolo Bonzini


On 03/24/2010 03:56 PM, Paul Brook wrote:

On 03/24/2010 12:19 PM, Richard Henderson wrote:

On 03/24/2010 02:47 AM, Paolo Bonzini wrote:

1) make CPUState define only common fields. Include CPUState at the
beginning of each per-target CPUXYZState.


Irritatingly, the common fields contain quite big TLBs. And the
offsets from the start of env affect the compactness of the code
generated from TCG. We really really want the general registers
to come first to make sure that those offsets fit the host's
reg+offset addressing mode.


What about adding a 512-bytes (or more) block or something like that at
the beginning of CPUState with a union, so you can put the per-target
stuff there?


Is it really worth the hassle? Anything touching CPUState is probably going to
be CPU specific anyway.


qemu-timer.c, hw/dma.c is not and these are the first two files I looked 
at.  translate-all.c is the third, and it is except for a trivial cleanup.


Big parts of vl.c are independent too.

As a quick check:

$ git grep -l 'CPUState' | grep -ve ^tcg -e ^target- | wc -l
96
$ git grep -l 'CPUState' | grep -ve ^tcg -e ^target- | \
 xargs grep -l '#if.*TARGET_' | wc -l
36

The ones that remain are pretty much what would you expect, besides 
translate-all.c and some in hw/ which I snipped:


bsd-user/main.c
darwin-user/main.c darwin-user/qemu.h darwin-user/signal.c
linux-user/elfload.c linux-user/main.c linux-user/qemu.h
linux-user/signal.c linux-user/syscall.c

cpu-all.h cpu-defs.h cpu-exec.c
def-helper.h
disas.c
exec-all.h exec.c
gdbstub.c
monitor.c
translate-all.c
vl.c

Of course this doesn't mean that 60 files are target-independent, but 
30-ish probably are or can be made so.


It would also help code clarity to use CPUXYZState more, to understand 
which hw/ files are specific to one model.  For hw/s390-virtio.c that's 
obvious, but slightly less so for hw/sun4m.c and even less so for 
hw/syborg.c.  This is an independent cleanup.


Paolo

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Paul Brook

> Paul Brook  writes:
> >> > IMO the no_user flag is a bug, and should not exist.
> >>
> >> Sorry, what's that?
> >
> > Usually an indication that a device has been incorrectly or inproperly
> > converted to the qdev interface.
> 
> Can also be an indication that the device can't support multiple
> instances.  For instance:
>qdev: Tag isa-fdc, PIIX3 IDE and PIIX4 IDE as no-user

I still claim this is a bug, and see no good reason why we shouldn't support 
multiple floppy controllers, ISA busses, etc.

Paul

Re: [Qemu-devel] [PATCH] qemu: jaso-parser: Output the content of invalid keyword

2010-03-24 Thread Markus Armbruster

Amos Kong  writes:

> When input some invialid word 'unknowcmd' through QMP port, qemu outputs this
> error message:
> "parse error: invalid keyword `%s'"
> This patch makes qemu output the content of invalid keyword, like:
> "parse error: invalid keyword `unknowcmd'"
>
> Signed-off-by: Amos Kong 

Looks good to me.

Hint: it's best to put a version in the subject when you respin, like
[PATCH v2] ...

Re: [Qemu-devel] Re: [libvirt] Supporting hypervisor specific APIs in libvirt

2010-03-24 Thread Markus Armbruster

Paul Brook  writes:

>> > IMO the no_user flag is a bug, and should not exist.
>> 
>> Sorry, what's that?
>
> Usually an indication that a device has been incorrectly or inproperly 
> converted to the qdev interface.

Can also be an indication that the device can't support multiple
instances.  For instance:

commit 39a51dfda835a75c0ebbfd92705b96e4de77f795
Author: Markus Armbruster 
Date:   Tue Oct 27 13:52:13 2009 +0100

qdev: Tag isa-fdc, PIIX3 IDE and PIIX4 IDE as no-user

These devices are created automatically, and attempting to create
another one with -device fails with "qemu: hardware error:
register_ioport_write: invalid opaque".

Signed-off-by: Markus Armbruster 
Signed-off-by: Anthony Liguori 

With no-user, we at least fail with a decent error message.

I don't think it's fair to demand that a qdev conversion must relax
restrictions that haven't otherwise bothered us to be correct and
proper.  We'll relax them if and when they bother us enough to make
somebody send a decent patch.

And yes, there are better ways to disallow multiple instances of a
device than declaring it no-user.  Patches welcome.

[Qemu-devel] [PATCH] qemu: jaso-parser: Output the content of invalid keyword

2010-03-24 Thread Amos Kong


When input some invialid word 'unknowcmd' through QMP port, qemu outputs this
error message:
"parse error: invalid keyword `%s'"
This patch makes qemu output the content of invalid keyword, like:
"parse error: invalid keyword `unknowcmd'"

Signed-off-by: Amos Kong 
---
 json-parser.c |8 +++-
 1 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/json-parser.c b/json-parser.c
index 579928f..b55d763 100644
--- a/json-parser.c
+++ b/json-parser.c
@@ -12,6 +12,7 @@
  */
 
 #include 
+#include 
 
 #include "qemu-common.h"
 #include "qstring.h"
@@ -93,7 +94,12 @@ static int token_is_escape(QObject *obj, const char *value)
  */
 static void parse_error(JSONParserContext *ctxt, QObject *token, const char 
*msg, ...)
 {
-fprintf(stderr, "parse error: %s\n", msg);
+va_list ap;
+va_start(ap, msg);
+fprintf(stderr, "parse error: ");
+vfprintf(stderr, msg, ap);
+fprintf(stderr, "\n");
+va_end(ap);
 }
 
 /**
-- 
1.6.3.3

Re: [Qemu-devel] [PATCH -V2 00/22] virtio-9p: paravirtual file system passthrough

2010-03-24 Thread Luiz Capitulino

On Wed, 24 Mar 2010 09:28:36 +0530
"Aneesh Kumar K. V"  wrote:

> On Tue, 23 Mar 2010 20:17:33 -0300, Luiz Capitulino  
> wrote:
> > On Tue, 16 Mar 2010 14:44:58 +0530
> > "Aneesh Kumar K.V"  wrote:
> > 
> > > Hi,
> > > 
> > > 
> > > This patch series adds a paravirtual file system passthrough mechanism to 
> > > QEMU
> > > based on the 9P protocol. With the current implementation, all I/O is 
> > > implemented
> > > in the VCPU thread.  We've modified the protocol handlers so that we can 
> > > support
> > > dispatch I/O in a thread pool. The actual thread pool implementation will 
> > > be posted later
> > > 
> > > This patch set should work with any recent Linux kernel as virtio-9p has 
> > > been
> > > supported for a few kernel releases now. Export dir is specified using 
> > > the below
> > > Qemu option.
> > > 
> > > -device virtio-9p-pci,share_path=/mnt/,mount_tag=v_mnt
> > > 
> > > mount_tag is used to identify the mount point in the kernel. This will be 
> > > available in Linux
> > > kernel via /sys/devices/virtio-pci/virtio1/mount_tag file.
> > 
> >  I tried this very '-device' line and I can see that the guest has loaded
> > the virtio modules, but there isn't anything in the virtio0 directory other
> > than standard sysfs files.
> > 
> >  Is there a way to debug this?
> > 
> 
> which version of the kernel ? 

2.6.31.5

> The latest linus tree have all the needed
> changes. You should have  /sys/bus/virtio/drivers/9pnet_virtio/ if you
> have CONFIG_NET_9P_VIRTIO enabled.

 I have it, but still no mount_tag. I'll try with a more recent
kernel but this will take more time.

> You can then find mount tag at virtio/mount_tag 
> 
> 
> >  Something possibly related is that, I had to rewind the tree by some 
> > commits
> > because this series doesn't apply against current HEAD.
> 
> 
> 
> The patches are against 0aef4261ac0ec9089ade0e3a92f986cb4ba7317e of the
> master branch 
> 
> 
> -aneesh

Re: [Qemu-devel] Re: Compile files only once: some planning

2010-03-24 Thread Paul Brook

> On 03/24/2010 12:19 PM, Richard Henderson wrote:
> > On 03/24/2010 02:47 AM, Paolo Bonzini wrote:
> >> 1) make CPUState define only common fields. Include CPUState at the
> >> beginning of each per-target CPUXYZState.
> >
> > Irritatingly, the common fields contain quite big TLBs. And the
> > offsets from the start of env affect the compactness of the code
> > generated from TCG. We really really want the general registers
> > to come first to make sure that those offsets fit the host's
> > reg+offset addressing mode.
> 
> What about adding a 512-bytes (or more) block or something like that at
> the beginning of CPUState with a union, so you can put the per-target
> stuff there?

Is it really worth the hassle? Anything touching CPUState is probably going to 
be CPU specific anyway.

Paul

[Qemu-devel] [PATCH 15/15] virtio-serial: Handle scatter/gather input from the guest

2010-03-24 Thread Amit Shah

Current guests don't send more than one iov but it can change later.
Ensure we handle that case.

Signed-off-by: Amit Shah 
CC: Avi Kivity 
---
 hw/virtio-serial-bus.c |   12 
 1 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c
index 3edfeca..42e4ed0 100644
--- a/hw/virtio-serial-bus.c
+++ b/hw/virtio-serial-bus.c
@@ -369,7 +369,8 @@ static void handle_output(VirtIODevice *vdev, VirtQueue *vq)
 
 while (virtqueue_pop(vq, &elem)) {
 VirtIOSerialPort *port;
-size_t ret;
+uint8_t *buf;
+size_t ret, buf_size;
 
 port = find_port_by_vq(vser, vq);
 if (!port) {
@@ -392,9 +393,12 @@ static void handle_output(VirtIODevice *vdev, VirtQueue 
*vq)
 goto next_buf;
 }
 
-/* The guest always sends only one sg */
-ret = port->info->have_data(port, elem.out_sg[0].iov_base,
-elem.out_sg[0].iov_len);
+buf_size = iov_size(elem.out_sg, elem.out_num);
+buf = qemu_malloc(buf_size);
+ret = iov_to_buf(elem.out_sg, elem.out_num, buf, 0, buf_size);
+
+ret = port->info->have_data(port, buf, ret);
+qemu_free(buf);
 
 next_buf:
 virtqueue_push(vq, &elem, ret);
-- 
1.6.2.5

[Qemu-devel] [PATCH 14/15] virtio-serial: Handle scatter-gather buffers for control messages

2010-03-24 Thread Amit Shah

Current control messages are small enough to not be split into multiple
buffers but we could run into such a situation in the future or a
malicious guest could cause such a situation.

So handle the entire iov request for control messages.

Also ensure the size of the control request is >= what we expect
otherwise we risk accessing memory that we don't own.

Signed-off-by: Amit Shah 
CC: Avi Kivity 
Reported-by: Avi Kivity 
---
 hw/virtio-serial-bus.c |   34 +++---
 1 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c
index bd1223e..3edfeca 100644
--- a/hw/virtio-serial-bus.c
+++ b/hw/virtio-serial-bus.c
@@ -216,7 +216,7 @@ static void mon_event(const char *device, const uint32_t 
port_id,
 }
 
 /* Guest wants to notify us of some event */
-static void handle_control_message(VirtIOSerial *vser, void *buf)
+static void handle_control_message(VirtIOSerial *vser, void *buf, size_t len)
 {
 struct VirtIOSerialPort *port;
 struct virtio_console_control cpkt, *gcpkt;
@@ -225,6 +225,11 @@ static void handle_control_message(VirtIOSerial *vser, 
void *buf)
 
 gcpkt = buf;
 
+if (len < sizeof(cpkt)) {
+/* The guest sent an invalid control packet */
+return;
+}
+
 cpkt.event = lduw_p(&gcpkt->event);
 cpkt.value = lduw_p(&gcpkt->value);
 
@@ -321,12 +326,35 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq)
 {
 VirtQueueElement elem;
 VirtIOSerial *vser;
+uint8_t *buf;
+size_t len;
 
 vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
 
+len = 0;
+buf = NULL;
 while (virtqueue_pop(vq, &elem)) {
-handle_control_message(vser, elem.out_sg[0].iov_base);
-virtqueue_push(vq, &elem, elem.out_sg[0].iov_len);
+size_t cur_len, copied;
+
+cur_len = iov_size(elem.out_sg, elem.out_num);
+/*
+ * Allocate a new buf only if we didn't have one previously or
+ * if the size of the buf differs
+ */
+if (cur_len != len) {
+if (len) {
+qemu_free(buf);
+}
+buf = qemu_malloc(cur_len);
+len = cur_len;
+}
+copied = iov_to_buf(elem.out_sg, elem.out_num, buf, 0, len);
+
+handle_control_message(vser, buf, copied);
+virtqueue_push(vq, &elem, copied);
+}
+if (len) {
+qemu_free(buf);
 }
 virtio_notify(vdev, vq);
 }
-- 
1.6.2.5

[Qemu-devel] [PATCH 13/15] iov: Add iov_to_buf and iov_size helpers

2010-03-24 Thread Amit Shah

iov_to_buf() puts the buffer contents in the iov in a linearized buffer.

iov_size() gets the length of the contents in the iov.

The iov_to_buf() function is the memcpy_to_iovec() function that was
used in virtio-ballon.c.

Signed-off-by: Amit Shah 
---
 hw/iov.c|   37 +
 hw/iov.h|3 +++
 hw/virtio-balloon.c |   35 ---
 3 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/hw/iov.c b/hw/iov.c
index 07bd499..d4013cd 100644
--- a/hw/iov.c
+++ b/hw/iov.c
@@ -31,3 +31,40 @@ size_t iov_from_buf(struct iovec *iov, unsigned int iovcnt,
 }
 return offset;
 }
+
+size_t iov_to_buf(struct iovec *iov, unsigned int iovcnt,
+  void *buf, size_t offset, size_t size)
+{
+uint8_t *ptr;
+size_t iov_off, buf_off;
+unsigned int i;
+
+ptr = buf;
+iov_off = 0;
+buf_off = 0;
+for (i = 0; i < iovcnt && size; i++) {
+if (offset < (iov_off + iov[i].iov_len)) {
+size_t len = MIN((iov_off + iov[i].iov_len) - offset , size);
+
+memcpy(ptr + buf_off, iov[i].iov_base + (offset - iov_off), len);
+
+buf_off += len;
+offset += len;
+size -= len;
+}
+iov_off += iov[i].iov_len;
+}
+return buf_off;
+}
+
+size_t iov_size(struct iovec *iov, unsigned int iovcnt)
+{
+size_t len;
+unsigned int i;
+
+len = 0;
+for (i = 0; i < iovcnt; i++) {
+len += iov[i].iov_len;
+}
+return len;
+}
diff --git a/hw/iov.h b/hw/iov.h
index 5e3e541..c977ff1 100644
--- a/hw/iov.h
+++ b/hw/iov.h
@@ -14,3 +14,6 @@
 
 size_t iov_from_buf(struct iovec *iov, unsigned int iovcnt,
 const void *buf, size_t size);
+size_t iov_to_buf(struct iovec *iov, unsigned int iovcnt,
+  void *buf, size_t offset, size_t size);
+size_t iov_size(struct iovec *iov, unsigned int iovcnt);
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 6d12024..4414eae 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -11,6 +11,7 @@
  *
  */
 
+#include "iov.h"
 #include "qemu-common.h"
 #include "virtio.h"
 #include "pc.h"
@@ -91,33 +92,6 @@ static QObject *get_stats_qobject(VirtIOBalloon *dev)
 return QOBJECT(dict);
 }
 
-/* FIXME: once we do a virtio refactoring, this will get subsumed into common
- * code */
-static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
-   struct iovec *iov, int iovlen)
-{
-int i;
-uint8_t *ptr = data;
-size_t iov_off = 0;
-size_t data_off = 0;
-
-for (i = 0; i < iovlen && size; i++) {
-if (offset < (iov_off + iov[i].iov_len)) {
-size_t len = MIN((iov_off + iov[i].iov_len) - offset , size);
-
-memcpy(ptr + data_off, iov[i].iov_base + (offset - iov_off), len);
-
-data_off += len;
-offset += len;
-size -= len;
-}
-
-iov_off += iov[i].iov_len;
-}
-
-return data_off;
-}
-
 static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
 VirtIOBalloon *s = to_virtio_balloon(vdev);
@@ -127,8 +101,7 @@ static void virtio_balloon_handle_output(VirtIODevice 
*vdev, VirtQueue *vq)
 size_t offset = 0;
 uint32_t pfn;
 
-while (memcpy_from_iovector(&pfn, offset, 4,
-elem.out_sg, elem.out_num) == 4) {
+while (iov_to_buf(elem.out_sg, elem.out_num, &pfn, offset, 4) == 4) {
 ram_addr_t pa;
 ram_addr_t addr;
 
@@ -180,8 +153,8 @@ static void virtio_balloon_receive_stats(VirtIODevice 
*vdev, VirtQueue *vq)
  */
 reset_stats(s);
 
-while (memcpy_from_iovector(&stat, offset, sizeof(stat), elem->out_sg,
-elem->out_num) == sizeof(stat)) {
+while (iov_to_buf(elem->out_sg, elem->out_num, &stat, offset, sizeof(stat))
+   == sizeof(stat)) {
 uint16_t tag = tswap16(stat.tag);
 uint64_t val = tswap64(stat.val);
 
-- 
1.6.2.5

[Qemu-devel] [PATCH 12/15] iov: Introduce a new file for helpers around iovs, add iov_from_buf()

2010-03-24 Thread Amit Shah

The virtio-net code uses iov_fill() which fills an iov from a linear
buffer. The virtio-serial-bus code does something similar in an
open-coded function.

Create a new iov.c file that has iov_from_buf().

Convert virtio-net and virtio-serial-bus over to use this functionality.
virtio-net used ints to hold sizes, the new function is going to use
size_t types.

Later commits will add the opposite functionality -- going from an iov
to a linear buffer.

Signed-off-by: Amit Shah 
---
 Makefile.objs  |1 +
 hw/iov.c   |   33 +
 hw/iov.h   |   16 
 hw/virtio-net.c|   20 +++-
 hw/virtio-serial-bus.c |   15 +++
 5 files changed, 60 insertions(+), 25 deletions(-)
 create mode 100644 hw/iov.c
 create mode 100644 hw/iov.h

diff --git a/Makefile.objs b/Makefile.objs
index 281f7a6..212ae1d 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -127,6 +127,7 @@ user-obj-y += cutils.o cache-utils.o
 # libhw
 
 hw-obj-y =
+hw-obj-y += iov.o
 hw-obj-y += loader.o
 hw-obj-y += virtio.o virtio-console.o virtio-pci.o
 hw-obj-y += fw_cfg.o pci.o pci_host.o pcie_host.o
diff --git a/hw/iov.c b/hw/iov.c
new file mode 100644
index 000..07bd499
--- /dev/null
+++ b/hw/iov.c
@@ -0,0 +1,33 @@
+/*
+ * Helpers for getting linearized buffers from iov / filling buffers into iovs
+ *
+ * Copyright IBM, Corp. 2007, 2008
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Author(s):
+ *  Anthony Liguori 
+ *  Amit Shah 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "iov.h"
+
+size_t iov_from_buf(struct iovec *iov, unsigned int iovcnt,
+const void *buf, size_t size)
+{
+size_t offset;
+unsigned int i;
+
+offset = 0;
+for (i = 0; offset < size && i < iovcnt; i++) {
+size_t len;
+
+len = MIN(iov[i].iov_len, size - offset);
+
+memcpy(iov[i].iov_base, buf + offset, len);
+offset += len;
+}
+return offset;
+}
diff --git a/hw/iov.h b/hw/iov.h
new file mode 100644
index 000..5e3e541
--- /dev/null
+++ b/hw/iov.h
@@ -0,0 +1,16 @@
+/*
+ * Helpers for getting linearized buffers from iov / filling buffers into iovs
+ *
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Author(s):
+ *  Amit Shah 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+
+size_t iov_from_buf(struct iovec *iov, unsigned int iovcnt,
+const void *buf, size_t size);
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index be33c68..273e7f9 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -11,6 +11,7 @@
  *
  */
 
+#include "iov.h"
 #include "virtio.h"
 #include "net.h"
 #include "net/checksum.h"
@@ -423,21 +424,6 @@ static void work_around_broken_dhclient(struct 
virtio_net_hdr *hdr,
 }
 }
 
-static int iov_fill(struct iovec *iov, int iovcnt, const void *buf, int count)
-{
-int offset, i;
-
-offset = i = 0;
-while (offset < count && i < iovcnt) {
-int len = MIN(iov[i].iov_len, count - offset);
-memcpy(iov[i].iov_base, buf + offset, len);
-offset += len;
-i++;
-}
-
-return offset;
-}
-
 static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
   const void *buf, size_t size, size_t hdr_len)
 {
@@ -573,8 +559,8 @@ static ssize_t virtio_net_receive(VLANClientState *nc, 
const uint8_t *buf, size_
 }
 
 /* copy in packet.  ugh */
-len = iov_fill(sg, elem.in_num,
-   buf + offset, size - offset);
+len = iov_from_buf(sg, elem.in_num,
+   buf + offset, size - offset);
 total += len;
 
 /* signal other side */
diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c
index 80fbff4..bd1223e 100644
--- a/hw/virtio-serial-bus.c
+++ b/hw/virtio-serial-bus.c
@@ -15,6 +15,7 @@
  * the COPYING file in the top-level directory.
  */
 
+#include "iov.h"
 #include "monitor.h"
 #include "qemu-objects.h"
 #include "qemu-queue.h"
@@ -85,27 +86,25 @@ static size_t write_to_port(VirtIOSerialPort *port,
 {
 VirtQueueElement elem;
 VirtQueue *vq;
-size_t offset = 0;
-size_t len = 0;
+size_t offset;
 
 vq = port->ivq;
 if (!virtio_queue_ready(vq)) {
 return 0;
 }
 
+offset = 0;
 while (offset < size) {
-int i;
+size_t len;
 
 if (!virtqueue_pop(vq, &elem)) {
 break;
 }
 
-for (i = 0; offset < size && i < elem.in_num; i++) {
-len = MIN(elem.in_sg[i].iov_len, size - offset);
+len = iov_from_buf(elem.in_sg, elem.in_num,
+   buf + offset, size - offset);
+offset += len;
 
-memcpy(elem.in_sg[i].iov_base, buf + offset, len);
-offset += len;
-}

[Qemu-devel] [PATCH 11/15] virtio-serial: Send out guest data to ports only if port is opened

2010-03-24 Thread Amit Shah

Data should be written only when ports are open.

Signed-off-by: Amit Shah 
---
 hw/virtio-serial-bus.c |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c
index efcc66c..80fbff4 100644
--- a/hw/virtio-serial-bus.c
+++ b/hw/virtio-serial-bus.c
@@ -350,6 +350,11 @@ static void handle_output(VirtIODevice *vdev, VirtQueue 
*vq)
 goto next_buf;
 }
 
+   if (!port->host_connected) {
+ret = 0;
+goto next_buf;
+}
+
 /*
  * A port may not have any handler registered for consuming the
  * data that the guest sends or it may not have a chardev associated
-- 
1.6.2.5

[Qemu-devel] [PATCH 10/15] virtio-serial: Add QMP events for failed port/device add

2010-03-24 Thread Amit Shah

When adding a port or a device to the guest fails, management software
might be interested in knowing and then cleaning up the host-side of the
port. Introduce QMP events to signal such errors.

Signed-off-by: Amit Shah 
CC: Luiz Capitulino 
---
 QMP/qmp-events.txt |   48 
 hw/virtio-serial-bus.c |   15 +++
 monitor.c  |3 +++
 monitor.h  |1 +
 4 files changed, 67 insertions(+), 0 deletions(-)

diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt
index a94e9b4..f13cf45 100644
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -188,3 +188,51 @@ Example:
 
 Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
 followed respectively by the RESET, SHUTDOWN, or STOP events.
+
+VIRTIO_SERIAL
+-
+
+Emitted when errors occur in guest port add or guest device add.
+
+Data:
+
+- "device": The virtio-serial device that triggered the event {json-string}
+  This is the name given to the bus on the command line:
+-device virtio-serial,id="foo"
+  here, the device name is "foo"
+
+- "port": The port number that triggered the event {json-number}
+  This is the number given to the port on the command line:
+-device virtserialport,nr=2
+  here, the port number is 2. If not mentioned on the command
+  line, the number is auto-assigned.
+
+- "result": The result of the operation {json-string}
+  This is one of the following:
+ "pass", "fail"
+
+- "operation": The operation that triggered the event {json-sring}
+  This is one of the following:
+ "port_add", "device_add"
+
+Example:
+
+Port 0 add failure in the guest:
+
+{ "timestamp": {"seconds": 1269438649, "microseconds": 851170},
+  "event": "VIRTIO_SERIAL",
+  "data": {
+"device": "virtio-serial-bus.0",
+"port": 0,
+"result": "fail",
+"operation": "port_add" } }
+
+Device add failure in the guest:
+
+{ "timestamp": {"seconds": 1269438702, "microseconds": 78114},
+  "event": "VIRTIO_SERIAL",
+  "data": {
+"device": "virtio-serial-bus.0",
+"port": 4294967295,
+"result": "fail",
+"operation": "device_add" } }
diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c
index 33083af..efcc66c 100644
--- a/hw/virtio-serial-bus.c
+++ b/hw/virtio-serial-bus.c
@@ -16,6 +16,7 @@
  */
 
 #include "monitor.h"
+#include "qemu-objects.h"
 #include "qemu-queue.h"
 #include "sysbus.h"
 #include "virtio-serial.h"
@@ -204,6 +205,17 @@ size_t virtio_serial_guest_ready(VirtIOSerialPort *port)
 return 0;
 }
 
+static void mon_event(const char *device, const uint32_t port_id,
+  const char *operation, const char *result)
+{
+QObject *data;
+
+data = qobject_from_jsonf("{ 'device': %s, 'port': %ld, 'operation': %s, 
'result': %s }",
+  device, (int64_t)port_id, operation, result);
+monitor_protocol_event(QEVENT_VIRTIO_SERIAL, data);
+qobject_decref(data);
+}
+
 /* Guest wants to notify us of some event */
 static void handle_control_message(VirtIOSerial *vser, void *buf)
 {
@@ -226,6 +238,8 @@ static void handle_control_message(VirtIOSerial *vser, void 
*buf)
 if (!cpkt.value) {
 error_report("virtio-serial-bus: Guest failure in adding device 
%s\n",
 vser->bus->qbus.name);
+mon_event(vser->bus->qbus.name, VIRTIO_CONSOLE_BAD_ID,
+ "device_add", "fail");
 break;
 }
 /*
@@ -241,6 +255,7 @@ static void handle_control_message(VirtIOSerial *vser, void 
*buf)
 if (!cpkt.value) {
 error_report("virtio-serial-bus: Guest failure in adding port %u 
for device %s\n",
  port->id, vser->bus->qbus.name);
+mon_event(vser->bus->qbus.name, port->id, "port_add", "fail");
 break;
 }
 /*
diff --git a/monitor.c b/monitor.c
index 0448a70..9e5bfe7 100644
--- a/monitor.c
+++ b/monitor.c
@@ -441,6 +441,9 @@ void monitor_protocol_event(MonitorEvent event, QObject 
*data)
 case QEVENT_WATCHDOG:
 event_name = "WATCHDOG";
 break;
+case QEVENT_VIRTIO_SERIAL:
+event_name = "VIRTIO_SERIAL";
+break;
 default:
 abort();
 break;
diff --git a/monitor.h b/monitor.h
index bd4ae34..ea4df8a 100644
--- a/monitor.h
+++ b/monitor.h
@@ -28,6 +28,7 @@ typedef enum MonitorEvent {
 QEVENT_BLOCK_IO_ERROR,
 QEVENT_RTC_CHANGE,
 QEVENT_WATCHDOG,
+QEVENT_VIRTIO_SERIAL,
 QEVENT_MAX,
 } MonitorEvent;
 
-- 
1.6.2.5

1 2 >

1 - 100 of 147 matches

Mail list logo