date:20100316

[Qemu-devel] KVM call agenda for Mar 16

2010-03-16 Thread Chris Wright

Please send in any agenda items you are interested in covering.

thanks,
-chris

[Qemu-devel] [trivial one-liner] be more specific in -mem-path error messages

2010-03-16 Thread Michael Tokarev

The error message qemu gives when hugetlbfs is not
accessible is cryptic at best:

  mkstemp: Permission denied

Make it a bit more specific instead:

 unable to create backing store for hugepages: Permission denied

Thanks!

/mjt

diff --git a/exec.c b/exec.c
index 891e0ee..985bdde 100644
--- a/exec.c
+++ b/exec.c
@@ -2569,5 +2569,5 @@ static void *file_ram_alloc(ram_addr_t memory, const char 
*path)
 fd = mkstemp(filename);
 if (fd  0) {
-   perror(mkstemp);
+   perror(unable to create backing store for hugepages);
free(filename);
return NULL;

[Qemu-devel] Re: [PATCH, RFC] Replace assert(0) with abort() or cpu_abort()

2010-03-16 Thread Paolo Bonzini


On 03/15/2010 07:36 PM, Markus Armbruster wrote:

Please don't tell me that user emulators make abort() return.  abort()
is declared __noreturn__, and the optimizer may well rely on that.


If the user programs make a signal (SIGABRT, SIG_IGN) call, I suppose 
abort() will return.


Paolo

Re: [Qemu-devel] Re: [PATCH, RFC] Replace assert(0) with abort() or cpu_abort()

2010-03-16 Thread Markus Armbruster

Paolo Bonzini pbonz...@redhat.com writes:

 On 03/15/2010 07:36 PM, Markus Armbruster wrote:
 Please don't tell me that user emulators make abort() return.  abort()
 is declared __noreturn__, and the optimizer may well rely on that.

 If the user programs make a signal (SIGABRT, SIG_IGN) call, I
 suppose abort() will return.

A program doing that gets what it asks for, and richly deserves it.

[Qemu-devel] [PATCH -v2 02/22] virtio-9p: Implement P9_TVERSION for 9P

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

[ki...@linux.vnet.ibm.com: malloc to qemu_malloc conversion]

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p.c |  263 +++-
 1 files changed, 261 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index 115c93b..53b3d78 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -111,10 +111,269 @@ static void free_pdu(V9fsState *s, V9fsPDU *pdu)
 }
 }
 
+static void v9fs_string_free(V9fsString *str)
+{
+free(str-data);
+str-data = NULL;
+str-size = 0;
+}
+
+static size_t pdu_unpack(void *dst, V9fsPDU *pdu, size_t offset, size_t size)
+{
+struct iovec *sg = pdu-elem.out_sg;
+BUG_ON((offset + size)  sg[0].iov_len);
+memcpy(dst, sg[0].iov_base + offset, size);
+return size;
+}
+
+/* FIXME i can do this with less variables */
+static size_t pdu_pack(V9fsPDU *pdu, size_t offset, const void *src, size_t 
size)
+{
+struct iovec *sg = pdu-elem.in_sg;
+size_t off = 0;
+size_t copied = 0;
+int i = 0;
+
+for (i = 0; size  i  pdu-elem.in_num; i++) {
+size_t len;
+
+if (offset = off  offset  (off + sg[i].iov_len)) {
+len = MIN(sg[i].iov_len - (offset - off), size);
+memcpy(sg[i].iov_base + (offset - off), src, len);
+size -= len;
+offset += len;
+off = offset;
+copied += len;
+src += len;
+} else {
+off += sg[i].iov_len;
+}
+}
+
+return copied;
+}
+
+static int pdu_copy_sg(V9fsPDU *pdu, size_t offset, int rx, struct iovec *sg)
+{
+size_t pos = 0;
+int i, j;
+struct iovec *src_sg;
+unsigned int num;
+
+if (rx) {
+src_sg = pdu-elem.in_sg;
+num = pdu-elem.in_num;
+} else {
+src_sg = pdu-elem.out_sg;
+num = pdu-elem.out_num;
+}
+
+j = 0;
+for (i = 0; i  num; i++) {
+if (offset = pos) {
+sg[j].iov_base = src_sg[i].iov_base;
+sg[j].iov_len = src_sg[i].iov_len;
+j++;
+} else if (offset  (src_sg[i].iov_len + pos)) {
+sg[j].iov_base = src_sg[i].iov_base;
+sg[j].iov_len = src_sg[i].iov_len;
+sg[j].iov_base += (offset - pos);
+sg[j].iov_len -= (offset - pos);
+j++;
+}
+pos += src_sg[i].iov_len;
+}
+
+return j;
+}
+
+static size_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
+{
+size_t old_offset = offset;
+va_list ap;
+int i;
+
+va_start(ap, fmt);
+for (i = 0; fmt[i]; i++) {
+   switch (fmt[i]) {
+   case 'b': {
+   int8_t *valp = va_arg(ap, int8_t *);
+   offset += pdu_unpack(valp, pdu, offset, sizeof(*valp));
+   break;
+   }
+   case 'w': {
+   int16_t *valp = va_arg(ap, int16_t *);
+   offset += pdu_unpack(valp, pdu, offset, sizeof(*valp));
+   break;
+   }
+   case 'd': {
+   int32_t *valp = va_arg(ap, int32_t *);
+   offset += pdu_unpack(valp, pdu, offset, sizeof(*valp));
+   break;
+   }
+   case 'q': {
+   int64_t *valp = va_arg(ap, int64_t *);
+   offset += pdu_unpack(valp, pdu, offset, sizeof(*valp));
+   break;
+   }
+   case 'v': {
+   struct iovec *iov = va_arg(ap, struct iovec *);
+   int *iovcnt = va_arg(ap, int *);
+   *iovcnt = pdu_copy_sg(pdu, offset, 0, iov);
+   break;
+   }
+   case 's': {
+   V9fsString *str = va_arg(ap, V9fsString *);
+   offset += pdu_unmarshal(pdu, offset, w, str-size);
+   /* FIXME: sanity check str-size */
+   str-data = qemu_malloc(str-size + 1);
+   offset += pdu_unpack(str-data, pdu, offset, str-size);
+   str-data[str-size] = 0;
+   break;
+   }
+   case 'Q': {
+   V9fsQID *qidp = va_arg(ap, V9fsQID *);
+   offset += pdu_unmarshal(pdu, offset, bdq,
+   qidp-type, qidp-version, qidp-path);
+   break;
+   }
+   case 'S': {
+   V9fsStat *statp = va_arg(ap, V9fsStat *);
+   offset += pdu_unmarshal(pdu, offset, wwdQdddqsddd,
+   statp-size, statp-type, statp-dev,
+   statp-qid, statp-mode, statp-atime,
+   statp-mtime, statp-length,
+   statp-name, statp-uid, statp-gid,
+   statp-muid, statp-extension,
+   statp-n_uid, statp-n_gid,
+   statp-n_muid);
+   break;
+   }
+   default:
+   break;
+   }
+}
+
+va_end(ap);
+
+return offset - old_offset;
+}
+
+static size_t pdu_marshal(V9fsPDU *pdu,

[Qemu-devel] [PATCH -v2 01/22] virtio-9p: Add a virtio 9p device to qemu

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

This patch doesn't implement the 9p protocol handling
code. It adds a simple device which dumps the protocol data.

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 Makefile.target  |1 +
 hw/virtio-9p-debug.c |  442 ++
 hw/virtio-9p.c   |  275 +++
 hw/virtio-9p.h   |   70 
 hw/virtio-pci.c  |   25 +++
 hw/virtio.h  |1 +
 6 files changed, 814 insertions(+), 0 deletions(-)
 create mode 100644 hw/virtio-9p-debug.c
 create mode 100644 hw/virtio-9p.c
 create mode 100644 hw/virtio-9p.h

diff --git a/Makefile.target b/Makefile.target
index 320f807..33f9fcb 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -172,6 +172,7 @@ obj-y = vl.o async.o monitor.o pci.o pci_host.o pcie_host.o 
machine.o gdbstub.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-pci.o 
virtio-serial-bus.o
+obj-y += virtio-9p.o virtio-9p-debug.o
 obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
diff --git a/hw/virtio-9p-debug.c b/hw/virtio-9p-debug.c
new file mode 100644
index 000..9230659
--- /dev/null
+++ b/hw/virtio-9p-debug.c
@@ -0,0 +1,442 @@
+/*
+ * Virtio 9p PDU debug
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *  Anthony Liguori   aligu...@us.ibm.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+#include virtio.h
+#include pc.h
+#include virtio-9p.h
+
+#include assert.h
+#include sys/uio.h
+
+#define BUG_ON(cond) assert(!(cond))
+
+extern int dotu;
+static FILE *llogfile;
+
+static struct iovec *get_sg(V9fsPDU *pdu, int rx)
+{
+if (rx)
+return pdu-elem.in_sg;
+return pdu-elem.out_sg;
+}
+
+static void pprint_int8(V9fsPDU *pdu, int rx, size_t *offsetp,
+const char *name)
+{
+struct iovec *sg = get_sg(pdu, rx);
+size_t offset = *offsetp;
+int8_t value;
+
+BUG_ON((offset + sizeof(value))  sg[0].iov_len);
+
+memcpy(value, sg[0].iov_base + offset, sizeof(value));
+offset += sizeof(value);
+
+fprintf(llogfile, %s=0x%x, name, value);
+
+*offsetp = offset;
+}
+
+static void pprint_int16(V9fsPDU *pdu, int rx, size_t *offsetp,
+const char *name)
+{
+struct iovec *sg = get_sg(pdu, rx);
+size_t offset = *offsetp;
+int16_t value;
+
+BUG_ON((offset + sizeof(value))  sg[0].iov_len);
+
+memcpy(value, sg[0].iov_base + offset, sizeof(value));
+offset += sizeof(value);
+
+fprintf(llogfile, %s=0x%x, name, value);
+
+*offsetp = offset;
+}
+
+static void pprint_int32(V9fsPDU *pdu, int rx, size_t *offsetp,
+const char *name)
+{
+struct iovec *sg = get_sg(pdu, rx);
+size_t offset = *offsetp;
+int32_t value;
+
+BUG_ON((offset + sizeof(value))  sg[0].iov_len);
+
+memcpy(value, sg[0].iov_base + offset, sizeof(value));
+offset += sizeof(value);
+
+fprintf(llogfile, %s=0x%x, name, value);
+
+*offsetp = offset;
+}
+
+static void pprint_int64(V9fsPDU *pdu, int rx, size_t *offsetp,
+const char *name)
+{
+struct iovec *sg = get_sg(pdu, rx);
+size_t offset = *offsetp;
+int64_t value;
+
+BUG_ON((offset + sizeof(value))  sg[0].iov_len);
+
+memcpy(value, sg[0].iov_base + offset, sizeof(value));
+offset += sizeof(value);
+
+fprintf(llogfile, %s=0x% PRIx64, name, value);
+
+*offsetp = offset;
+}
+
+static void pprint_str(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name)
+{
+struct iovec *sg = get_sg(pdu, rx);
+size_t offset = *offsetp;
+int16_t size;
+size_t result;
+
+BUG_ON((offset + 2)  sg[0].iov_len);
+memcpy(size, sg[0].iov_base + offset, 2);
+offset += 2;
+
+BUG_ON((offset + size)  sg[0].iov_len);
+fprintf(llogfile, %s=, name);
+result = fwrite(sg[0].iov_base + offset, 1, size, llogfile);
+BUG_ON(result != size);
+offset += size;
+
+*offsetp = offset;
+}
+
+static void pprint_qid(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name)
+{
+fprintf(llogfile, %s={, name);
+pprint_int8(pdu, rx, offsetp, type);
+pprint_int32(pdu, rx, offsetp, , version);
+pprint_int64(pdu, rx, offsetp, , path);
+fprintf(llogfile, });
+}
+
+static void pprint_stat(V9fsPDU *pdu, int rx, size_t *offsetp, const char 
*name)
+{
+fprintf(llogfile, %s={, name);
+pprint_int16(pdu, rx, offsetp, size);
+pprint_int16(pdu, rx, offsetp, , type);
+pprint_int32(pdu, rx, offsetp, , dev);
+pprint_qid(pdu, rx, offsetp, , qid);
+pprint_int32(pdu, rx, offsetp, , mode);
+pprint_int32(pdu, rx, offsetp, , atime);
+pprint_int32(pdu, rx, offsetp, , mtime);
+pprint_int64(pdu,

[Qemu-devel] [PATCH -V2 00/22] virtio-9p: paravirtual file system passthrough

2010-03-16 Thread Aneesh Kumar K.V

Hi,


This patch series adds a paravirtual file system passthrough mechanism to QEMU
based on the 9P protocol. With the current implementation, all I/O is
implemented in the VCPU thread. We've modified the protocol handlers so that we
can support dispatching I/O in a thread pool. The actual thread pool
implementation will be posted later.

This patch set should work with any recent Linux kernel, as virtio-9p has been
supported for a few kernel releases now. The export directory is specified using
the QEMU option below.

-device virtio-9p-pci,share_path=/mnt/,mount_tag=v_mnt

mount_tag is used to identify the mount point in the kernel. This will be
available in the Linux kernel via the
/sys/devices/virtio-pci/virtio1/mount_tag file.

Changes from V1:
a) The fsstress test suite runs successfully with the patches. That should
indicate that the patches are stable enough to be merged.
b) Added proper error handling to all posix_* calls.
c) Fixed code to follow Qemu coding style.
d) Other bug fixes, most of which are folded back into the original patches
e) rebased to qemu master 0aef4261ac0ec9089ade0e3a92f986cb4ba7317e


-aneesh

[Qemu-devel] [PATCH -v2 03/22] virtio-9p: Implement P9_TATTACH

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

[jv...@linux.vnet.ibm.com: Added qemu_vasprintf]

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 Makefile.target  |2 +-
 hw/virtio-9p-local.c |   84 +++
 hw/virtio-9p.c   |  155 +++---
 hw/virtio-9p.h   |   33 +++
 qemu-common.h|1 +
 qemu-malloc.c|5 ++
 6 files changed, 270 insertions(+), 10 deletions(-)
 create mode 100644 hw/virtio-9p-local.c

diff --git a/Makefile.target b/Makefile.target
index 33f9fcb..97f32a9 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -172,7 +172,7 @@ obj-y = vl.o async.o monitor.o pci.o pci_host.o pcie_host.o 
machine.o gdbstub.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-pci.o 
virtio-serial-bus.o
-obj-y += virtio-9p.o virtio-9p-debug.o
+obj-y += virtio-9p.o virtio-9p-debug.o virtio-9p-local.o
 obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c
new file mode 100644
index 000..1d2523b
--- /dev/null
+++ b/hw/virtio-9p-local.c
@@ -0,0 +1,84 @@
+/*
+ * Virtio 9p Posix callback
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *  Anthony Liguori   aligu...@us.ibm.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+#include virtio.h
+#include pc.h
+#include qemu_socket.h
+#include virtio-9p.h
+#include sys/uio.h
+#include arpa/inet.h
+#include assert.h
+#include pwd.h
+#include grp.h
+#include sys/socket.h
+#include sys/un.h
+
+static const char *base_path;
+
+static const char *rpath(const char *path)
+{
+/* FIXME: so wrong... */
+static char buffer[4096];
+snprintf(buffer, sizeof(buffer), %s/%s, base_path, path);
+return buffer;
+}
+
+static int local_lstat(void *opaque, const char *path, struct stat *stbuf)
+{
+return lstat(rpath(path), stbuf);
+}
+
+static int local_setuid(void *opaque, uid_t uid)
+{
+struct passwd *pw;
+gid_t groups[33];
+int ngroups;
+static uid_t cur_uid = -1;
+
+if (cur_uid == uid)
+return 0;
+
+if (setreuid(0, 0))
+return -1;
+
+pw = getpwuid(uid);
+if (pw == NULL)
+return -1;
+
+ngroups = 33;
+if (getgrouplist(pw-pw_name, pw-pw_gid, groups, ngroups) == -1)
+return -1;
+
+if (setgroups(ngroups, groups))
+return -1;
+
+if (setregid(-1, pw-pw_gid))
+return -1;
+
+if (setreuid(-1, uid))
+return -1;
+
+cur_uid = uid;
+
+return 0;
+}
+
+static V9fsPosixFileOperations ops = {
+.lstat = local_lstat,
+.setuid = local_setuid,
+};
+
+V9fsPosixFileOperations *virtio_9p_init_local(const char *path)
+{
+base_path = path;
+return ops;
+}
diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index 53b3d78..fdff589 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -82,6 +82,7 @@ typedef struct V9fsState
 V9fsPDU pdus[MAX_REQ];
 V9fsPDU *free_pdu;
 V9fsFidState *fid_list;
+V9fsPosixFileOperations *ops;
 char *root;
 uid_t uid;
 } V9fsState;
@@ -91,6 +92,123 @@ int debug_9p_pdu = 1;
 
 extern void pprint_pdu(V9fsPDU *pdu);
 
+static int posix_lstat(V9fsState *s, V9fsString *path, struct stat *stbuf)
+{
+return s-ops-lstat(s-ops-opaque, path-data, stbuf);
+}
+
+static int posix_setuid(V9fsState *s, uid_t uid)
+{
+return s-ops-setuid(s-ops-opaque, uid);
+}
+
+static void v9fs_string_free(V9fsString *str)
+{
+qemu_free(str-data);
+str-data = NULL;
+str-size = 0;
+}
+
+static void v9fs_string_sprintf(V9fsString *str, const char *fmt, ...)
+{
+va_list ap;
+int err;
+
+v9fs_string_free(str);
+
+va_start(ap, fmt);
+err = qemu_vasprintf(str-data, fmt, ap);
+BUG_ON(err == -1);
+va_end(ap);
+
+str-size = err;
+}
+
+static V9fsFidState *lookup_fid(V9fsState *s, int32_t fid)
+{
+V9fsFidState *f;
+
+for (f = s-fid_list; f; f = f-next) {
+if (f-fid == fid) {
+posix_setuid(s, f-uid);
+return f;
+}
+}
+
+return NULL;
+}
+
+static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
+{
+V9fsFidState *f;
+
+f = lookup_fid(s, fid);
+if (f)
+return NULL;
+
+f = qemu_mallocz(sizeof(V9fsFidState));
+BUG_ON(f == NULL);
+
+f-fid = fid;
+f-fd = -1;
+f-dir = NULL;
+
+f-next = s-fid_list;
+s-fid_list = f;
+
+return f;
+}
+
+#define P9_QID_TYPE_DIR0x80
+#define P9_QID_TYPE_SYMLINK0x02
+
+#define P9_STAT_MODE_DIR   0x8000
+#define P9_STAT_MODE_APPEND0x4000
+#define P9_STAT_MODE_EXCL  0x2000
+#define P9_STAT_MODE_MOUNT 0x1000
+#define P9_STAT_MODE_AUTH  0x0800
+#define

[Qemu-devel] [PATCH -v2 04/22] virtio-9p: Implement P9_TSTAT

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

This gets the mount to work on the guest.

[ki...@linux.vnet.ibm.com: malloc to qemu_malloc conversion]

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Gautham R Shenoy e...@in.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p-local.c |7 ++
 hw/virtio-9p.c   |  169 +-
 2 files changed, 174 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c
index 1d2523b..aefb5a8 100644
--- a/hw/virtio-9p-local.c
+++ b/hw/virtio-9p-local.c
@@ -72,9 +72,16 @@ static int local_setuid(void *opaque, uid_t uid)
 return 0;
 }
 
+static ssize_t local_readlink(void *opaque, const char *path,
+char *buf, size_t bufsz)
+{
+return readlink(rpath(path), buf, bufsz);
+}
+
 static V9fsPosixFileOperations ops = {
 .lstat = local_lstat,
 .setuid = local_setuid,
+.readlink = local_readlink,
 };
 
 V9fsPosixFileOperations *virtio_9p_init_local(const char *path)
diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index fdff589..de5f6b0 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -102,6 +102,21 @@ static int posix_setuid(V9fsState *s, uid_t uid)
 return s-ops-setuid(s-ops-opaque, uid);
 }
 
+static ssize_t posix_readlink(V9fsState *s, V9fsString *path, V9fsString *buf)
+{
+ssize_t len;
+
+buf-data = qemu_malloc(1024);
+
+len = s-ops-readlink(s-ops-opaque, path-data, buf-data, 1024 - 1);
+if (len  -1) {
+buf-size = len;
+buf-data[len] = 0;
+}
+
+return len;
+}
+
 static void v9fs_string_free(V9fsString *str)
 {
 qemu_free(str-data);
@@ -109,6 +124,11 @@ static void v9fs_string_free(V9fsString *str)
 str-size = 0;
 }
 
+static void v9fs_string_null(V9fsString *str)
+{
+v9fs_string_free(str);
+}
+
 static void v9fs_string_sprintf(V9fsString *str, const char *fmt, ...)
 {
 va_list ap;
@@ -124,6 +144,11 @@ static void v9fs_string_sprintf(V9fsString *str, const 
char *fmt, ...)
 str-size = err;
 }
 
+static size_t v9fs_string_size(V9fsString *str)
+{
+return str-size;
+}
+
 static V9fsFidState *lookup_fid(V9fsState *s, int32_t fid)
 {
 V9fsFidState *f;
@@ -437,6 +462,15 @@ static size_t pdu_marshal(V9fsPDU *pdu, size_t offset, 
const char *fmt, ...)
 return offset - old_offset;
 }
 
+static void v9fs_stat_free(V9fsStat *stat)
+{
+v9fs_string_free(stat-name);
+v9fs_string_free(stat-uid);
+v9fs_string_free(stat-gid);
+v9fs_string_free(stat-muid);
+v9fs_string_free(stat-extension);
+}
+
 static void complete_pdu(V9fsState *s, V9fsPDU *pdu, ssize_t len)
 {
 int8_t id = pdu-id + 1; /* Response */
@@ -472,6 +506,88 @@ static void complete_pdu(V9fsState *s, V9fsPDU *pdu, 
ssize_t len)
 free_pdu(s, pdu);
 }
 
+static uint32_t stat_to_v9mode(const struct stat *stbuf)
+{
+uint32_t mode;
+
+mode = stbuf-st_mode  0777;
+if (S_ISDIR(stbuf-st_mode))
+mode |= P9_STAT_MODE_DIR;
+
+if (dotu) {
+if (S_ISLNK(stbuf-st_mode))
+mode |= P9_STAT_MODE_SYMLINK;
+if (S_ISSOCK(stbuf-st_mode))
+mode |= P9_STAT_MODE_SOCKET;
+if (S_ISFIFO(stbuf-st_mode))
+mode |= P9_STAT_MODE_NAMED_PIPE;
+if (S_ISBLK(stbuf-st_mode) || S_ISCHR(stbuf-st_mode))
+mode |= P9_STAT_MODE_DEVICE;
+if (stbuf-st_mode  S_ISUID)
+mode |= P9_STAT_MODE_SETUID;
+if (stbuf-st_mode  S_ISGID)
+mode |= P9_STAT_MODE_SETGID;
+if (stbuf-st_mode  S_ISVTX)
+mode |= P9_STAT_MODE_SETVTX;
+}
+
+return mode;
+}
+
+static void stat_to_v9stat(V9fsState *s, V9fsString *name,
+const struct stat *stbuf,
+V9fsStat *v9stat)
+{
+int err;
+const char *str;
+
+memset(v9stat, 0, sizeof(*v9stat));
+
+stat_to_qid(stbuf, v9stat-qid);
+v9stat-mode = stat_to_v9mode(stbuf);
+v9stat-atime = stbuf-st_atime;
+v9stat-mtime = stbuf-st_mtime;
+v9stat-length = stbuf-st_size;
+
+v9fs_string_null(v9stat-uid);
+v9fs_string_null(v9stat-gid);
+v9fs_string_null(v9stat-muid);
+
+if (dotu) {
+v9stat-n_uid = stbuf-st_uid;
+v9stat-n_gid = stbuf-st_gid;
+v9stat-n_muid = 0;
+
+v9fs_string_null(v9stat-extension);
+
+if (v9stat-mode  P9_STAT_MODE_SYMLINK) {
+err = posix_readlink(s, name, v9stat-extension);
+BUG_ON(err == -1);
+v9stat-extension.data[err] = 0;
+v9stat-extension.size = err;
+} else if (v9stat-mode  P9_STAT_MODE_DEVICE) {
+v9fs_string_sprintf(v9stat-extension, %c %u %u,
+S_ISCHR(stbuf-st_mode) ? 'c' : 'b',
+major(stbuf-st_rdev), minor(stbuf-st_rdev));
+}
+}
+
+str = strrchr(name-data, '/');
+if (str)
+str += 1;
+else
+str = name-data;
+
+

[Qemu-devel] [PATCH -v2 08/22] virtio-9p: Implement P9_TCLUNK

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

This patch gets ls -al to work

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p.c |   16 ++--
 1 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index 9bc0a57..3ac6255 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -1303,8 +1303,20 @@ out:
 
 static void v9fs_clunk(V9fsState *s, V9fsPDU *pdu)
 {
-if (debug_9p_pdu)
-pprint_pdu(pdu);
+int32_t fid;
+size_t offset = 7;
+int err;
+
+pdu_unmarshal(pdu, offset, d, fid);
+
+err = free_fid(s, fid);
+if (err  0)
+goto out;
+
+offset = 7;
+err = offset;
+out:
+complete_pdu(s, pdu, err);
 }
 
 static void v9fs_write(V9fsState *s, V9fsPDU *pdu)
-- 
1.7.0.2.273.gc2413

[Qemu-devel] [PATCH -v2 05/22] virtio-9p: Implement P9_TWALK

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Gautham R Shenoy e...@in.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p-local.c |   12 +++
 hw/virtio-9p.c   |  219 +-
 2 files changed, 229 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c
index aefb5a8..409f5b0 100644
--- a/hw/virtio-9p-local.c
+++ b/hw/virtio-9p-local.c
@@ -78,10 +78,22 @@ static ssize_t local_readlink(void *opaque, const char 
*path,
 return readlink(rpath(path), buf, bufsz);
 }
 
+static int local_close(void *opaque, int fd)
+{
+return close(fd);
+}
+
+static int local_closedir(void *opaque, DIR *dir)
+{
+return closedir(dir);
+}
+
 static V9fsPosixFileOperations ops = {
 .lstat = local_lstat,
 .setuid = local_setuid,
 .readlink = local_readlink,
+.close = local_close,
+.closedir = local_closedir,
 };
 
 V9fsPosixFileOperations *virtio_9p_init_local(const char *path)
diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index de5f6b0..784d399 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -117,6 +117,22 @@ static ssize_t posix_readlink(V9fsState *s, V9fsString 
*path, V9fsString *buf)
 return len;
 }
 
+static int posix_close(V9fsState *s, int fd)
+{
+return s-ops-close(s-ops-opaque, fd);
+}
+
+static int posix_closedir(V9fsState *s, DIR *dir)
+{
+return s-ops-closedir(s-ops-opaque, dir);
+}
+
+static void v9fs_string_init(V9fsString *str)
+{
+str-data = NULL;
+str-size = 0;
+}
+
 static void v9fs_string_free(V9fsString *str)
 {
 qemu_free(str-data);
@@ -144,6 +160,12 @@ static void v9fs_string_sprintf(V9fsString *str, const 
char *fmt, ...)
 str-size = err;
 }
 
+static void v9fs_string_copy(V9fsString *lhs, V9fsString *rhs)
+{
+v9fs_string_free(lhs);
+v9fs_string_sprintf(lhs, %s, rhs-data);
+}
+
 static size_t v9fs_string_size(V9fsString *str)
 {
 return str-size;
@@ -184,6 +206,31 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
 return f;
 }
 
+static int free_fid(V9fsState *s, int32_t fid)
+{
+V9fsFidState **fidpp, *fidp;
+
+for (fidpp = s-fid_list; *fidpp; fidpp = (*fidpp)-next) {
+if ((*fidpp)-fid == fid)
+break;
+}
+
+if (*fidpp == NULL)
+return -ENOENT;
+
+fidp = *fidpp;
+*fidpp = fidp-next;
+
+if (fidp-fd != -1)
+posix_close(s, fidp-fd);
+if (fidp-dir)
+posix_closedir(s, fidp-dir);
+v9fs_string_free(fidp-path);
+qemu_free(fidp);
+
+return 0;
+}
+
 #define P9_QID_TYPE_DIR0x80
 #define P9_QID_TYPE_SYMLINK0x02
 
@@ -689,10 +736,178 @@ out:
 qemu_free(vs);
 }
 
+typedef struct V9fsWalkState {
+V9fsPDU *pdu;
+size_t offset;
+int32_t fid;
+int32_t newfid;
+int16_t nwnames;
+int name_idx;
+V9fsQID *qids;
+V9fsFidState *fidp;
+V9fsFidState *newfidp;
+V9fsString path;
+V9fsString *wnames;
+struct stat stbuf;
+} V9fsWalkState;
+
+static void v9fs_walk_complete(V9fsState *s, V9fsWalkState *vs, int err)
+{
+complete_pdu(s, vs-pdu, err);
+
+if(vs-nwnames) {
+for (vs-name_idx = 0; vs-name_idx  vs-nwnames; vs-name_idx++)
+v9fs_string_free(vs-wnames[vs-name_idx]);
+
+qemu_free(vs-wnames);
+qemu_free(vs-qids);
+}
+}
+
+static void v9fs_walk_marshal(V9fsWalkState *vs)
+{
+int i;
+vs-offset = 7;
+vs-offset += pdu_marshal(vs-pdu, vs-offset, w, vs-nwnames);
+
+for (i = 0; i  vs-nwnames; i++)
+vs-offset += pdu_marshal(vs-pdu, vs-offset, Q, vs-qids[i]);
+}
+
+static void v9fs_walk_post_newfid_lstat(V9fsState *s, V9fsWalkState *vs,
+int err)
+{
+if (err == -1) {
+free_fid(s, vs-newfid);
+v9fs_string_free(vs-path);
+err = -ENOENT;
+goto out;
+}
+
+stat_to_qid(vs-stbuf, vs-qids[vs-name_idx]);
+
+vs-name_idx++;
+if (vs-name_idx  vs-nwnames) {
+v9fs_string_sprintf(vs-path, %s/%s, vs-newfidp-path.data,
+vs-wnames[vs-name_idx].data);
+v9fs_string_copy(vs-newfidp-path, vs-path);
+
+err = posix_lstat(s, vs-newfidp-path, vs-stbuf);
+v9fs_walk_post_newfid_lstat(s, vs, err);
+return;
+}
+
+v9fs_string_free(vs-path);
+v9fs_walk_marshal(vs);
+err = vs-offset;
+out:
+v9fs_walk_complete(s, vs, err);
+}
+
+static void v9fs_walk_post_oldfid_lstat(V9fsState *s, V9fsWalkState *vs,
+int err)
+{
+if (err == -1) {
+v9fs_string_free(vs-path);
+err = -ENOENT;
+goto out;
+}
+
+stat_to_qid(vs-stbuf, vs-qids[vs-name_idx]);
+vs-name_idx++;
+if (vs-name_idx  vs-nwnames) {
+
+v9fs_string_sprintf(vs-path, %s/%s,
+vs-fidp-path.data, vs-wnames[vs-name_idx].data);
+

[Qemu-devel] [PATCH -v2 09/22] virtio-9p: Implement P9_TWRITE

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

This gets writing to a file to work.

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Venkateswararao Jujjuri jv...@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p-local.c |7 
 hw/virtio-9p.c   |   97 -
 2 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c
index d77ecc2..c5d1db3 100644
--- a/hw/virtio-9p-local.c
+++ b/hw/virtio-9p-local.c
@@ -129,6 +129,12 @@ static off_t local_lseek(void *opaque, int fd, off_t 
offset, int whence)
 return lseek(fd, offset, whence);
 }
 
+static ssize_t local_writev(void *opaque, int fd, const struct iovec *iov,
+   int iovcnt)
+{
+return writev(fd, iov, iovcnt);
+}
+
 static V9fsPosixFileOperations ops = {
 .lstat = local_lstat,
 .setuid = local_setuid,
@@ -143,6 +149,7 @@ static V9fsPosixFileOperations ops = {
 .seekdir = local_seekdir,
 .readv = local_readv,
 .lseek = local_lseek,
+.writev = local_writev,
 };
 
 V9fsPosixFileOperations *virtio_9p_init_local(const char *path)
diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index 3ac6255..bc26d66 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -168,6 +168,12 @@ static off_t posix_lseek(V9fsState *s, int fd, off_t 
offset, int whence)
 return s-ops-lseek(s-ops-opaque, fd, offset, whence);
 }
 
+static int posix_writev(V9fsState *s, int fd, const struct iovec *iov,
+   int iovcnt)
+{
+return s-ops-writev(s-ops-opaque, fd, iov, iovcnt);
+}
+
 static void v9fs_string_init(V9fsString *str)
 {
 str-data = NULL;
@@ -1319,10 +1325,97 @@ out:
 complete_pdu(s, pdu, err);
 }
 
+typedef struct V9fsWriteState {
+V9fsPDU *pdu;
+size_t offset;
+int32_t fid;
+int32_t len;
+int32_t count;
+int32_t total;
+int64_t off;
+V9fsFidState *fidp;
+struct iovec iov[128]; /* FIXME: bad, bad, bad */
+struct iovec *sg;
+int cnt;
+} V9fsWriteState;
+
+static void v9fs_write_post_writev(V9fsState *s, V9fsWriteState *vs,
+   ssize_t err)
+{
+BUG_ON(vs-len  0);
+vs-total += vs-len;
+vs-sg = adjust_sg(vs-sg, vs-len, vs-cnt);
+if (vs-total  vs-count  vs-len  0) {
+do {
+if (0)
+print_sg(vs-sg, vs-cnt);
+vs-len =  posix_writev(s, vs-fidp-fd, vs-sg, vs-cnt);
+} while (vs-len == -1  errno == EINTR);
+v9fs_write_post_writev(s, vs, err);
+}
+vs-offset += pdu_marshal(vs-pdu, vs-offset, d, vs-total);
+
+err = vs-offset;
+complete_pdu(s, vs-pdu, err);
+qemu_free(vs);
+}
+
+static void v9fs_write_post_lseek(V9fsState *s, V9fsWriteState *vs, ssize_t 
err)
+{
+BUG_ON(err == -1);
+
+vs-sg = cap_sg(vs-sg, vs-count, vs-cnt);
+
+if (vs-total  vs-count) {
+do {
+if (0)
+print_sg(vs-sg, vs-cnt);
+vs-len = posix_writev(s, vs-fidp-fd, vs-sg, vs-cnt);
+} while (vs-len == -1  errno == EINTR);
+
+v9fs_write_post_writev(s, vs, err);
+return;
+}
+
+complete_pdu(s, vs-pdu, err);
+qemu_free(vs);
+}
+
 static void v9fs_write(V9fsState *s, V9fsPDU *pdu)
 {
-if (debug_9p_pdu)
-pprint_pdu(pdu);
+V9fsWriteState *vs;
+ssize_t err;
+
+vs = qemu_malloc(sizeof(*vs));
+
+vs-pdu = pdu;
+vs-offset = 7;
+vs-sg = vs-iov;
+vs-total = 0;
+vs-len = 0;
+
+pdu_unmarshal(vs-pdu, vs-offset, dqdv, vs-fid, vs-off, vs-count,
+vs-sg, vs-cnt);
+
+vs-fidp = lookup_fid(s, vs-fid);
+if (vs-fidp == NULL) {
+err = -EINVAL;
+goto out;
+}
+
+if (vs-fidp-fd == -1) {
+err = -EINVAL;
+goto out;
+}
+
+err = posix_lseek(s, vs-fidp-fd, vs-off, SEEK_SET);
+
+v9fs_write_post_lseek(s, vs, err);
+return;
+
+out:
+complete_pdu(s, vs-pdu, err);
+qemu_free(vs);
 }
 
 static void v9fs_create(V9fsState *s, V9fsPDU *pdu)
-- 
1.7.0.2.273.gc2413

[Qemu-devel] [PATCH -v2 06/22] virtio-9p: Implement P9_TOPEN

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Gautham R Shenoy e...@in.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p-local.c |   12 
 hw/virtio-9p.c   |  145 +++--
 2 files changed, 151 insertions(+), 6 deletions(-)

diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c
index 409f5b0..d8cb70d 100644
--- a/hw/virtio-9p-local.c
+++ b/hw/virtio-9p-local.c
@@ -88,12 +88,24 @@ static int local_closedir(void *opaque, DIR *dir)
 return closedir(dir);
 }
 
+static int local_open(void *opaque, const char *path, int flags)
+{
+return open(rpath(path), flags);
+}
+
+static DIR *local_opendir(void *opaque, const char *path)
+{
+return opendir(rpath(path));
+}
+
 static V9fsPosixFileOperations ops = {
 .lstat = local_lstat,
 .setuid = local_setuid,
 .readlink = local_readlink,
 .close = local_close,
 .closedir = local_closedir,
+.open = local_open,
+.opendir = local_opendir,
 };
 
 V9fsPosixFileOperations *virtio_9p_init_local(const char *path)
diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index 784d399..f1df0b9 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -127,6 +127,16 @@ static int posix_closedir(V9fsState *s, DIR *dir)
 return s-ops-closedir(s-ops-opaque, dir);
 }
 
+static int posix_open(V9fsState *s, V9fsString *path, int flags)
+{
+return s-ops-open(s-ops-opaque, path-data, flags);
+}
+
+static DIR *posix_opendir(V9fsState *s, V9fsString *path)
+{
+return s-ops-opendir(s-ops-opaque, path-data);
+}
+
 static void v9fs_string_init(V9fsString *str)
 {
 str-data = NULL;
@@ -910,15 +920,138 @@ out:
 v9fs_walk_complete(s, vs, err);
 }
 
-static void v9fs_clunk(V9fsState *s, V9fsPDU *pdu)
+typedef struct V9fsOpenState {
+V9fsPDU *pdu;
+size_t offset;
+int32_t fid;
+int8_t mode;
+V9fsFidState *fidp;
+V9fsQID qid;
+struct stat stbuf;
+
+} V9fsOpenState;
+
+enum {
+Oread  = 0x00,
+Owrite = 0x01,
+Ordwr  = 0x02,
+Oexec  = 0x03,
+Oexcl  = 0x04,
+Otrunc = 0x10,
+Orexec = 0x20,
+Orclose= 0x40,
+Oappend= 0x80,
+};
+
+static int omode_to_uflags(int8_t mode)
 {
-if (debug_9p_pdu)
-pprint_pdu(pdu);
+int ret = 0;
+
+switch (mode  3) {
+case Oread:
+ret = O_RDONLY;
+break;
+case Ordwr:
+ret = O_RDWR;
+break;
+case Owrite:
+ret = O_WRONLY;
+break;
+case Oexec:
+ret = O_RDONLY;
+break;
+}
+
+if (mode  Otrunc)
+ret |= O_TRUNC;
+
+if (mode  Oappend)
+ret |= O_APPEND;
+
+if (mode  Oexcl)
+ret |= O_EXCL;
+
+return ret;
+}
+
+static void v9fs_open_post_opendir(V9fsState *s, V9fsOpenState *vs, int err)
+{
+if (vs-fidp-dir == NULL) {
+err = -errno;
+goto out;
+}
+
+vs-offset += pdu_marshal(vs-pdu, vs-offset, Qd, vs-qid, 0);
+err = vs-offset;
+out:
+complete_pdu(s, vs-pdu, err);
+qemu_free(vs);
+
 }
 
-static void v9fs_open(V9fsState *s, V9fsPDU *pdu)
-{if (debug_9p_pdu)
-pprint_pdu(pdu);
+static void v9fs_open_post_open(V9fsState *s, V9fsOpenState *vs, int err)
+{
+if (vs-fidp-fd == -1) {
+err = -errno;
+goto out;
+}
+
+vs-offset += pdu_marshal(vs-pdu, vs-offset, Qd, vs-qid, 0);
+err = vs-offset;
+out:
+complete_pdu(s, vs-pdu, err);
+qemu_free(vs);
+}
+
+static void v9fs_open_post_lstat(V9fsState *s, V9fsOpenState *vs, int err)
+{
+BUG_ON(err == -1);
+
+stat_to_qid(vs-stbuf, vs-qid);
+
+if (S_ISDIR(vs-stbuf.st_mode)) {
+vs-fidp-dir = posix_opendir(s, vs-fidp-path);
+v9fs_open_post_opendir(s, vs, err);
+} else {
+vs-fidp-fd = posix_open(s, vs-fidp-path,
+omode_to_uflags(vs-mode));
+v9fs_open_post_open(s, vs, err);
+}
+
+}
+
+static void v9fs_open(V9fsState *s, V9fsPDU *pdu)
+{
+
+V9fsOpenState *vs;
+ssize_t err = 0;
+
+
+vs = qemu_malloc(sizeof(*vs));
+vs-pdu = pdu;
+vs-offset = 7;
+
+pdu_unmarshal(vs-pdu, vs-offset, db, vs-fid, vs-mode);
+
+vs-fidp = lookup_fid(s, vs-fid);
+if (vs-fidp == NULL) {
+err = -ENOENT;
+goto out;
+}
+
+err = posix_lstat(s, vs-fidp-path, vs-stbuf);
+
+v9fs_open_post_lstat(s, vs, err);
+return;
+out:
+complete_pdu(s, pdu, err);
+qemu_free(vs);
+}
+
+static void v9fs_clunk(V9fsState *s, V9fsPDU *pdu)
+{
+if (debug_9p_pdu)
+   pprint_pdu(pdu);
 }
 
 static void v9fs_read(V9fsState *s, V9fsPDU *pdu)
-- 
1.7.0.2.273.gc2413

[Qemu-devel] [PATCH -v2 07/22] virtio-9p: Implement P9_TREAD

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Venkateswararao Jujjuri jv...@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p-local.c |   37 
 hw/virtio-9p.c   |  253 +-
 2 files changed, 287 insertions(+), 3 deletions(-)

diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c
index d8cb70d..d77ecc2 100644
--- a/hw/virtio-9p-local.c
+++ b/hw/virtio-9p-local.c
@@ -98,6 +98,37 @@ static DIR *local_opendir(void *opaque, const char *path)
 return opendir(rpath(path));
 }
 
+static void local_rewinddir(void *opaque, DIR *dir)
+{
+return rewinddir(dir);
+}
+
+static off_t local_telldir(void *opaque, DIR *dir)
+{
+return telldir(dir);
+}
+
+static struct dirent *local_readdir(void *opaque, DIR *dir)
+{
+return readdir(dir);
+}
+
+static void local_seekdir(void *opaque, DIR *dir, off_t off)
+{
+return seekdir(dir, off);
+}
+
+static ssize_t local_readv(void *opaque, int fd, const struct iovec *iov,
+  int iovcnt)
+{
+return readv(fd, iov, iovcnt);
+}
+
+static off_t local_lseek(void *opaque, int fd, off_t offset, int whence)
+{
+return lseek(fd, offset, whence);
+}
+
 static V9fsPosixFileOperations ops = {
 .lstat = local_lstat,
 .setuid = local_setuid,
@@ -106,6 +137,12 @@ static V9fsPosixFileOperations ops = {
 .closedir = local_closedir,
 .open = local_open,
 .opendir = local_opendir,
+.rewinddir = local_rewinddir,
+.telldir = local_telldir,
+.readdir = local_readdir,
+.seekdir = local_seekdir,
+.readv = local_readv,
+.lseek = local_lseek,
 };
 
 V9fsPosixFileOperations *virtio_9p_init_local(const char *path)
diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index f1df0b9..9bc0a57 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -137,6 +137,37 @@ static DIR *posix_opendir(V9fsState *s, V9fsString *path)
 return s-ops-opendir(s-ops-opaque, path-data);
 }
 
+static void posix_rewinddir(V9fsState *s, DIR *dir)
+{
+return s-ops-rewinddir(s-ops-opaque, dir);
+}
+
+static off_t posix_telldir(V9fsState *s, DIR *dir)
+{
+return s-ops-telldir(s-ops-opaque, dir);
+}
+
+static struct dirent *posix_readdir(V9fsState *s, DIR *dir)
+{
+return s-ops-readdir(s-ops-opaque, dir);
+}
+
+static void posix_seekdir(V9fsState *s, DIR *dir, off_t off)
+{
+return s-ops-seekdir(s-ops-opaque, dir, off);
+}
+
+static int posix_readv(V9fsState *s, int fd, const struct iovec *iov,
+  int iovcnt)
+{
+return s-ops-readv(s-ops-opaque, fd, iov, iovcnt);
+}
+
+static off_t posix_lseek(V9fsState *s, int fd, off_t offset, int whence)
+{
+return s-ops-lseek(s-ops-opaque, fd, offset, whence);
+}
+
 static void v9fs_string_init(V9fsString *str)
 {
 str-data = NULL;
@@ -1048,14 +1079,230 @@ out:
 qemu_free(vs);
 }
 
-static void v9fs_clunk(V9fsState *s, V9fsPDU *pdu)
+static struct iovec *adjust_sg(struct iovec *sg, int len, int *iovcnt)
 {
-if (debug_9p_pdu)
-   pprint_pdu(pdu);
+while (len  *iovcnt) {
+if (len  sg-iov_len) {
+sg-iov_len -= len;
+sg-iov_base += len;
+len = 0;
+} else {
+len -= sg-iov_len;
+sg++;
+*iovcnt -= 1;
+}
+}
+
+return sg;
+}
+
+static struct iovec *cap_sg(struct iovec *sg, int cap, int *cnt)
+{
+int i;
+int total = 0;
+
+for (i = 0; i  *cnt; i++) {
+if ((total + sg[i].iov_len)  cap) {
+sg[i].iov_len -= ((total + sg[i].iov_len) - cap);
+i++;
+break;
+}
+total += sg[i].iov_len;
+}
+
+*cnt = i;
+
+return sg;
+}
+
+static void print_sg(struct iovec *sg, int cnt)
+{
+int i;
+
+printf(sg[%d]: {, cnt);
+for (i = 0; i  cnt; i++) {
+if (i)
+printf(, );
+printf((%p, %zd), sg[i].iov_base, sg[i].iov_len);
+}
+printf(}\n);
+}
+
+typedef struct V9fsReadState {
+V9fsPDU *pdu;
+size_t offset;
+int32_t fid;
+int32_t count;
+int32_t total;
+int64_t off;
+V9fsFidState *fidp;
+struct iovec iov[128]; /* FIXME: bad, bad, bad */
+struct iovec *sg;
+off_t dir_pos;
+struct dirent *dent;
+struct stat stbuf;
+V9fsString name;
+V9fsStat v9stat;
+int32_t len;
+int32_t cnt;
+int32_t max_count;
+} V9fsReadState;
+
+static void v9fs_read_post_readdir(V9fsState *, V9fsReadState *, ssize_t );
+
+static void v9fs_read_post_seekdir(V9fsState *s, V9fsReadState *vs, ssize_t 
err)
+{
+v9fs_stat_free(vs-v9stat);
+v9fs_string_free(vs-name);
+vs-offset += pdu_marshal(vs-pdu, vs-offset, d, vs-count);
+vs-offset += vs-count;
+err = vs-offset;
+complete_pdu(s, vs-pdu, err);
+qemu_free(vs);
+return;
+}
+
+static void v9fs_read_post_dir_lstat(V9fsState *s, V9fsReadState *vs,
+

[Qemu-devel] [PATCH -v2 11/22] virtio-9p: Implement P9_TWSTAT

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

This gets file and directory creation to work

[jv...@linux.vnet.ibm.com: strdup to qemu_strdup conversion]

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Gautham R Shenoy e...@in.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p-local.c |   42 +
 hw/virtio-9p.c   |  236 +-
 2 files changed, 274 insertions(+), 4 deletions(-)

diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c
index cdae5c0..829e79a 100644
--- a/hw/virtio-9p-local.c
+++ b/hw/virtio-9p-local.c
@@ -207,6 +207,44 @@ static int local_link(void *opaque, const char *oldpath, 
const char *newpath)
 return err;
 }
 
+static int local_truncate(void *opaque, const char *path, off_t size)
+{
+return truncate(rpath(path), size);
+}
+
+static int local_rename(void *opaque, const char *oldpath,
+   const char *newpath)
+{
+char *tmp;
+int err;
+
+tmp = qemu_strdup(rpath(oldpath));
+if (tmp == NULL)
+   return -1;
+
+err = rename(tmp, rpath(newpath));
+if (err == -1) {
+   int serrno = errno;
+   qemu_free(tmp);
+   errno = serrno;
+} else
+   qemu_free(tmp);
+
+return err;
+
+}
+
+static int local_chown(void *opaque, const char *path, uid_t uid, gid_t gid)
+{
+return chown(rpath(path), uid, gid);
+}
+
+static int local_utime(void *opaque, const char *path,
+  const struct utimbuf *buf)
+{
+return utime(rpath(path), buf);
+}
+
 static V9fsPosixFileOperations ops = {
 .lstat = local_lstat,
 .setuid = local_setuid,
@@ -230,6 +268,10 @@ static V9fsPosixFileOperations ops = {
 .open2 = local_open2,
 .symlink = local_symlink,
 .link = local_link,
+.truncate = local_truncate,
+.rename = local_rename,
+.chown = local_chown,
+.utime = local_utime,
 };
 
 V9fsPosixFileOperations *virtio_9p_init_local(const char *path)
diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index 067cc85..c8995a3 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -215,6 +215,28 @@ static int posix_link(V9fsState *s, V9fsString *oldpath, 
V9fsString *newpath)
 return s-ops-link(s-ops-opaque, oldpath-data, newpath-data);
 }
 
+static int posix_truncate(V9fsState *s, V9fsString *path, off_t size)
+{
+return s-ops-truncate(s-ops-opaque, path-data, size);
+}
+
+static int posix_rename(V9fsState *s, V9fsString *oldpath,
+   V9fsString *newpath)
+{
+return s-ops-rename(s-ops-opaque, oldpath-data, newpath-data);
+}
+
+static int posix_chown(V9fsState *s, V9fsString *path, uid_t uid, gid_t gid)
+{
+return s-ops-chown(s-ops-opaque, path-data, uid, gid);
+}
+
+static int posix_utime(V9fsState *s, V9fsString *path,
+  const struct utimbuf *buf)
+{
+return s-ops-utime(s-ops-opaque, path-data, buf);
+}
+
 static void v9fs_string_init(V9fsString *str)
 {
 str-data = NULL;
@@ -398,7 +420,8 @@ static size_t pdu_unpack(void *dst, V9fsPDU *pdu, size_t 
offset, size_t size)
 }
 
 /* FIXME i can do this with less variables */
-static size_t pdu_pack(V9fsPDU *pdu, size_t offset, const void *src, size_t 
size)
+static size_t pdu_pack(V9fsPDU *pdu, size_t offset, const void *src,
+   size_t size)
 {
 struct iovec *sg = pdu-elem.in_sg;
 size_t off = 0;
@@ -1613,7 +1636,8 @@ static void v9fs_create_post_lstat(V9fsState *s, 
V9fsCreateState *vs, int err)
 uint32_t major, minor;
 mode_t nmode = 0;
 
-if (sscanf(vs-extension.data, %c %u %u, ctype, major, minor) != 
3) {
+if (sscanf(vs-extension.data, %c %u %u, ctype, major,
+   minor) != 3) {
 err = -errno;
 v9fs_post_create(s, vs, err);
 }
@@ -1698,10 +1722,214 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
 pprint_pdu(pdu);
 }
 
+static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
+{
+mode_t ret;
+
+ret = mode  0777;
+if (mode  P9_STAT_MODE_DIR)
+ret |= S_IFDIR;
+
+if (dotu) {
+if (mode  P9_STAT_MODE_SYMLINK)
+ret |= S_IFLNK;
+if (mode  P9_STAT_MODE_SOCKET)
+ret |= S_IFSOCK;
+if (mode  P9_STAT_MODE_NAMED_PIPE)
+ret |= S_IFIFO;
+if (mode  P9_STAT_MODE_DEVICE) {
+if (extension  extension-data[0] == 'c')
+ret |= S_IFCHR;
+else
+ret |= S_IFBLK;
+}
+}
+
+if (!(ret~0777))
+ret |= S_IFREG;
+
+if (mode  P9_STAT_MODE_SETUID)
+ret |= S_ISUID;
+if (mode  P9_STAT_MODE_SETGID)
+ret |= S_ISGID;
+if (mode  P9_STAT_MODE_SETVTX)
+ret |= S_ISVTX;
+
+return ret;
+}
+
+typedef struct V9fsWstatState
+{
+V9fsPDU *pdu;
+size_t offset;
+int32_t fid;
+int16_t unused;
+V9fsStat v9stat;
+

[Qemu-devel] [PATCH -v2 14/22] virtio-9p: Add multiple mount point support

2010-03-16 Thread Aneesh Kumar K.V

This patch adds a mount tag name to the 9p config space. This tag should
uniquely identify the mount point and should be used in the mount
command as the device name.

Qemu command line for specifying 9p share directory now becomes
-device virtio-9p-pci,share_path=/mnt/,mount_tag=v_mnt
-device virtio-9p-pci,share_path=/tmp/,mount_tag=v_tmp

NOTE: We now limit the tag name to 32 characters because of a
virtio config space limitation.

Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/9p.h  |   26 ++
 hw/virtio-9p-local.c |  101 -
 hw/virtio-9p.c   |  206 -
 hw/virtio-9p.h   |  141 ---
 hw/virtio-pci.c  |8 +-
 hw/virtio.h  |3 +-
 6 files changed, 296 insertions(+), 189 deletions(-)
 create mode 100644 hw/9p.h

diff --git a/hw/9p.h b/hw/9p.h
new file mode 100644
index 000..f0ff45b
--- /dev/null
+++ b/hw/9p.h
@@ -0,0 +1,26 @@
+/*
+ * Virtio 9p
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *  Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_9P_H
+#define QEMU_9P_H
+
+#include stdbool.h
+
+typedef struct V9fsConf
+{
+char *share_path;
+/* tag name for the device */
+char *tag;
+} V9fsConf;
+
+#endif
diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c
index dca6175..4dd6b22 100644
--- a/hw/virtio-9p-local.c
+++ b/hw/virtio-9p-local.c
@@ -22,22 +22,20 @@
 #include sys/socket.h
 #include sys/un.h
 
-static const char *base_path;
-
-static const char *rpath(const char *path)
+static const char *rpath(V9fsState *s, const char *path)
 {
 /* FIXME: so wrong... */
 static char buffer[4096];
-snprintf(buffer, sizeof(buffer), %s/%s, base_path, path);
+snprintf(buffer, sizeof(buffer), %s/%s, s-fs_root, path);
 return buffer;
 }
 
-static int local_lstat(void *opaque, const char *path, struct stat *stbuf)
+static int local_lstat(V9fsState *s, const char *path, struct stat *stbuf)
 {
-return lstat(rpath(path), stbuf);
+return lstat(rpath(s, path), stbuf);
 }
 
-static int local_setuid(void *opaque, uid_t uid)
+static int local_setuid(V9fsState *s, uid_t uid)
 {
 struct passwd *pw;
 gid_t groups[33];
@@ -72,86 +70,86 @@ static int local_setuid(void *opaque, uid_t uid)
 return 0;
 }
 
-static ssize_t local_readlink(void *opaque, const char *path,
-char *buf, size_t bufsz)
+static ssize_t local_readlink(V9fsState *s, const char *path,
+ char *buf, size_t bufsz)
 {
-return readlink(rpath(path), buf, bufsz);
+return readlink(rpath(s, path), buf, bufsz);
 }
 
-static int local_close(void *opaque, int fd)
+static int local_close(V9fsState *s, int fd)
 {
 return close(fd);
 }
 
-static int local_closedir(void *opaque, DIR *dir)
+static int local_closedir(V9fsState *s, DIR *dir)
 {
 return closedir(dir);
 }
 
-static int local_open(void *opaque, const char *path, int flags)
+static int local_open(V9fsState *s, const char *path, int flags)
 {
-return open(rpath(path), flags);
+return open(rpath(s, path), flags);
 }
 
-static DIR *local_opendir(void *opaque, const char *path)
+static DIR *local_opendir(V9fsState *s, const char *path)
 {
-return opendir(rpath(path));
+return opendir(rpath(s, path));
 }
 
-static void local_rewinddir(void *opaque, DIR *dir)
+static void local_rewinddir(V9fsState *s, DIR *dir)
 {
 return rewinddir(dir);
 }
 
-static off_t local_telldir(void *opaque, DIR *dir)
+static off_t local_telldir(V9fsState *s, DIR *dir)
 {
 return telldir(dir);
 }
 
-static struct dirent *local_readdir(void *opaque, DIR *dir)
+static struct dirent *local_readdir(V9fsState *s, DIR *dir)
 {
 return readdir(dir);
 }
 
-static void local_seekdir(void *opaque, DIR *dir, off_t off)
+static void local_seekdir(V9fsState *s, DIR *dir, off_t off)
 {
 return seekdir(dir, off);
 }
 
-static ssize_t local_readv(void *opaque, int fd, const struct iovec *iov,
+static ssize_t local_readv(V9fsState *s, int fd, const struct iovec *iov,
   int iovcnt)
 {
 return readv(fd, iov, iovcnt);
 }
 
-static off_t local_lseek(void *opaque, int fd, off_t offset, int whence)
+static off_t local_lseek(V9fsState *s, int fd, off_t offset, int whence)
 {
 return lseek(fd, offset, whence);
 }
 
-static ssize_t local_writev(void *opaque, int fd, const struct iovec *iov,
+static ssize_t local_writev(V9fsState *s, int fd, const struct iovec *iov,
int iovcnt)
 {
 return writev(fd, iov, iovcnt);
 }
 
-static int local_chmod(void *opaque, const char *path, mode_t mode)
+static int local_chmod(V9fsState *s, const char *path, mode_t mode)
 {
-return chmod(rpath(path), mode);
+return chmod(rpath(s, path), mode);
 }
 
-static

[Qemu-devel] [PATCH -v2 18/22] virtio-9p: Fix sg usage in the code

2010-03-16 Thread Aneesh Kumar K.V

The sg list can contain more than one element, and we need to use the right
element when we are marshaling and unmarshaling data.
This patch also abstracts out the pack/unpack interface and makes sure
we use one function for doing both.

Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p-debug.c |   83 +
 hw/virtio-9p.c   |   57 --
 hw/virtio-9p.h   |9 +
 3 files changed, 98 insertions(+), 51 deletions(-)

diff --git a/hw/virtio-9p-debug.c b/hw/virtio-9p-debug.c
index 9230659..ee222db 100644
--- a/hw/virtio-9p-debug.c
+++ b/hw/virtio-9p-debug.c
@@ -29,92 +29,121 @@ static struct iovec *get_sg(V9fsPDU *pdu, int rx)
 return pdu-elem.out_sg;
 }
 
+static int get_sg_count(V9fsPDU *pdu, int rx)
+{
+if (rx)
+return pdu-elem.in_num;
+return pdu-elem.out_num;
+
+}
+
 static void pprint_int8(V9fsPDU *pdu, int rx, size_t *offsetp,
 const char *name)
 {
-struct iovec *sg = get_sg(pdu, rx);
+size_t copied;
+int count = get_sg_count(pdu, rx);
 size_t offset = *offsetp;
+struct iovec *sg = get_sg(pdu, rx);
 int8_t value;
 
-BUG_ON((offset + sizeof(value))  sg[0].iov_len);
+copied = do_pdu_unpack(value, sg, count, offset, sizeof(value));
 
-memcpy(value, sg[0].iov_base + offset, sizeof(value));
+BUG_ON(copied != sizeof(value));
 offset += sizeof(value);
-
 fprintf(llogfile, %s=0x%x, name, value);
-
 *offsetp = offset;
 }
 
 static void pprint_int16(V9fsPDU *pdu, int rx, size_t *offsetp,
 const char *name)
 {
+size_t copied;
+int count = get_sg_count(pdu, rx);
 struct iovec *sg = get_sg(pdu, rx);
 size_t offset = *offsetp;
 int16_t value;
 
-BUG_ON((offset + sizeof(value))  sg[0].iov_len);
 
-memcpy(value, sg[0].iov_base + offset, sizeof(value));
-offset += sizeof(value);
+copied = do_pdu_unpack(value, sg, count, offset, sizeof(value));
 
+BUG_ON(copied != sizeof(value));
+offset += sizeof(value);
 fprintf(llogfile, %s=0x%x, name, value);
-
 *offsetp = offset;
 }
 
 static void pprint_int32(V9fsPDU *pdu, int rx, size_t *offsetp,
 const char *name)
 {
+size_t copied;
+int count = get_sg_count(pdu, rx);
 struct iovec *sg = get_sg(pdu, rx);
 size_t offset = *offsetp;
 int32_t value;
 
-BUG_ON((offset + sizeof(value))  sg[0].iov_len);
 
-memcpy(value, sg[0].iov_base + offset, sizeof(value));
-offset += sizeof(value);
+copied = do_pdu_unpack(value, sg, count, offset, sizeof(value));
 
+BUG_ON(copied != sizeof(value));
+offset += sizeof(value);
 fprintf(llogfile, %s=0x%x, name, value);
-
 *offsetp = offset;
 }
 
 static void pprint_int64(V9fsPDU *pdu, int rx, size_t *offsetp,
 const char *name)
 {
+size_t copied;
+int count = get_sg_count(pdu, rx);
 struct iovec *sg = get_sg(pdu, rx);
 size_t offset = *offsetp;
 int64_t value;
 
-BUG_ON((offset + sizeof(value))  sg[0].iov_len);
 
-memcpy(value, sg[0].iov_base + offset, sizeof(value));
-offset += sizeof(value);
+copied = do_pdu_unpack(value, sg, count, offset, sizeof(value));
 
+BUG_ON(copied != sizeof(value));
+offset += sizeof(value);
 fprintf(llogfile, %s=0x% PRIx64, name, value);
-
 *offsetp = offset;
 }
 
 static void pprint_str(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name)
 {
+int sg_count = get_sg_count(pdu, rx);
 struct iovec *sg = get_sg(pdu, rx);
 size_t offset = *offsetp;
-int16_t size;
+uint16_t tmp_size, size;
 size_t result;
+size_t copied = 0;
+int i = 0;
 
-BUG_ON((offset + 2)  sg[0].iov_len);
-memcpy(size, sg[0].iov_base + offset, 2);
-offset += 2;
+/* get the size */
+copied = do_pdu_unpack(tmp_size, sg, sg_count, offset, sizeof(tmp_size));
+BUG_ON(copied != sizeof(tmp_size));
+size = le16_to_cpupu(tmp_size);
+offset += copied;
 
-BUG_ON((offset + size)  sg[0].iov_len);
 fprintf(llogfile, %s=, name);
-result = fwrite(sg[0].iov_base + offset, 1, size, llogfile);
-BUG_ON(result != size);
-offset += size;
-
-*offsetp = offset;
+for (i = 0; size  i  sg_count; i++) {
+size_t len;
+if (offset = sg[i].iov_len) {
+/* skip this sg */
+offset -= sg[i].iov_len;
+continue;
+} else {
+len = MIN(sg[i].iov_len - offset, size);
+result = fwrite(sg[i].iov_base + offset, 1, len, llogfile);
+BUG_ON(result != len);
+size -= len;
+copied += len;
+if (size) {
+offset = 0;
+continue;
+}
+}
+}
+*offsetp += copied;
 }
 
 static void pprint_qid(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name)
diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index

[Qemu-devel] [PATCH -v2 12/22] virtio-9p: Implement P9_TREMOVE

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

This gets file deletion to work

[mo...@in.ibm.com: Fix truncate to use the relative path]

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Gautham R Shenoy e...@in.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p-local.c |7 ++
 hw/virtio-9p.c   |   54 -
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c
index 829e79a..dca6175 100644
--- a/hw/virtio-9p-local.c
+++ b/hw/virtio-9p-local.c
@@ -245,6 +245,12 @@ static int local_utime(void *opaque, const char *path,
 return utime(rpath(path), buf);
 }
 
+static int local_remove(void *opaque, const char *path)
+{
+return remove(rpath(path));
+}
+
+
 static V9fsPosixFileOperations ops = {
 .lstat = local_lstat,
 .setuid = local_setuid,
@@ -272,6 +278,7 @@ static V9fsPosixFileOperations ops = {
 .rename = local_rename,
 .chown = local_chown,
 .utime = local_utime,
+.remove = local_remove,
 };
 
 V9fsPosixFileOperations *virtio_9p_init_local(const char *path)
diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index c8995a3..4478e57 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -237,6 +237,11 @@ static int posix_utime(V9fsState *s, V9fsString *path,
 return s-ops-utime(s-ops-opaque, path-data, buf);
 }
 
+static int posix_remove(V9fsState *s, V9fsString *path)
+{
+return s-ops-remove(s-ops-opaque, path-data);
+}
+
 static void v9fs_string_init(V9fsString *str)
 {
 str-data = NULL;
@@ -1716,10 +1721,55 @@ static void v9fs_flush(V9fsState *s, V9fsPDU *pdu)
 pprint_pdu(pdu);
 }
 
+typedef struct V9fsRemoveState {
+V9fsPDU *pdu;
+size_t offset;
+int32_t fid;
+V9fsFidState *fidp;
+} V9fsRemoveState;
+
+static void v9fs_remove_post_remove(V9fsState *s, V9fsRemoveState *vs,
+int err)
+{
+if (err) {
+err = -errno;
+goto out;
+}
+
+err = free_fid(s, vs-fid);
+if (err  0)
+goto out;
+
+err = vs-offset;
+out:
+complete_pdu(s, vs-pdu, err);
+qemu_free(vs);
+}
+
 static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
 {
-if (debug_9p_pdu)
-pprint_pdu(pdu);
+V9fsRemoveState *vs;
+int err = 0;
+
+vs = qemu_malloc(sizeof(*vs));
+vs-pdu = pdu;
+vs-offset = 7;
+
+pdu_unmarshal(vs-pdu, vs-offset, d, vs-fid);
+
+vs-fidp = lookup_fid(s, vs-fid);
+if (vs-fidp == NULL) {
+err = -EINVAL;
+goto out;
+}
+
+err = posix_remove(s, vs-fidp-path);
+v9fs_remove_post_remove(s, vs, err);
+return;
+
+out:
+complete_pdu(s, pdu, err);
+qemu_free(vs);
 }
 
 static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
-- 
1.7.0.2.273.gc2413

[Qemu-devel] [PATCH -v2 17/22] Implement sync support in 9p server

2010-03-16 Thread Aneesh Kumar K.V

From: M. Mohan Kumar mo...@in.ibm.com

When wstat is called with the stat field values set to the 'don't touch'
pattern, the 9p server interprets it as a request to guarantee that the
contents of the associated file are committed to stable storage before the
Rwstat message is returned. Implement this feature on the server side.

[jv...@linux.vnet.ibm.com:  Proper error handling and state friendliness]

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p-local.c |6 ++
 hw/virtio-9p.c   |   45 +
 hw/virtio-9p.h   |1 +
 3 files changed, 52 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c
index 4dd6b22..4584bf6 100644
--- a/hw/virtio-9p-local.c
+++ b/hw/virtio-9p-local.c
@@ -249,6 +249,11 @@ static int local_remove(V9fsState *s, const char *path)
 }
 
 
+static int local_fsync(V9fsState *s, int fd)
+{
+return fsync(fd);
+}
+
 static V9fsPosixFileOperations ops = {
 .lstat = local_lstat,
 .setuid = local_setuid,
@@ -277,6 +282,7 @@ static V9fsPosixFileOperations ops = {
 .chown = local_chown,
 .utime = local_utime,
 .remove = local_remove,
+.fsync = local_fsync,
 };
 
 V9fsPosixFileOperations *virtio_9p_init_local(const char *path)
diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index 3ddf2b9..3a5b3f0 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -172,6 +172,11 @@ static int posix_remove(V9fsState *s, V9fsString *path)
 return s-ops-remove(s, path-data);
 }
 
+static int posix_fsync(V9fsState *s, int fd)
+{
+return s-ops-fsync(s, fd);
+}
+
 static void v9fs_string_init(V9fsString *str)
 {
 str-data = NULL;
@@ -1889,6 +1894,39 @@ out:
 qemu_free(vs);
 }
 
+static void v9fs_wstat_post_fsync(V9fsState *s, V9fsWstatState *vs, int err)
+{
+if (err == -1) {
+err = -errno;
+}
+v9fs_stat_free(vs-v9stat);
+complete_pdu(s, vs-pdu, err);
+qemu_free(vs);
+}
+
+static int donttouch_stat(V9fsStat *stat)
+{
+if (stat-type == -1 
+   stat-dev == -1 
+   stat-qid.type == -1 
+   stat-qid.version == -1 
+   stat-qid.path == -1 
+   stat-mode == -1 
+   stat-atime == -1 
+   stat-mtime == -1 
+   stat-length == -1 
+   !stat-name.size 
+   !stat-uid.size 
+   !stat-gid.size 
+   !stat-muid.size 
+   stat-n_uid == -1 
+   stat-n_gid == -1 
+   stat-n_muid == -1)
+   return 1;
+   else
+   return 0;
+}
+
 static void v9fs_wstat(V9fsState *s, V9fsPDU *pdu)
 {
 V9fsWstatState *vs;
@@ -1906,6 +1944,13 @@ static void v9fs_wstat(V9fsState *s, V9fsPDU *pdu)
 goto out;
 }
 
+/* do we need to sync the file? */
+if (donttouch_stat(vs-v9stat)) {
+err = posix_fsync(s, vs-fidp-fd);
+v9fs_wstat_post_fsync(s, vs, err);
+return;
+}
+
 if (vs-v9stat.mode != -1) {
 if (vs-v9stat.mode  P9_STAT_MODE_DIR  vs-fidp-dir == NULL) {
 err = -EIO;
diff --git a/hw/virtio-9p.h b/hw/virtio-9p.h
index 3fc88a4..da0aa64 100644
--- a/hw/virtio-9p.h
+++ b/hw/virtio-9p.h
@@ -182,6 +182,7 @@ typedef struct V9fsPosixFileOpertions
 int (*fstat)(V9fsState *, int, struct stat *);
 int (*rename)(V9fsState *, const char *, const char *);
 int (*truncate)(V9fsState *, const char *, off_t);
+int (*fsync)(V9fsState *, int);
 void *opaque;
 } V9fsPosixFileOperations;
 
-- 
1.7.0.2.273.gc2413

[Qemu-devel] [PATCH -v2 21/22] virtio-9p: Remove unnecessary definition of fid

2010-03-16 Thread Aneesh Kumar K.V

We already have fid as a part of V9fsFidState so use that instead
of defining another variable

Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
Reviewed-by: Venkateswararao Jujjuri jv...@linux.vnet.ibm.com
---
 hw/virtio-9p.c |   62 ++-
 1 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index 3ce26ca..c8ab6b6 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -770,7 +770,6 @@ out:
 typedef struct V9fsStatState {
 V9fsPDU *pdu;
 size_t offset;
-int32_t fid;
 V9fsStat v9stat;
 V9fsFidState *fidp;
 struct stat stbuf;
@@ -798,6 +797,7 @@ out:
 
 static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
 {
+int32_t fid;
 V9fsStatState *vs;
 ssize_t err = 0;
 
@@ -807,9 +807,9 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
 
 memset(vs-v9stat, 0, sizeof(vs-v9stat));
 
-pdu_unmarshal(vs-pdu, vs-offset, d, vs-fid);
+pdu_unmarshal(vs-pdu, vs-offset, d, fid);
 
-vs-fidp = lookup_fid(s, vs-fid);
+vs-fidp = lookup_fid(s, fid);
 if (vs-fidp == NULL) {
err = -ENOENT;
 goto out;
@@ -828,8 +828,6 @@ out:
 typedef struct V9fsWalkState {
 V9fsPDU *pdu;
 size_t offset;
-int32_t fid;
-int32_t newfid;
 int16_t nwnames;
 int name_idx;
 V9fsQID *qids;
@@ -867,7 +865,7 @@ static void v9fs_walk_post_newfid_lstat(V9fsState *s, 
V9fsWalkState *vs,
 int err)
 {
 if (err == -1) {
-free_fid(s, vs-newfid);
+free_fid(s, vs-newfidp-fid);
 v9fs_string_free(vs-path);
 err = -ENOENT;
 goto out;
@@ -924,6 +922,7 @@ out:
 
 static void v9fs_walk(V9fsState *s, V9fsPDU *pdu)
 {
+int32_t fid, newfid;
 V9fsWalkState *vs;
 int err = 0;
 int i;
@@ -934,8 +933,8 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu)
 vs-qids = NULL;
 vs-offset = 7;
 
-vs-offset += pdu_unmarshal(vs-pdu, vs-offset, ddw, vs-fid,
-vs-newfid, vs-nwnames);
+vs-offset += pdu_unmarshal(vs-pdu, vs-offset, ddw, fid,
+newfid, vs-nwnames);
 
 if(vs-nwnames) {
 vs-wnames = qemu_mallocz(sizeof(vs-wnames[0]) * vs-nwnames);
@@ -948,14 +947,14 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu)
 }
 }
 
-vs-fidp = lookup_fid(s, vs-fid);
+vs-fidp = lookup_fid(s, fid);
 if (vs-fidp == NULL) {
 err = -ENOENT;
 goto out;
 }
 
 /* FIXME: is this really valid? */
-if (vs-fid == vs-newfid) {
+if (fid == newfid) {
 v9fs_string_init(vs-path);
 vs-name_idx = 0;
 
@@ -969,7 +968,7 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu)
 return;
 }
 } else {
-vs-newfidp = alloc_fid(s, vs-newfid);
+vs-newfidp = alloc_fid(s, newfid);
 if (vs-newfidp == NULL) {
 err = -EINVAL;
 goto out;
@@ -1000,7 +999,6 @@ out:
 typedef struct V9fsOpenState {
 V9fsPDU *pdu;
 size_t offset;
-int32_t fid;
 int8_t mode;
 V9fsFidState *fidp;
 V9fsQID qid;
@@ -1105,7 +1103,7 @@ out:
 
 static void v9fs_open(V9fsState *s, V9fsPDU *pdu)
 {
-
+int32_t fid;
 V9fsOpenState *vs;
 ssize_t err = 0;
 
@@ -1114,9 +1112,9 @@ static void v9fs_open(V9fsState *s, V9fsPDU *pdu)
 vs-pdu = pdu;
 vs-offset = 7;
 
-pdu_unmarshal(vs-pdu, vs-offset, db, vs-fid, vs-mode);
+pdu_unmarshal(vs-pdu, vs-offset, db, fid, vs-mode);
 
-vs-fidp = lookup_fid(s, vs-fid);
+vs-fidp = lookup_fid(s, fid);
 if (vs-fidp == NULL) {
 err = -ENOENT;
 goto out;
@@ -1183,7 +1181,6 @@ static void print_sg(struct iovec *sg, int cnt)
 typedef struct V9fsReadState {
 V9fsPDU *pdu;
 size_t offset;
-int32_t fid;
 int32_t count;
 int32_t total;
 int64_t off;
@@ -1346,6 +1343,7 @@ out:
 
 static void v9fs_read(V9fsState *s, V9fsPDU *pdu)
 {
+int32_t fid;
 V9fsReadState *vs;
 ssize_t err = 0;
 
@@ -1356,9 +1354,9 @@ static void v9fs_read(V9fsState *s, V9fsPDU *pdu)
 vs-len = 0;
 vs-count = 0;
 
-pdu_unmarshal(vs-pdu, vs-offset, dqd, vs-fid, vs-off, vs-count);
+pdu_unmarshal(vs-pdu, vs-offset, dqd, fid, vs-off, vs-count);
 
-vs-fidp = lookup_fid(s, vs-fid);
+vs-fidp = lookup_fid(s, fid);
 if (vs-fidp == NULL) {
 err = -EINVAL;
 goto out;
@@ -1407,7 +1405,6 @@ out:
 typedef struct V9fsWriteState {
 V9fsPDU *pdu;
 size_t offset;
-int32_t fid;
 int32_t len;
 int32_t count;
 int32_t total;
@@ -1476,6 +1473,7 @@ out:
 
 static void v9fs_write(V9fsState *s, V9fsPDU *pdu)
 {
+int32_t fid;
 V9fsWriteState *vs;
 ssize_t err;
 
@@ -1487,10 +1485,10 @@ static void v9fs_write(V9fsState *s, V9fsPDU *pdu)
 vs-total = 0;
 vs-len = 0;
 
-pdu_unmarshal(vs-pdu, vs-offset, dqdv, vs-fid, vs-off,

[Qemu-devel] [PATCH -v2 22/22] virtio-9p: Update existing fid path on rename

2010-03-16 Thread Aneesh Kumar K.V

We need to make sure that we update the path component of the existing
fids when we rename a file. The client is not expected to clunk these fids
pointing to the old name. If we don't update them, any operation on an old,
unopened fid will still refer to the old name and will fail.

Add a BUG_ON to make sure that when we clone a fid, we don't have an open
descriptor attached to the fid. We also need to make sure that when we open a
fid, the specified fid is not already open. Catch that case by adding a BUG_ON.

Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p.c |   39 +++
 1 files changed, 39 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index c8ab6b6..9aa4b72 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -955,6 +955,9 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu)
 
 /* FIXME: is this really valid? */
 if (fid == newfid) {
+
+BUG_ON(vs-fidp-fd != -1);
+BUG_ON(vs-fidp-dir);
 v9fs_string_init(vs-path);
 vs-name_idx = 0;
 
@@ -1120,6 +1123,9 @@ static void v9fs_open(V9fsState *s, V9fsPDU *pdu)
 goto out;
 }
 
+BUG_ON(vs-fidp-fd != -1);
+BUG_ON(vs-fidp-dir);
+
 err = posix_lstat(s, vs-fidp-path, vs-stbuf);
 
 v9fs_open_post_lstat(s, vs, err);
@@ -1877,8 +1883,19 @@ out:
 qemu_free(vs);
 }
 
+static void v9fs_fix_path(V9fsString *dst, V9fsString *src, int len)
+{
+V9fsString str;
+v9fs_string_init(str);
+v9fs_string_copy(str, dst);
+v9fs_string_sprintf(dst, %s%s, src-data, str.data+len);
+v9fs_string_free(str);
+}
+
+
 static void v9fs_wstat_post_chown(V9fsState *s, V9fsWstatState *vs, int err)
 {
+V9fsFidState *fidp;
 if (err  0) {
 goto out;
 }
@@ -1905,6 +1922,28 @@ static void v9fs_wstat_post_chown(V9fsState *s, 
V9fsWstatState *vs, int err)
 if (strcmp(new_name, vs-fidp-path.data) != 0) {
 if (posix_rename(s, vs-fidp-path, vs-nname)) {
 err = -errno;
+} else {
+/*
+ * Fixup fid's pointing to the old name to
+ * start pointing to the new name
+ */
+for (fidp = s-fid_list; fidp; fidp = fidp-next) {
+
+if (vs-fidp == fidp) {
+/*
+ * we replace name of this fid towards the end
+ * so that our below strcmp will work
+ */
+continue;
+}
+if (!strncmp(vs-fidp-path.data, fidp-path.data,
+ strlen(vs-fidp-path.data))) {
+/* replace the name */
+v9fs_fix_path(fidp-path, vs-nname,
+  strlen(vs-fidp-path.data));
+}
+}
+v9fs_string_copy(vs-fidp-path, vs-nname);
 }
 }
 }
-- 
1.7.0.2.273.gc2413

[Qemu-devel] [PATCH -v2 15/22] virtio-9p: Use little endian format on virtio

2010-03-16 Thread Aneesh Kumar K.V

We need to use platform independent data format as
part of protocol data. 9P uses little endian format
on wire

Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p.c |   34 +++---
 1 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index e095916..e8a9eeb 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -426,23 +426,32 @@ static size_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, 
const char *fmt, ...)
 for (i = 0; fmt[i]; i++) {
switch (fmt[i]) {
case 'b': {
-   int8_t *valp = va_arg(ap, int8_t *);
+   uint8_t *valp = va_arg(ap, uint8_t *);
offset += pdu_unpack(valp, pdu, offset, sizeof(*valp));
break;
}
case 'w': {
-   int16_t *valp = va_arg(ap, int16_t *);
-   offset += pdu_unpack(valp, pdu, offset, sizeof(*valp));
+   uint16_t val, *valp;
+   valp = va_arg(ap, uint16_t *);
+   val = le16_to_cpupu(valp);
+   offset += pdu_unpack(val, pdu, offset, sizeof(val));
+   *valp = val;
break;
}
case 'd': {
-   int32_t *valp = va_arg(ap, int32_t *);
-   offset += pdu_unpack(valp, pdu, offset, sizeof(*valp));
+   uint32_t val, *valp;
+   valp = va_arg(ap, uint32_t *);
+   val = le32_to_cpupu(valp);
+   offset += pdu_unpack(val, pdu, offset, sizeof(val));
+   *valp = val;
break;
}
case 'q': {
-   int64_t *valp = va_arg(ap, int64_t *);
-   offset += pdu_unpack(valp, pdu, offset, sizeof(*valp));
+   uint64_t val, *valp;
+   valp = va_arg(ap, uint64_t *);
+   val = le64_to_cpup(valp);
+   offset += pdu_unpack(val, pdu, offset, sizeof(val));
+   *valp = val;
break;
}
case 'v': {
@@ -498,22 +507,25 @@ static size_t pdu_marshal(V9fsPDU *pdu, size_t offset, 
const char *fmt, ...)
 for (i = 0; fmt[i]; i++) {
switch (fmt[i]) {
case 'b': {
-   int8_t val = va_arg(ap, int);
+   uint8_t val = va_arg(ap, int);
offset += pdu_pack(pdu, offset, val, sizeof(val));
break;
}
case 'w': {
-   int16_t val = va_arg(ap, int);
+   uint16_t val;
+   cpu_to_le16w(val, va_arg(ap, int));
offset += pdu_pack(pdu, offset, val, sizeof(val));
break;
}
case 'd': {
-   int32_t val = va_arg(ap, int);
+   uint32_t val;
+   cpu_to_le32w(val, va_arg(ap, uint32_t));
offset += pdu_pack(pdu, offset, val, sizeof(val));
break;
}
case 'q': {
-   int64_t val = va_arg(ap, int64_t);
+   uint64_t val;
+   cpu_to_le64w(val, va_arg(ap, uint64_t));
offset += pdu_pack(pdu, offset, val, sizeof(val));
break;
}
-- 
1.7.0.2.273.gc2413

[Qemu-devel] [PATCH -v2 19/22] virtio-9p: Get the correct count values from the pdu

2010-03-16 Thread Aneesh Kumar K.V

From: Venkateswararao Jujjuri jv...@linux.vnet.ibm.com

PDUs contain integer values in little endian format, so
we need to make sure we map them to host format. Also, the count
value can be at an sg offset other than 0. Use the right
functions to get the count value.

Signed-off-by: Venkateswararao Jujjuri jv...@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p-debug.c |   29 +++--
 1 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/hw/virtio-9p-debug.c b/hw/virtio-9p-debug.c
index ee222db..e8ede8e 100644
--- a/hw/virtio-9p-debug.c
+++ b/hw/virtio-9p-debug.c
@@ -181,20 +181,25 @@ static void pprint_stat(V9fsPDU *pdu, int rx, size_t 
*offsetp, const char *name)
 
 static void pprint_strs(V9fsPDU *pdu, int rx, size_t *offsetp, const char 
*name)
 {
+int sg_count = get_sg_count(pdu, rx);
 struct iovec *sg = get_sg(pdu, rx);
 size_t offset = *offsetp;
-int16_t count, i;
+uint16_t tmp_count, count, i;
+size_t copied = 0;
 
 fprintf(llogfile, %s={, name);
 
-BUG_ON((offset + 2)  sg[0].iov_len);
-memcpy(count, sg[0].iov_base + offset, 2);
-offset += 2;
+/* Get the count */
+copied = do_pdu_unpack(tmp_count, sg, sg_count, offset, 
sizeof(tmp_count));
+BUG_ON(copied != sizeof(tmp_count));
+count = le16_to_cpupu(tmp_count);
+offset += copied;
 
 for (i = 0; i  count; i++) {
 char str[512];
-if (i)
+if (i) {
 fprintf(llogfile, , );
+}
 snprintf(str, sizeof(str), [%d], i);
 pprint_str(pdu, rx, offset, str);
 }
@@ -206,20 +211,24 @@ static void pprint_strs(V9fsPDU *pdu, int rx, size_t 
*offsetp, const char *name)
 
 static void pprint_qids(V9fsPDU *pdu, int rx, size_t *offsetp, const char 
*name)
 {
+int sg_count = get_sg_count(pdu, rx);
 struct iovec *sg = get_sg(pdu, rx);
 size_t offset = *offsetp;
-int16_t count, i;
+uint16_t tmp_count, count, i;
+size_t copied = 0;
 
 fprintf(llogfile, %s={, name);
 
-BUG_ON((offset + 2)  sg[0].iov_len);
-memcpy(count, sg[0].iov_base + offset, 2);
-offset += 2;
+copied = do_pdu_unpack(tmp_count, sg, sg_count, offset, 
sizeof(tmp_count));
+BUG_ON(copied != sizeof(tmp_count));
+count = le16_to_cpupu(tmp_count);
+offset += copied;
 
 for (i = 0; i  count; i++) {
 char str[512];
-if (i)
+if (i) {
 fprintf(llogfile, , );
+}
 snprintf(str, sizeof(str), [%d], i);
 pprint_qid(pdu, rx, offset, str);
 }
-- 
1.7.0.2.273.gc2413

[Qemu-devel] [PATCH -v2 20/22] virtio-9p: Remove BUG_ON and add proper error handling

2010-03-16 Thread Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p.c |  106 
 1 files changed, 84 insertions(+), 22 deletions(-)

diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index 1237bac..3ce26ca 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -244,7 +244,6 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
 return NULL;
 
 f = qemu_mallocz(sizeof(V9fsFidState));
-BUG_ON(f == NULL);
 
 f-fid = fid;
 f-fd = -1;
@@ -320,15 +319,18 @@ static void stat_to_qid(const struct stat *stbuf, V9fsQID 
*qidp)
 qidp-type |= P9_QID_TYPE_SYMLINK;
 }
 
-static void fid_to_qid(V9fsState *s, V9fsFidState *fidp, V9fsQID *qidp)
+static int fid_to_qid(V9fsState *s, V9fsFidState *fidp, V9fsQID *qidp)
 {
 struct stat stbuf;
 int err;
 
 err = posix_lstat(s, fidp-path, stbuf);
-BUG_ON(err == -1);
+if (err) {
+return err;
+}
 
 stat_to_qid(stbuf, qidp);
+return 0;
 }
 
 static V9fsPDU *alloc_pdu(V9fsState *s)
@@ -653,7 +655,7 @@ static uint32_t stat_to_v9mode(const struct stat *stbuf)
 return mode;
 }
 
-static void stat_to_v9stat(V9fsState *s, V9fsString *name,
+static int stat_to_v9stat(V9fsState *s, V9fsString *name,
 const struct stat *stbuf,
 V9fsStat *v9stat)
 {
@@ -681,7 +683,10 @@ static void stat_to_v9stat(V9fsState *s, V9fsString *name,
 
 if (v9stat-mode  P9_STAT_MODE_SYMLINK) {
 err = posix_readlink(s, name, v9stat-extension);
-BUG_ON(err == -1);
+if (err == -1) {
+err = -errno;
+return err;
+}
 v9stat-extension.data[err] = 0;
 v9stat-extension.size = err;
 } else if (v9stat-mode  P9_STAT_MODE_DEVICE) {
@@ -708,6 +713,7 @@ static void stat_to_v9stat(V9fsState *s, V9fsString *name,
 v9fs_string_size(v9stat-gid) +
 v9fs_string_size(v9stat-muid) +
 v9fs_string_size(v9stat-extension);
+return 0;
 }
 
 static void v9fs_version(V9fsState *s, V9fsPDU *pdu)
@@ -745,7 +751,12 @@ static void v9fs_attach(V9fsState *s, V9fsPDU *pdu)
 fidp-uid = n_uname;
 
 v9fs_string_sprintf(fidp-path, %s, /);
-fid_to_qid(s, fidp, qid);
+err = fid_to_qid(s, fidp, qid);
+if (err) {
+err = -EINVAL;
+free_fid(s, fid);
+goto out;
+}
 
 offset += pdu_marshal(pdu, offset, Q, qid);
 
@@ -772,7 +783,10 @@ static void v9fs_stat_post_lstat(V9fsState *s, 
V9fsStatState *vs, int err)
 goto out;
 }
 
-stat_to_v9stat(s, vs-fidp-path, vs-stbuf, vs-v9stat);
+err = stat_to_v9stat(s, vs-fidp-path, vs-stbuf, vs-v9stat);
+if (err) {
+goto out;
+}
 vs-offset += pdu_marshal(vs-pdu, vs-offset, wS, 0, vs-v9stat);
 err = vs-offset;
 
@@ -925,10 +939,8 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu)
 
 if(vs-nwnames) {
 vs-wnames = qemu_mallocz(sizeof(vs-wnames[0]) * vs-nwnames);
-BUG_ON(vs-wnames == NULL);
 
 vs-qids = qemu_mallocz(sizeof(vs-qids[0]) * vs-nwnames);
-BUG_ON(vs-qids == NULL);
 
 for (i = 0; i  vs-nwnames; i++) {
 vs-offset += pdu_unmarshal(vs-pdu, vs-offset, s,
@@ -1070,7 +1082,10 @@ out:
 
 static void v9fs_open_post_lstat(V9fsState *s, V9fsOpenState *vs, int err)
 {
-BUG_ON(err == -1);
+if (err) {
+err = -errno;
+goto out;
+}
 
 stat_to_qid(vs-stbuf, vs-qid);
 
@@ -1082,7 +1097,10 @@ static void v9fs_open_post_lstat(V9fsState *s, 
V9fsOpenState *vs, int err)
 omode_to_uflags(vs-mode));
 v9fs_open_post_open(s, vs, err);
 }
-
+return;
+out:
+complete_pdu(s, vs-pdu, err);
+qemu_free(vs);
 }
 
 static void v9fs_open(V9fsState *s, V9fsPDU *pdu)
@@ -1186,11 +1204,15 @@ static void v9fs_read_post_readdir(V9fsState *, 
V9fsReadState *, ssize_t );
 
 static void v9fs_read_post_seekdir(V9fsState *s, V9fsReadState *vs, ssize_t 
err)
 {
+if (err) {
+goto out;
+}
 v9fs_stat_free(vs-v9stat);
 v9fs_string_free(vs-name);
 vs-offset += pdu_marshal(vs-pdu, vs-offset, d, vs-count);
 vs-offset += vs-count;
 err = vs-offset;
+out:
 complete_pdu(s, vs-pdu, err);
 qemu_free(vs);
 return;
@@ -1199,8 +1221,14 @@ static void v9fs_read_post_seekdir(V9fsState *s, 
V9fsReadState *vs, ssize_t err)
 static void v9fs_read_post_dir_lstat(V9fsState *s, V9fsReadState *vs,
 ssize_t err)
 {
-BUG_ON(err == -1);
-stat_to_v9stat(s, vs-name, vs-stbuf, vs-v9stat);
+if (err) {
+   err = -errno;
+   goto out;
+}
+err = stat_to_v9stat(s, vs-name, vs-stbuf, vs-v9stat);
+if (err) {
+goto out;
+}
 
 vs-len = pdu_marshal(vs-pdu, vs-offset + 4 + vs-count, S,
 vs-v9stat);
@@ -1217,6 +1245,11 @@ static void

[Qemu-devel] [PATCH -v2 13/22] virtio-9p: Implement P9_TFLUSH

2010-03-16 Thread Aneesh Kumar K.V

From: Anthony Liguori aligu...@us.ibm.com

Don't do anything special for flush

Signed-off-by: Anthony Liguori aligu...@us.ibm.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 hw/virtio-9p.c |5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c
index 4478e57..1dbb982 100644
--- a/hw/virtio-9p.c
+++ b/hw/virtio-9p.c
@@ -1717,10 +1717,11 @@ out:
 
 static void v9fs_flush(V9fsState *s, V9fsPDU *pdu)
 {
-if (debug_9p_pdu)
-pprint_pdu(pdu);
+/* A nop call with no return */
+complete_pdu(s, pdu, 7);
 }
 
+
 typedef struct V9fsRemoveState {
 V9fsPDU *pdu;
 size_t offset;
-- 
1.7.0.2.273.gc2413

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Juan Quintela

Chris Wright chr...@redhat.com wrote:
 Please send in any agenda items you are interested in covering.

Migration:
- flexible migration:  I hope to send an RFC patch in time for the
  call.  The idea is to use subsections.

- callbacks.  block migration introduced several callbacks:
  * cancel()
  * get_status()
  * release()
  in spice we now need another two callbacks: on_start() and on_end().
   * on_start(): tells spice that migration has started (it will then
 manage certificates, passwords, ... itself)
   * on_end(): it is called when migration ends.  spice uses it to
 transparently connect to the new host so the user doesn't have to reconnect

- what to do on migration error:
  - target side:  libvirt folks want the program to print a message if
it fails.  Current code spends 100% cpu time doing select on a closed
fd.  (patches already on the list to make it wait without using
cpu).
  - source side: current behaviour if migration fails is to stop the
vm.  We have requests to make it continue (remember that this is
live migration).  what to do?  adding a parameter like the block
layer:
  migration_error=[stop|continue]
any better ideas?

- block migration: it added the set_params() callback, investigating
  why.  I think that it can be done with inside block_save_live(), but I
  haven't investigated it fully yet.

Later, Juan.



 thanks,
 -chris

Re: [Qemu-devel] [PATCH] block: add logical_block_size property

2010-03-16 Thread Christoph Hellwig

ping?

On Thu, Mar 04, 2010 at 02:20:17PM +0100, Christoph Hellwig wrote:
 
 Add a logical block size attribute as various guest side tools only
 increase the filesystem sector size based on it, not the advisory
 physical block size.
 
 For scsi we already have support for a different logical block size
 in place for CDROMs that we can build upon.  Only my recent block
 device characteristics VPD page needs some fixups.  Note that we
 leave the logical block size for CDROMs hardcoded as the 2k value
 is expected for it in general.
 
 For virtio-blk we already have a feature flag claiming to support
 a variable logical block size that was added for the s390 kuli
 hypervisor.  Interestingly it does not actually change the units
 in which the protocol works, which is still fixed at 512 bytes,
 but only communicates a different minimum I/O granularity.  So
 all we need to do in virtio is to add a trap for unaligned I/O
 and round down the device size to the next multiple of the logical
 block size.
 
 IDE does not support any other logical block size than 512 bytes.
 
 Signed-off-by: Christoph Hellwig h...@lst.de
 
 Index: qemu/block_int.h
 ===
 --- qemu.orig/block_int.h 2010-03-03 19:16:13.408253228 +0100
 +++ qemu/block_int.h  2010-03-03 19:16:43.030003751 +0100
 @@ -209,6 +209,7 @@ struct DriveInfo;
  typedef struct BlockConf {
  struct DriveInfo *dinfo;
  uint16_t physical_block_size;
 +uint16_t logical_block_size;
  uint16_t min_io_size;
  uint32_t opt_io_size;
  } BlockConf;
 @@ -226,6 +227,8 @@ static inline unsigned int get_physical_
  
  #define DEFINE_BLOCK_PROPERTIES(_state, _conf)  \
  DEFINE_PROP_DRIVE(drive, _state, _conf.dinfo),\
 +DEFINE_PROP_UINT16(logical_block_size, _state,\
 +   _conf.logical_block_size, 512),  \
  DEFINE_PROP_UINT16(physical_block_size, _state,   \
 _conf.physical_block_size, 512), \
  DEFINE_PROP_UINT16(min_io_size, _state, _conf.min_io_size, 512),  \
 Index: qemu/hw/scsi-disk.c
 ===
 --- qemu.orig/hw/scsi-disk.c  2010-03-03 19:16:13.419254346 +0100
 +++ qemu/hw/scsi-disk.c   2010-03-03 19:16:43.031004240 +0100
 @@ -397,8 +397,10 @@ static int scsi_disk_emulate_inquiry(SCS
  }
  case 0xb0: /* block device characteristics */
  {
 -unsigned int min_io_size = s-qdev.conf.min_io_size  9;
 -unsigned int opt_io_size = s-qdev.conf.opt_io_size  9;
 +unsigned int min_io_size =
 +s-qdev.conf.min_io_size / s-qdev.blocksize;
 +unsigned int opt_io_size =
 +s-qdev.conf.opt_io_size / s-qdev.blocksize;
  
  /* required VPD size with unmap support */
  outbuf[3] = buflen = 0x3c;
 @@ -1028,11 +1030,12 @@ static int scsi_disk_initfn(SCSIDevice *
  s-bs = s-qdev.conf.dinfo-bdrv;
  
  if (bdrv_get_type_hint(s-bs) == BDRV_TYPE_CDROM) {
 -s-cluster_size = 4;
 +s-qdev.blocksize = 2048;
  } else {
 -s-cluster_size = 1;
 +s-qdev.blocksize = s-qdev.conf.logical_block_size;
  }
 -s-qdev.blocksize = 512 * s-cluster_size;
 +s-cluster_size = s-qdev.blocksize / 512;
 +
  s-qdev.type = TYPE_DISK;
  bdrv_get_geometry(s-bs, nb_sectors);
  nb_sectors /= s-cluster_size;
 Index: qemu/hw/virtio-blk.c
 ===
 --- qemu.orig/hw/virtio-blk.c 2010-03-03 19:16:13.426273971 +0100
 +++ qemu/hw/virtio-blk.c  2010-03-03 19:35:16.636028605 +0100
 @@ -27,6 +27,7 @@ typedef struct VirtIOBlock
  void *rq;
  QEMUBH *bh;
  BlockConf *conf;
 +unsigned short sector_mask;
  } VirtIOBlock;
  
  static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
 @@ -250,6 +251,11 @@ static void virtio_blk_handle_flush(Virt
  static void virtio_blk_handle_write(BlockRequest *blkreq, int *num_writes,
  VirtIOBlockReq *req, BlockDriverState **old_bs)
  {
 +if (req-out-sector  req-dev-sector_mask) {
 +virtio_blk_rw_complete(req, -EIO);
 +return;
 +}
 +
  if (req-dev-bs != *old_bs || *num_writes == 32) {
  if (*old_bs != NULL) {
  do_multiwrite(*old_bs, blkreq, *num_writes);
 @@ -272,6 +278,11 @@ static void virtio_blk_handle_read(VirtI
  {
  BlockDriverAIOCB *acb;
  
 +if (req-out-sector  req-dev-sector_mask) {
 +virtio_blk_rw_complete(req, -EIO);
 +return;
 +}
 +
  acb = bdrv_aio_readv(req-dev-bs, req-out-sector, req-qiov,
   req-qiov.size / 512, virtio_blk_rw_complete, req);
  if (!acb) {
 @@ -404,12 +415,13 @@ static void virtio_blk_update_config(Vir
  stl_raw(blkcfg.seg_max, 128 - 2);
  stw_raw(blkcfg.cylinders, cylinders);

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Daniel P. Berrange

On Tue, Mar 16, 2010 at 10:18:03AM +0100, Juan Quintela wrote:
 Chris Wright chr...@redhat.com wrote:
  Please send in any agenda items you are interested in covering.
 
 Migration:
 - flexible migration:  I hope to sent an RFC patch on time for the
   call.  idea is to use subsections.
 
 - callbacks.  block migration introduced several callbacks:
   * cancel()
   * get_status()
   * release()
   in spice we need now another to callbacks: on_start() and on_end().
* on_start(): tells spice that migration has started (it will then
  manage certificates, passwords, ... itself)
* on_end(): it is called when migration ends.  spice use it to
  transparently connect to the new host and user don't have to reconnect
 
 - what to do on migration error:
   - target side:  libvirt folks want the program to print a message if
 it fails.  Current code spent 100% cpu time doing select on a closed
 fd.  (patches already on the list to make it wait without using
 cpu).

No, that is not correct. We want QEMU to exit when incoming migration
fails. Printing to stderr is just something that will end up in the
logs for admin to further diagnose the problem if required. There is 
nothing to be gained by leaving QEMU running, and everything to lose
since the failed migration may have left it in a dangerous state from 
which you do not want to attempt incoming migration again.

If we really want to leave it running when migration fails, then we're
going to have to add yet another QMP event to inform libvirt when
migration has finished/failed, and/or make 'query_migrate' work on
the destination too.

   - source side: current behaviour if migration fails is to stop the
 vm.  We have requests to make it continue (remember that this is
 live migration).  what to do?  adding a paramenter like the block
 layer:
   migration_error=[stop|continue]
 any better ideas.

A parameter to the 'migrate' monitor command would be the logical
place if we needed this configurable.

Incidentally I have a feeling we might need to introduce a migration
event in QMP. Currently libvirt polls on the 'query_migrate' command
to get the ongoing migration status. This means there can be a delay
in detecting completion as long as the polling interval - for this
reason we just dropped libvirt's polling time from 1/2 sec to 50ms
to ensure prompt detection.

Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

[Qemu-devel] Re: [PATCH 1/7] Add support for generic notifier lists (v2)

2010-03-16 Thread Avi Kivity


On 03/15/2010 10:34 PM, Anthony Liguori wrote:

Notifiers are data-less callbacks and a notifier list is a list of registered
notifiers that all are interested in a particular event.

We'll use this in a few patches to implement mouse change notification.
   


Looks nicer & lighter!

--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Avi Kivity


On 03/16/2010 11:29 AM, Daniel P. Berrange wrote:

On Tue, Mar 16, 2010 at 10:18:03AM +0100, Juan Quintela wrote:
   

Chris Wrightchr...@redhat.com  wrote:
 

Please send in any agenda items you are interested in covering.
   

Migration:
- flexible migration:  I hope to sent an RFC patch on time for the
   call.  idea is to use subsections.

- callbacks.  block migration introduced several callbacks:
   * cancel()
   * get_status()
   * release()
   in spice we need now another to callbacks: on_start() and on_end().
* on_start(): tells spice that migration has started (it will then
  manage certificates, passwords, ... itself)
* on_end(): it is called when migration ends.  spice use it to
  transparently connect to the new host and user don't have to reconnect

- what to do on migration error:
   - target side:  libvirt folks want the program to print a message if
 it fails.  Current code spent 100% cpu time doing select on a closed
 fd.  (patches already on the list to make it wait without using
 cpu).
 

No, that is not correct. We want QEMU to exit when incoming migration
fails. Printing to stderr is just something that will end up in the
logs for admin to further diagnose the problem if required. There is
nothing to be gained by leaving QEMU running, and everything to loose
since the failed migration may have left it in a dangerous state from
which you do not want to attempt incoming migration again.

If we really want to leave it running when migration fails, then we're
going to have to add yet another QMP event to inform libvirt when
migration has finished/failed, and/or make 'query_migrate' work on
the destination too.
   


A qmp event seems the logical thing to do?  Exiting can happen for many 
reasons, a qmp event is unambiguous.



   - source side: current behaviour if migration fails is to stop the
 vm.  We have requests to make it continue (remember that this is
 live migration).  what to do?  adding a paramenter like the block
 layer:
   migration_error=[stop|continue]
 any better ideas.
 

A parameter to the 'migrate' monitor command would be the logical
place if we needed this configurable.

Incidentally I have a feeling we might need to introduce a migration
event in QMP. Currently libvirt polls on the 'query_migrate' command
to get the ongoing migration status. This means there can be a delay
in detecting completion as long as the polling interval - for this
reason we just dropped libvirt's polling time from 1/2 sec to 50ms
to ensure prompt detection.
   


Whenever you implement a polling loop, can you send an event to qemu-de...@?

Polling loops are an indication that something is wrong.

--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Daniel P. Berrange

On Tue, Mar 16, 2010 at 09:29:44AM +, Daniel P. Berrange wrote:
 On Tue, Mar 16, 2010 at 10:18:03AM +0100, Juan Quintela wrote:
  Chris Wright chr...@redhat.com wrote:
   Please send in any agenda items you are interested in covering.
  
  Migration:
  - flexible migration:  I hope to sent an RFC patch on time for the
call.  idea is to use subsections.
  
  - callbacks.  block migration introduced several callbacks:
* cancel()
* get_status()
* release()
in spice we need now another to callbacks: on_start() and on_end().
 * on_start(): tells spice that migration has started (it will then
   manage certificates, passwords, ... itself)
 * on_end(): it is called when migration ends.  spice use it to
   transparently connect to the new host and user don't have to 
  reconnect
  
  - what to do on migration error:
- target side:  libvirt folks want the program to print a message if
  it fails.  Current code spent 100% cpu time doing select on a closed
  fd.  (patches already on the list to make it wait without using
  cpu).
 
 No, that is not correct. We want QEMU to exit when incoming migration
 fails. Printing to stderr is just something that will end up in the
 logs for admin to further diagnose the problem if required. There is 
 nothing to be gained by leaving QEMU running, and everything to loose
 since the failed migration may have left it in a dangerous state from 
 which you do not want to attempt incoming migration again.

Sorry, I forgot to include the original BZ report about this problem from
Fedora. In essence, we just truncated the original save state image and
then tried to restore from it to check handling in the event of corrupted
save image.

  https://bugzilla.redhat.com/show_bug.cgi?id=518032


Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Daniel P. Berrange

On Tue, Mar 16, 2010 at 12:38:02PM +0200, Avi Kivity wrote:
 On 03/16/2010 12:31 PM, Daniel P. Berrange wrote:
 Polling loops are an indication that something is wrong.
  
 Except when people suggest they are the right answer, qcow high
 watermark ;-P

 
 I liked Anthony's suggestion of an lvm2 block format driver.  No polling.

Doesn't that require giving QEMU privileges to perform LVM operations which
implies QEMU having CAP_SYS_ADMIN  ?


Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Christoph Hellwig

On Tue, Mar 16, 2010 at 12:38:02PM +0200, Avi Kivity wrote:
 On 03/16/2010 12:31 PM, Daniel P. Berrange wrote:
 Polling loops are an indication that something is wrong.
  
 Except when people suggest they are the right answer, qcow high
 watermark ;-P


 I liked Anthony's suggestion of an lvm2 block format driver.  No polling.

I have done some work on linking the new lvm library to qemu to control
snapshotting.  But introducing a whole new block format seems like a lot
of duplication to me.

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Avi Kivity


On 03/16/2010 12:31 PM, Daniel P. Berrange wrote:

Polling loops are an indication that something is wrong.
 

Except when people suggest they are the right answer, qcow high
watermark ;-P
   


I liked Anthony's suggestion of an lvm2 block format driver.  No polling.

--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Daniel P. Berrange

On Tue, Mar 16, 2010 at 11:43:48AM +0200, Avi Kivity wrote:
 On 03/16/2010 11:29 AM, Daniel P. Berrange wrote:
 On Tue, Mar 16, 2010 at 10:18:03AM +0100, Juan Quintela wrote:

 Chris Wrightchr...@redhat.com  wrote:
  
 Please send in any agenda items you are interested in covering.

 Migration:
 - flexible migration:  I hope to sent an RFC patch on time for the
call.  idea is to use subsections.
 
 - callbacks.  block migration introduced several callbacks:
* cancel()
* get_status()
* release()
in spice we need now another to callbacks: on_start() and on_end().
 * on_start(): tells spice that migration has started (it will then
   manage certificates, passwords, ... itself)
 * on_end(): it is called when migration ends.  spice use it to
   transparently connect to the new host and user don't have to 
   reconnect
 
 - what to do on migration error:
- target side:  libvirt folks want the program to print a message if
  it fails.  Current code spent 100% cpu time doing select on a closed
  fd.  (patches already on the list to make it wait without using
  cpu).
  
 No, that is not correct. We want QEMU to exit when incoming migration
 fails. Printing to stderr is just something that will end up in the
 logs for admin to further diagnose the problem if required. There is
 nothing to be gained by leaving QEMU running, and everything to loose
 since the failed migration may have left it in a dangerous state from
 which you do not want to attempt incoming migration again.
 
 If we really want to leave it running when migration fails, then we're
 going to have to add yet another QMP event to inform libvirt when
 migration has finished/failed, and/or make 'query_migrate' work on
 the destination too.

 
 A qmp event seems the logical thing to do?  Exiting can happen for many 
 reasons, a qmp event is unambiguous.

Yes, for the QEMU upstream adding an event is more flexible. I had
originally suggested exiting in the context of the Fedora bug report
which was for QEMU 0.10.x which has no events capability.

 
 Incidentally I have a feeling we might need to introduce a migration
 event in QMP. Currently libvirt polls on the 'query_migrate' command
 to get the ongoing migration status. This means there can be a delay
 in detecting completion as long as the polling interval - for this
 reason we just dropped libvirt's polling time from 1/2 sec to 50ms
 to ensure prompt detection.

 
 Whenever you implement a polling loop, can you send an event to qemu-de...@?

Yep, sure thing. This is the only polling loop that isn't related to I/O
stats collection.

 
 Polling loops are an indication that something is wrong.

Except when people suggest they are the right answer, qcow high 
watermark ;-P

Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

[Qemu-devel] [PATCH 0/6] qemu-kvm: Introduce bit-based phys_ram_dirty, and bit-based dirty page checker.

2010-03-16 Thread Yoshiaki Tamura


The dirty and non-dirty pages are checked one by one in vl.c.
When most of the memory is not dirty,
checking the dirty and non-dirty pages several pages at a time
should be much faster than checking them one by one.
We introduced bit-based phys_ram_dirty for VGA, CODE and MIGRATION, and
cpu_physical_memory_get_dirty_range() for this purpose.

This patch is based on the following discussion.

http://www.mail-archive.com/k...@vger.kernel.org/msg28733.html

To validate this expectation, we have evaluated the effect of this patch.
We compared the runtime of the original ram_save_remaining() with that of
ram_save_remaining() using the functions introduced by this patch.

Test Environment:
CPU: 4x Intel Xeon Quad Core 2.66GHz
Mem size: 96GB
kvm version: 2.6.33
qemu-kvm version: commit 2b644fd0e737407133c88054ba498e772ce01f27

Host OS: CentOS (kernel 2.6.33)
Guest OS: Debian/GNU Linux lenny (kernel 2.6.26)
Guest Mem size: 512MB

Conditions of experiments are as follows:
Cond1: Guest OS periodically makes the 256MB continuous dirty pages.
Cond2: Guest OS periodically makes the 256MB dirty pages and non-dirty pages
in turn.
Cond3: Guest OS reads a 1GB file, which is bigger than memory.
Cond4: Guest OS writes a 1GB file, which is bigger than memory.

Experimental results:
Cond1: 1.9 ~ 61 times speed up
Cond2: 1.9 ~ 56 times speed up
Cond3: 1.9 ~ 59 times speed up
Cond4: 1.7 ~ 59 times speed up

[Qemu-devel] [PATCH 5/6] qemu-kvm: Use cpu_physical_memory_set_dirty_range() to update phys_ram_dirty.

2010-03-16 Thread Yoshiaki Tamura

Modifies kvm_get_dirty_pages_log_range to use 
cpu_physical_memory_set_dirty_range() to update the row of 
the bit-based phys_ram_dirty bitmap at once.

Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
---
 qemu-kvm.c |   19 ++-
 1 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index e417f21..75fa9b0 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -2305,9 +2305,8 @@ static int kvm_get_dirty_pages_log_range(unsigned long 
start_addr,
  unsigned long offset,
  unsigned long mem_size)
 {
-unsigned int i, j;
-unsigned long page_number, addr, addr1, c;
-ram_addr_t ram_addr;
+unsigned int i;
+unsigned long page_number, addr, addr1;
 unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) /
 HOST_LONG_BITS;
 
@@ -2317,16 +2316,10 @@ static int kvm_get_dirty_pages_log_range(unsigned long 
start_addr,
  */
 for (i = 0; i  len; i++) {
 if (bitmap[i] != 0) {
-c = leul_to_cpu(bitmap[i]);
-do {
-j = ffsl(c) - 1;
-c = ~(1ul  j);
-page_number = i * HOST_LONG_BITS + j;
-addr1 = page_number * TARGET_PAGE_SIZE;
-addr = offset + addr1;
-ram_addr = cpu_get_physical_page_desc(addr);
-cpu_physical_memory_set_dirty(ram_addr);
-} while (c != 0);
+page_number = i * HOST_LONG_BITS;
+addr1 = page_number * TARGET_PAGE_SIZE;
+addr = offset + addr1;
+cpu_physical_memory_set_dirty_range(addr, leul_to_cpu(bitmap[i]));
 }
 }
 return 0;
-- 
1.7.0.31.g1df487

[Qemu-devel] [PATCH 6/6] qemu-kvm: Use cpu_physical_memory_get_dirty_range() to check multiple dirty pages.

2010-03-16 Thread Yoshiaki Tamura

Modifies ram_save_block() and ram_save_remaining() to use 
cpu_physical_memory_get_dirty_range() to check multiple dirty and non-dirty
pages at once.

Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
---
 vl.c |   55 +++
 1 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/vl.c b/vl.c
index 6e35cc6..e9ad7c9 100644
--- a/vl.c
+++ b/vl.c
@@ -2779,7 +2779,8 @@ static int ram_save_block(QEMUFile *f)
 static ram_addr_t current_addr = 0;
 ram_addr_t saved_addr = current_addr;
 ram_addr_t addr = 0;
-int found = 0;
+ram_addr_t dirty_rams[HOST_LONG_BITS];
+int i, found = 0;
 
 while (addr  last_ram_offset) {
 if (kvm_enabled()  current_addr == 0) {
@@ -2791,28 +2792,35 @@ static int ram_save_block(QEMUFile *f)
 return 0;
 }
 }
-if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) 
{
+if ((found = cpu_physical_memory_get_dirty_range(
+ current_addr, last_ram_offset, dirty_rams, HOST_LONG_BITS,
+ MIGRATION_DIRTY_FLAG))) {
 uint8_t *p;
 
-cpu_physical_memory_reset_dirty(current_addr,
-current_addr + TARGET_PAGE_SIZE,
-MIGRATION_DIRTY_FLAG);
+for (i = 0; i  found; i++) {
+ram_addr_t page_addr = dirty_rams[i];
+cpu_physical_memory_reset_dirty(page_addr,
+page_addr + TARGET_PAGE_SIZE,
+MIGRATION_DIRTY_FLAG);
 
-p = qemu_get_ram_ptr(current_addr);
+p = qemu_get_ram_ptr(page_addr);
 
-if (is_dup_page(p, *p)) {
-qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
-qemu_put_byte(f, *p);
-} else {
-qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
-qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+if (is_dup_page(p, *p)) {
+qemu_put_be64(f, (page_addr) |
+  RAM_SAVE_FLAG_COMPRESS);
+qemu_put_byte(f, *p);
+} else {
+qemu_put_be64(f, (page_addr) |
+  RAM_SAVE_FLAG_PAGE);
+qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+}
 }
-
-found = 1;
+   
 break;
+} else {
+addr += dirty_rams[0];
+current_addr = (saved_addr + addr) % last_ram_offset;
 }
-addr += TARGET_PAGE_SIZE;
-current_addr = (saved_addr + addr) % last_ram_offset;
 }
 
 return found;
@@ -2822,12 +2830,19 @@ static uint64_t bytes_transferred;
 
 static ram_addr_t ram_save_remaining(void)
 {
-ram_addr_t addr;
+ram_addr_t addr = 0;
 ram_addr_t count = 0;
+ram_addr_t dirty_rams[HOST_LONG_BITS];
+int found = 0;
 
-for (addr = 0; addr  last_ram_offset; addr += TARGET_PAGE_SIZE) {
-if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
-count++;
+while (addr  last_ram_offset) {
+if ((found = cpu_physical_memory_get_dirty_range(addr, last_ram_offset,
+dirty_rams, HOST_LONG_BITS, MIGRATION_DIRTY_FLAG))) {
+count += found;
+addr = dirty_rams[found - 1] + TARGET_PAGE_SIZE;
+} else {
+addr += dirty_rams[0];
+}
 }
 
 return count;
-- 
1.7.0.31.g1df487

[Qemu-devel] [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.

2010-03-16 Thread Yoshiaki Tamura

Converts the wrapper functions for the byte-based phys_ram_dirty bitmap to use
the bit-based phys_ram_dirty bitmaps, and adds more wrapper functions to prevent
direct access to the phys_ram_dirty bitmaps.

Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
---
 cpu-all.h |   94 ++--
 1 files changed, 90 insertions(+), 4 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index 9bc01b9..91ec3e5 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -843,7 +843,9 @@ int cpu_str_to_log_mask(const char *str);
 /* memory API */
 
 extern int phys_ram_fd;
-extern uint8_t *phys_ram_dirty;
+extern unsigned long *phys_ram_vga_dirty;
+extern unsigned long *phys_ram_code_dirty;
+extern unsigned long *phys_ram_migration_dirty;
 extern ram_addr_t ram_size;
 extern ram_addr_t last_ram_offset;
 extern uint8_t *bios_mem;
@@ -879,20 +881,104 @@ int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
 /* read dirty bit (return 0 or 1) */
 static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
 {
-return phys_ram_dirty[addr  TARGET_PAGE_BITS] == 0xff;
+unsigned long mask;
+int index = (addr  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int offset = (addr  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+
+mask = 1UL  offset;
+return (phys_ram_vga_dirty[index]  
+phys_ram_code_dirty[index] 
+phys_ram_migration_dirty[index]  mask) == mask;
+}
+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
+{
+unsigned long mask;
+int index = (addr  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int offset = (addr  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+int ret = 0;
+
+mask = 1UL  offset;
+if (phys_ram_vga_dirty[index]  mask)
+ret |= VGA_DIRTY_FLAG;
+if (phys_ram_code_dirty[index]  mask)
+ret |=  CODE_DIRTY_FLAG;
+if (phys_ram_migration_dirty[index]  mask)
+ret |= MIGRATION_DIRTY_FLAG;
+
+return ret;
 }
 
 static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
 int dirty_flags)
 {
-return phys_ram_dirty[addr  TARGET_PAGE_BITS]  dirty_flags;
+return cpu_physical_memory_get_dirty_flags(addr)  dirty_flags;
 }
 
 static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
 {
-phys_ram_dirty[addr  TARGET_PAGE_BITS] = 0xff;
+unsigned long mask;
+int index = (addr  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int offset = (addr  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+
+mask = 1UL  offset;
+phys_ram_vga_dirty[index] |= mask;
+phys_ram_code_dirty[index] |= mask;
+phys_ram_migration_dirty[index] |= mask;
+}
+
+static inline void cpu_physical_memory_set_dirty_range(ram_addr_t addr,
+   unsigned long mask)
+{
+int index = (addr  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+
+phys_ram_vga_dirty[index] |= mask;
+phys_ram_code_dirty[index] |= mask;
+phys_ram_migration_dirty[index] |= mask;
 }
 
+static inline void cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
+   int dirty_flags)
+{
+unsigned long mask;
+int index = (addr  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int offset = (addr  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+
+mask = 1UL  offset;
+if (dirty_flags  VGA_DIRTY_FLAG)
+phys_ram_vga_dirty[index] |= mask;
+if (dirty_flags  CODE_DIRTY_FLAG)
+phys_ram_code_dirty[index] |= mask;
+if (dirty_flags  MIGRATION_DIRTY_FLAG)
+phys_ram_migration_dirty[index] |= mask;
+}
+
+static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
+int length,
+int dirty_flags)
+{
+ram_addr_t addr = start;
+unsigned long mask;
+int index, offset, i;
+
+for (i = 0;  i  length; i += TARGET_PAGE_SIZE) {
+index = ((addr + i)  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+offset = ((addr + i)  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+mask = ~(1UL  offset);
+
+if (dirty_flags  VGA_DIRTY_FLAG)
+phys_ram_vga_dirty[index] = mask;
+if (dirty_flags  CODE_DIRTY_FLAG)
+phys_ram_code_dirty[index] = mask;
+if (dirty_flags  MIGRATION_DIRTY_FLAG)
+phys_ram_migration_dirty[index] = mask;
+ }
+}
+
+int cpu_physical_memory_get_dirty_range(ram_addr_t start, ram_addr_t end, 
+ram_addr_t *dirty_rams, int length,
+int dirty_flags);
+
 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
  int dirty_flags);
 void cpu_tlb_update_dirty(CPUState *env);
-- 
1.7.0.31.g1df487

[Qemu-devel] [PATCH 3/6] qemu-kvm: Replace direct phys_ram_dirty access with wrapper functions.

2010-03-16 Thread Yoshiaki Tamura

Replaces direct phys_ram_dirty access with wrapper functions to prevent
direct access to the phys_ram_dirty bitmap.

Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
---
 exec.c |   45 -
 1 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/exec.c b/exec.c
index ba334e7..b31c349 100644
--- a/exec.c
+++ b/exec.c
@@ -1946,7 +1946,7 @@ static void tlb_protect_code(ram_addr_t ram_addr)
 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
 target_ulong vaddr)
 {
-phys_ram_dirty[ram_addr  TARGET_PAGE_BITS] |= CODE_DIRTY_FLAG;
+cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
 }
 
 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
@@ -1967,8 +1967,7 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, 
ram_addr_t end,
 {
 CPUState *env;
 unsigned long length, start1;
-int i, mask, len;
-uint8_t *p;
+int i;
 
 start = TARGET_PAGE_MASK;
 end = TARGET_PAGE_ALIGN(end);
@@ -1976,11 +1975,7 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, 
ram_addr_t end,
 length = end - start;
 if (length == 0)
 return;
-len = length  TARGET_PAGE_BITS;
-mask = ~dirty_flags;
-p = phys_ram_dirty + (start  TARGET_PAGE_BITS);
-for(i = 0; i  len; i++)
-p[i] = mask;
+cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
 
 /* we modify the TLB cache so that the dirty bit will be set again
when accessing the range */
@@ -2837,16 +2832,16 @@ static void notdirty_mem_writeb(void *opaque, 
target_phys_addr_t ram_addr,
 uint32_t val)
 {
 int dirty_flags;
-dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
+dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 if (!(dirty_flags  CODE_DIRTY_FLAG)) {
 #if !defined(CONFIG_USER_ONLY)
 tb_invalidate_phys_page_fast(ram_addr, 1);
-dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
+dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 #endif
 }
 stb_p(qemu_get_ram_ptr(ram_addr), val);
 dirty_flags |= (0xff  ~CODE_DIRTY_FLAG);
-phys_ram_dirty[ram_addr  TARGET_PAGE_BITS] = dirty_flags;
+cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
 /* we remove the notdirty callback only if the code has been
flushed */
 if (dirty_flags == 0xff)
@@ -2857,16 +2852,16 @@ static void notdirty_mem_writew(void *opaque, 
target_phys_addr_t ram_addr,
 uint32_t val)
 {
 int dirty_flags;
-dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
+dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 if (!(dirty_flags  CODE_DIRTY_FLAG)) {
 #if !defined(CONFIG_USER_ONLY)
 tb_invalidate_phys_page_fast(ram_addr, 2);
-dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
+dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 #endif
 }
 stw_p(qemu_get_ram_ptr(ram_addr), val);
 dirty_flags |= (0xff  ~CODE_DIRTY_FLAG);
-phys_ram_dirty[ram_addr  TARGET_PAGE_BITS] = dirty_flags;
+cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
 /* we remove the notdirty callback only if the code has been
flushed */
 if (dirty_flags == 0xff)
@@ -2877,16 +2872,16 @@ static void notdirty_mem_writel(void *opaque, 
target_phys_addr_t ram_addr,
 uint32_t val)
 {
 int dirty_flags;
-dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
+dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 if (!(dirty_flags  CODE_DIRTY_FLAG)) {
 #if !defined(CONFIG_USER_ONLY)
 tb_invalidate_phys_page_fast(ram_addr, 4);
-dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
+dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
 #endif
 }
 stl_p(qemu_get_ram_ptr(ram_addr), val);
 dirty_flags |= (0xff  ~CODE_DIRTY_FLAG);
-phys_ram_dirty[ram_addr  TARGET_PAGE_BITS] = dirty_flags;
+cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
 /* we remove the notdirty callback only if the code has been
flushed */
 if (dirty_flags == 0xff)
@@ -3337,8 +3332,8 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, 
uint8_t *buf,
 /* invalidate code */
 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
 /* set dirty bit */
-phys_ram_dirty[addr1  TARGET_PAGE_BITS] |=
-(0xff  ~CODE_DIRTY_FLAG);
+cpu_physical_memory_set_dirty_flags(
+addr1, (0xff  ~CODE_DIRTY_FLAG));
 }
/* qemu doesn't execute guest code directly, but kvm does
   therefore flush instruction caches

[Qemu-devel] [PATCH 4/6] qemu-kvm: Introduce cpu_physical_memory_get_dirty_range().

2010-03-16 Thread Yoshiaki Tamura

Introduces cpu_physical_memory_get_dirty_range().
It checks the first row and puts the dirty addrs in the array.
If the first row is empty, it skips to the first non-empty row
or the end addr, and puts the skipped length in the first entry of the array.

Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp
---
 exec.c |   73 
 1 files changed, 73 insertions(+), 0 deletions(-)

diff --git a/exec.c b/exec.c
index b31c349..87056a6 100644
--- a/exec.c
+++ b/exec.c
@@ -1961,6 +1961,79 @@ static inline void tlb_reset_dirty_range(CPUTLBEntry 
*tlb_entry,
 }
 }
 
+/* It checks the first row and puts dirty addrs in the array.
+   If the first row is empty, it skips to the first non-dirty row
+   or the end addr, and put the length in the first entry of the array. */
+int cpu_physical_memory_get_dirty_range(ram_addr_t start, ram_addr_t end, 
+ram_addr_t *dirty_rams, int length,
+int dirty_flag)
+{
+unsigned long phys_ram_dirty, page_number, *p;
+ram_addr_t addr;
+int s_idx = (start  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int e_idx = (end  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int i, j, offset;
+
+switch (dirty_flag) {
+case VGA_DIRTY_FLAG:
+p = phys_ram_vga_dirty;
+break;
+case CODE_DIRTY_FLAG:
+p = phys_ram_code_dirty;
+break;
+case MIGRATION_DIRTY_FLAG:
+p = phys_ram_migration_dirty;
+break;
+default:
+abort();
+}
+
+/* mask bits before the start addr */
+offset = (start  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+phys_ram_dirty = p[s_idx]  ~((1UL  offset) - 1);
+
+if (s_idx == e_idx) {
+/* mask bits after the end addr */
+offset = (end  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+phys_ram_dirty = (1UL  offset) - 1;
+}
+
+if (phys_ram_dirty == 0) {
+/* when the row is empty */
+ram_addr_t skip;
+if (s_idx == e_idx)
+skip = end;
+else {
+/* skip empty rows */
+while (s_idx  e_idx  p[++s_idx] == 0);
+skip = (s_idx * HOST_LONG_BITS * TARGET_PAGE_SIZE);
+}
+dirty_rams[0] = skip - start;
+i = 0;
+
+} else if (phys_ram_dirty == ~0UL) {
+/* when the row is fully dirtied */
+addr = start;
+for (i = 0; i  length; i++) {
+dirty_rams[i] = addr;
+addr += TARGET_PAGE_SIZE;
+}
+} else {
+/* when the row is partially dirtied */
+i = 0;
+do {
+j = ffsl(phys_ram_dirty) - 1;
+phys_ram_dirty = ~(1UL  j);
+page_number = s_idx * HOST_LONG_BITS + j;
+addr = page_number * TARGET_PAGE_SIZE;
+dirty_rams[i] = addr;
+i++;
+} while (phys_ram_dirty != 0  i  length);
+}
+
+return i;
+}
+
 /* Note: start and end must be within the same ram block.  */
 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
  int dirty_flags)
-- 
1.7.0.31.g1df487

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Avi Kivity


On 03/16/2010 12:45 PM, Daniel P. Berrange wrote:

On Tue, Mar 16, 2010 at 12:38:02PM +0200, Avi Kivity wrote:
   

On 03/16/2010 12:31 PM, Daniel P. Berrange wrote:
 

Polling loops are an indication that something is wrong.

 

Except when people suggest they are the right answer, qcow high
watermark ;-P

   

I liked Anthony's suggestion of an lvm2 block format driver.  No polling.
 

Doesn't that require giving QEMU privileges to perform LVM operations which
implies QEMU having CAP_SYS_ADMIN  ?
   


Ouch.  I expect fd permissions on the volume are insufficient, and fd 
permissions on the group are excessive.


--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: [PATCH 1/6] qemu-kvm: Introduce bit-based phys_ram_dirty for VGA, CODE and MIGRATION.

2010-03-16 Thread Avi Kivity


On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Replaces byte-based phys_ram_dirty bitmap with
three bit-based phys_ram_dirty bitmap.
On allocation, it sets all bits in the bitmap.

Signed-off-by: Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Keiohmura@lab.ntt.co.jp
---
  exec.c |   22 +-
  1 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/exec.c b/exec.c
index 9bcb4de..ba334e7 100644
--- a/exec.c
+++ b/exec.c
@@ -119,7 +119,9 @@ uint8_t *code_gen_ptr;

  #if !defined(CONFIG_USER_ONLY)
  int phys_ram_fd;
-uint8_t *phys_ram_dirty;
+unsigned long *phys_ram_vga_dirty;
+unsigned long *phys_ram_code_dirty;
+unsigned long *phys_ram_migration_dirty;
   


Would be nice to make this an array.

--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.

2010-03-16 Thread Avi Kivity


On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Modifies wrapper functions for byte-based phys_ram_dirty bitmap to
bit-based phys_ram_dirty bitmap, and adds more wrapper functions to prevent
direct access to the phys_ram_dirty bitmap.
   



+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
+{
+unsigned long mask;
+int index = (addr  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int offset = (addr  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+int ret = 0;
+
+mask = 1UL  offset;
+if (phys_ram_vga_dirty[index]  mask)
+ret |= VGA_DIRTY_FLAG;
+if (phys_ram_code_dirty[index]  mask)
+ret |=  CODE_DIRTY_FLAG;
+if (phys_ram_migration_dirty[index]  mask)
+ret |= MIGRATION_DIRTY_FLAG;
+
+return ret;
  }

  static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
  int dirty_flags)
  {
-return phys_ram_dirty[addr  TARGET_PAGE_BITS]  dirty_flags;
+return cpu_physical_memory_get_dirty_flags(addr)  dirty_flags;
  }
   


This turns one cacheline access into three.  If the dirty bitmaps were 
in an array, you could do


  return dirty_bitmaps[dirty_index][addr >> (TARGET_PAGE_BITS + 
BITS_IN_LONG)] & mask;


with one cacheline access.



  static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
  {
-phys_ram_dirty[addr  TARGET_PAGE_BITS] = 0xff;
+unsigned long mask;
+int index = (addr  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int offset = (addr  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+
+mask = 1UL  offset;
+phys_ram_vga_dirty[index] |= mask;
+phys_ram_code_dirty[index] |= mask;
+phys_ram_migration_dirty[index] |= mask;
+}
   


This is also three cacheline accesses.  I think we should have a master 
bitmap which is updated by set_dirty(), and which is or'ed into the 
other bitmaps when they are accessed.  At least the vga and migration 
bitmaps are only read periodically, not randomly, so this would be very 
fast.  In a way, this is similar to how the qemu bitmap is updated from 
the kvm bitmap today.


I am not sure about the code bitmap though.


--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: [PATCH 4/6] qemu-kvm: Introduce cpu_physical_memory_get_dirty_range().

2010-03-16 Thread Avi Kivity


On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Introduces cpu_physical_memory_get_dirty_range().
It checks the first row and puts dirty addr in the array.
If the first row is empty, it skips to the first non-dirty row
or the end addr, and put the length in the first entry of the array.



+/* It checks the first row and puts dirty addrs in the array.
+   If the first row is empty, it skips to the first non-dirty row
+   or the end addr, and put the length in the first entry of the array. */
+int cpu_physical_memory_get_dirty_range(ram_addr_t start, ram_addr_t end,
+ram_addr_t *dirty_rams, int length,
+int dirty_flag)
+{
+unsigned long phys_ram_dirty, page_number, *p;
+ram_addr_t addr;
+int s_idx = (start  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int e_idx = (end  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int i, j, offset;
+
+switch (dirty_flag) {
+case VGA_DIRTY_FLAG:
+p = phys_ram_vga_dirty;
+break;
+case CODE_DIRTY_FLAG:
+p = phys_ram_code_dirty;
+break;
+case MIGRATION_DIRTY_FLAG:
+p = phys_ram_migration_dirty;
+break;
+default:
+abort();
+}
   


This bit would be improved by switching to an array of bitmaps.


--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: [PATCH 1/6] qemu-kvm: Introduce bit-based phys_ram_dirty for VGA, CODE and MIGRATION.

2010-03-16 Thread Yoshiaki Tamura


Avi Kivity wrote:

On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Replaces byte-based phys_ram_dirty bitmap with
three bit-based phys_ram_dirty bitmap.
On allocation, it sets all bits in the bitmap.

Signed-off-by: Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp
Signed-off-by: OHMURA Keiohmura@lab.ntt.co.jp
---
exec.c | 22 +-
1 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/exec.c b/exec.c
index 9bcb4de..ba334e7 100644
--- a/exec.c
+++ b/exec.c
@@ -119,7 +119,9 @@ uint8_t *code_gen_ptr;

#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
-uint8_t *phys_ram_dirty;
+unsigned long *phys_ram_vga_dirty;
+unsigned long *phys_ram_code_dirty;
+unsigned long *phys_ram_migration_dirty;


Would be nice to make this an array.


Thanks for pointing that out.
I have a question regarding the index of the array.
From the compatibility perspective, I would prefer using the existing macros.

#define VGA_DIRTY_FLAG   0x01
#define CODE_DIRTY_FLAG  0x02
#define MIGRATION_DIRTY_FLAG 0x08

However, if I use them as is, I'll get a sparse array...
Is it acceptable to change these values like 0, 1, 2?

[Qemu-devel] Re: [PATCH 1/6] qemu-kvm: Introduce bit-based phys_ram_dirty for VGA, CODE and MIGRATION.

2010-03-16 Thread Avi Kivity


On 03/16/2010 03:01 PM, Yoshiaki Tamura wrote:

-uint8_t *phys_ram_dirty;
+unsigned long *phys_ram_vga_dirty;
+unsigned long *phys_ram_code_dirty;
+unsigned long *phys_ram_migration_dirty;


Would be nice to make this an array.



Thanks for pointing out.
I have a question regarding the index of the array.
From the compatibility perspective, I would prefer using the existing 
macros.


#define VGA_DIRTY_FLAG   0x01
#define CODE_DIRTY_FLAG  0x02
#define MIGRATION_DIRTY_FLAG 0x08

However, if I use them as is, I'll get a sparse array...
Is it acceptable to change these values like 0, 1, 2?


Sure.

--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: [PATCH 0/6] qemu-kvm: Introduce bit-based phys_ram_dirty, and bit-based dirty page checker.

2010-03-16 Thread Avi Kivity


On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Experimental results:
Cond1: 1.9 ~ 61 times speed up
Cond2: 1.9 ~ 56 times speed up
Cond3: 1.9 ~ 59 times speed up
Cond4: 1.7 ~ 59 times speed up
   


Impressive results.  What's the typical speedup?  Closer to 1.9 or 61?

Note the issue with the cache accesses for set_dirty() is only 
applicable to tcg, since kvm always updates the dirty bitmap in a batch 
(well, I/O also updates the bitmap).


--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.

2010-03-16 Thread Yoshiaki Tamura


Avi Kivity wrote:

On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Modifies wrapper functions for byte-based phys_ram_dirty bitmap to
bit-based phys_ram_dirty bitmap, and adds more wrapper functions to
prevent
direct access to the phys_ram_dirty bitmap.



+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
+{
+ unsigned long mask;
+ int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS;
+ int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1);
+ int ret = 0;
+
+ mask = 1UL offset;
+ if (phys_ram_vga_dirty[index] mask)
+ ret |= VGA_DIRTY_FLAG;
+ if (phys_ram_code_dirty[index] mask)
+ ret |= CODE_DIRTY_FLAG;
+ if (phys_ram_migration_dirty[index] mask)
+ ret |= MIGRATION_DIRTY_FLAG;
+
+ return ret;
}

static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
int dirty_flags)
{
- return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags;
+ return cpu_physical_memory_get_dirty_flags(addr) dirty_flags;
}


This turns one cacheline access into three. If the dirty bitmaps were in
an array, you could do

return dirty_bitmaps[dirty_index][addr  (TARGET_PAGE_BITS +
BITS_IN_LONG)]  mask;

with one cacheline access.


If I'm understanding the existing code correctly,
int dirty_flags can be combined, like VGA + MIGRATION.
If we only have to worry about a single dirty flag, I agree with your idea.

On the other hand, qemu seems to require getting combined dirty flags.
If we introduce dirty bitmaps for each type, we need to access each bitmap to 
get combined flags.  I wasn't sure how to make this more efficient...



static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
{
- phys_ram_dirty[addr TARGET_PAGE_BITS] = 0xff;
+ unsigned long mask;
+ int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS;
+ int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1);
+
+ mask = 1UL offset;
+ phys_ram_vga_dirty[index] |= mask;
+ phys_ram_code_dirty[index] |= mask;
+ phys_ram_migration_dirty[index] |= mask;
+}


This is also three cacheline accesses. I think we should have a master
bitmap which is updated by set_dirty(), and which is or'ed into the
other bitmaps when they are accessed. At least the vga and migration
bitmaps are only read periodically, not randomly, so this would be very
fast. In a way, this is similar to how the qemu bitmap is updated from
the kvm bitmap today.


Sounds good to me.
So we're going to introduce 4 (VGA, CODE, MIGRATION, master) bit-based bitmaps 
in total.

[Qemu-devel] Re: [PATCH 1/7] Add support for generic notifier lists (v2)

2010-03-16 Thread Juan Quintela

Anthony Liguori aligu...@us.ibm.com wrote:
 Notifiers are data-less callbacks and a notifier list is a list of registered
 notifiers that all are interested in a particular event.

 We'll use this in a few patches to implement mouse change notification.

We could use that for migration also.

spice just needs to be notified when migration starts and ends.  And block
migration added a new callback that is basically "call this on start".

Later, Juan.

[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.

2010-03-16 Thread Avi Kivity


On 03/16/2010 03:17 PM, Yoshiaki Tamura wrote:

Avi Kivity wrote:

On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Modifies wrapper functions for byte-based phys_ram_dirty bitmap to
bit-based phys_ram_dirty bitmap, and adds more wrapper functions to
prevent
direct access to the phys_ram_dirty bitmap.



+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
+{
+ unsigned long mask;
+ int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS;
+ int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1);
+ int ret = 0;
+
+ mask = 1UL offset;
+ if (phys_ram_vga_dirty[index] mask)
+ ret |= VGA_DIRTY_FLAG;
+ if (phys_ram_code_dirty[index] mask)
+ ret |= CODE_DIRTY_FLAG;
+ if (phys_ram_migration_dirty[index] mask)
+ ret |= MIGRATION_DIRTY_FLAG;
+
+ return ret;
}

static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
int dirty_flags)
{
- return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags;
+ return cpu_physical_memory_get_dirty_flags(addr) dirty_flags;
}


This turns one cacheline access into three. If the dirty bitmaps were in
an array, you could do

return dirty_bitmaps[dirty_index][addr  (TARGET_PAGE_BITS +
BITS_IN_LONG)]  mask;

with one cacheline access.


If I'm understanding the existing code correctly,
int dirty_flags can be combined, like VGA + MIGRATION.
If we only have to worry about a single dirty flag, I agree with your 
idea.


From a quick grep it seems flags are not combined, except for something 
strange with CODE_DIRTY_FLAG:



static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
uint32_t val)
{
int dirty_flags;
dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
if (!(dirty_flags  CODE_DIRTY_FLAG)) {
#if !defined(CONFIG_USER_ONLY)
tb_invalidate_phys_page_fast(ram_addr, 4);
dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
#endif
}
stl_p(qemu_get_ram_ptr(ram_addr), val);
dirty_flags |= (0xff  ~CODE_DIRTY_FLAG);
phys_ram_dirty[ram_addr  TARGET_PAGE_BITS] = dirty_flags;
/* we remove the notdirty callback only if the code has been
   flushed */
if (dirty_flags == 0xff)
tlb_set_dirty(cpu_single_env, cpu_single_env-mem_io_vaddr);
}


I can't say I understand what it does.



On the other hand, qemu seems to require getting combined dirty flags.
If we introduce dirty bitmaps for each type, we need to access each 
bitmap to get combined flags.  I wasn't sure how to make this more 
efficient...



static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
{
- phys_ram_dirty[addr TARGET_PAGE_BITS] = 0xff;
+ unsigned long mask;
+ int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS;
+ int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1);
+
+ mask = 1UL offset;
+ phys_ram_vga_dirty[index] |= mask;
+ phys_ram_code_dirty[index] |= mask;
+ phys_ram_migration_dirty[index] |= mask;
+}


This is also three cacheline accesses. I think we should have a master
bitmap which is updated by set_dirty(), and which is or'ed into the
other bitmaps when they are accessed. At least the vga and migration
bitmaps are only read periodically, not randomly, so this would be very
fast. In a way, this is similar to how the qemu bitmap is updated from
the kvm bitmap today.


Sounds good to me.
So we're going to introduce 4 (VGA, CODE, MIGRATION, master) bit-based 
bitmaps in total.




Yeah, except CODE doesn't behave like the others.  It would be best to 
understand what its requirements are before making the change.  Maybe 
CODE will need separate handling (so master will only feed VGA and 
MIGRATION).


--
error compiling committee.c: too many arguments to function

[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.

2010-03-16 Thread Anthony Liguori


On 03/16/2010 07:45 AM, Avi Kivity wrote:

On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Modifies wrapper functions for byte-based phys_ram_dirty bitmap to
bit-based phys_ram_dirty bitmap, and adds more wrapper functions to 
prevent

direct access to the phys_ram_dirty bitmap.



+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
+{
+unsigned long mask;
+int index = (addr  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int offset = (addr  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+int ret = 0;
+
+mask = 1UL  offset;
+if (phys_ram_vga_dirty[index]  mask)
+ret |= VGA_DIRTY_FLAG;
+if (phys_ram_code_dirty[index]  mask)
+ret |=  CODE_DIRTY_FLAG;
+if (phys_ram_migration_dirty[index]  mask)
+ret |= MIGRATION_DIRTY_FLAG;
+
+return ret;
  }

  static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
  int dirty_flags)
  {
-return phys_ram_dirty[addr  TARGET_PAGE_BITS]  dirty_flags;
+return cpu_physical_memory_get_dirty_flags(addr)  dirty_flags;
  }


This turns one cacheline access into three.  If the dirty bitmaps were 
in an array, you could do


  return dirty_bitmaps[dirty_index][addr  (TARGET_PAGE_BITS + 
BITS_IN_LONG)]  mask;


with one cacheline access.


As far as I can tell, we only ever call with a single flag so your 
suggestion makes sense.


I'd suggest introducing these functions before splitting the bitmap up.  
It makes review a bit easier.




  static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
  {
-phys_ram_dirty[addr  TARGET_PAGE_BITS] = 0xff;
+unsigned long mask;
+int index = (addr  TARGET_PAGE_BITS) / HOST_LONG_BITS;
+int offset = (addr  TARGET_PAGE_BITS)  (HOST_LONG_BITS - 1);
+
+mask = 1UL  offset;
+phys_ram_vga_dirty[index] |= mask;
+phys_ram_code_dirty[index] |= mask;
+phys_ram_migration_dirty[index] |= mask;
+}


This is also three cacheline accesses.  I think we should have a 
master bitmap which is updated by set_dirty(), and which is or'ed into 
the other bitmaps when they are accessed.  At least the vga and 
migration bitmaps are only read periodically, not randomly, so this 
would be very fast.  In a way, this is similar to how the qemu bitmap 
is updated from the kvm bitmap today.


I am not sure about the code bitmap though.


I think your suggestion makes sense and would also work for the code bitmap.

Regards,

Anthony Liguori

[Qemu-devel] Re: [PATCH 0/6] qemu-kvm: Introduce bit-based phys_ram_dirty, and bit-based dirty page checker.

2010-03-16 Thread Yoshiaki Tamura


Avi Kivity wrote:

On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Experimental results:
Cond1: 1.9 ~ 61 times speed up
Cond2: 1.9 ~ 56 times speed up
Cond3: 1.9 ~ 59 times speed up
Cond4: 1.7 ~ 59 times speed up


Impressive results. What's the typical speedup? Closer to 1.9 or 61?


To be honest, I thought the result above was too vague...
The speedup grows as the number of dirty pages decreases.
Let me paste an excerpt of the actual data measured during live migration under Cond1.
These results were measured with cpu_get_real_ticks(), so the values should be in 
raw ticks.


135200 dirty pages: orig.2488419, bitbased.1251171, ratio.1.99
...
98346 dirty pages: orig.3580533, bitbased.1386918, ratio.2.58
...
54865 dirty pages: orig.4220865, bitbased.984924, ratio.4.29
...
27883 dirty pages: orig.4088970, bitbased.514602, ratio.7.95
...
11541 dirty pages: orig.3854277, bitbased.220410, ratio.17.49
...
8117 dirty pages: orig.4041765, bitbased.175446, ratio.23.04
3231 dirty pages: orig.3337083, bitbased.105921, ratio.31.51
2401 dirty pages: orig.4103469, bitbased.89406, ratio.45.90
1595 dirty pages: orig.4028949, bitbased.78570, ratio.51.28
756 dirty pages: orig.4036707, bitbased.67662, ratio.59.66
0 dirty pages: orig.3938085, bitbased.23634, ratio.166.63
0 dirty pages: orig.3968163, bitbased.23526, ratio.168.67

We didn't show the data for checking a completely empty bitmap because it was too 
fast and we didn't want to give the wrong impression.



Note the issue with the cache accesses for set_dirty() is only
applicable to tcg, since kvm always updates the dirty bitmap in a batch
(well, I/O also updates the bitmap).


I understand.
I'm still concerned about how to reset the dirty bitmap.
I was thinking of resetting it in a batch, but that seems difficult because it must 
stay consistent with the TLB.

[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.

2010-03-16 Thread Yoshiaki Tamura


Avi Kivity wrote:

On 03/16/2010 03:17 PM, Yoshiaki Tamura wrote:

Avi Kivity wrote:

On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Modifies wrapper functions for byte-based phys_ram_dirty bitmap to
bit-based phys_ram_dirty bitmap, and adds more wrapper functions to
prevent
direct access to the phys_ram_dirty bitmap.



+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
+{
+ unsigned long mask;
+ int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS;
+ int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1);
+ int ret = 0;
+
+ mask = 1UL offset;
+ if (phys_ram_vga_dirty[index] mask)
+ ret |= VGA_DIRTY_FLAG;
+ if (phys_ram_code_dirty[index] mask)
+ ret |= CODE_DIRTY_FLAG;
+ if (phys_ram_migration_dirty[index] mask)
+ ret |= MIGRATION_DIRTY_FLAG;
+
+ return ret;
}

static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
int dirty_flags)
{
- return phys_ram_dirty[addr >> TARGET_PAGE_BITS] & dirty_flags;
+ return cpu_physical_memory_get_dirty_flags(addr) & dirty_flags;
}


This turns one cacheline access into three. If the dirty bitmaps were in
an array, you could do

return dirty_bitmaps[dirty_index][addr >> (TARGET_PAGE_BITS +
BITS_IN_LONG)] & mask;

with one cacheline access.


If I'm understanding the existing code correctly,
int dirty_flags can be combined, like VGA + MIGRATION.
If we only have to worry about a single dirty flag, I agree with your
idea.


 From a quick grep it seems flags are not combined, except for something
strange with CODE_DIRTY_FLAG:


Thanks for checking out.
But the CODE_DIRTY_FLAG makes me really nervous...


static void notdirty_mem_writel(void *opaque, target_phys_addr_t
ram_addr,
uint32_t val)
{
int dirty_flags;
dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
if (!(dirty_flags & CODE_DIRTY_FLAG)) {
#if !defined(CONFIG_USER_ONLY)
tb_invalidate_phys_page_fast(ram_addr, 4);
dirty_flags = phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS];
#endif
}
stl_p(qemu_get_ram_ptr(ram_addr), val);
dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] = dirty_flags;
/* we remove the notdirty callback only if the code has been
flushed */
if (dirty_flags == 0xff)
tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
}


I can't say I understand what it does.


Me neither.
This is the reason I had to take a naive approach...


On the other hand, qemu seems to require getting combined dirty flags.
If we introduce dirty bitmaps for each type, we need to access each
bitmap to get combined flags. I wasn't sure how to make this more
efficient...


static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
{
- phys_ram_dirty[addr TARGET_PAGE_BITS] = 0xff;
+ unsigned long mask;
+ int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS;
+ int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1);
+
+ mask = 1UL offset;
+ phys_ram_vga_dirty[index] |= mask;
+ phys_ram_code_dirty[index] |= mask;
+ phys_ram_migration_dirty[index] |= mask;
+}


This is also three cacheline accesses. I think we should have a master
bitmap which is updated by set_dirty(), and which is or'ed into the
other bitmaps when they are accessed. At least the vga and migration
bitmaps are only read periodically, not randomly, so this would be very
fast. In a way, this is similar to how the qemu bitmap is updated from
the kvm bitmap today.


Sounds good to me.
So we're going to introduce 4 (VGA, CODE, MIGRATION, master) bit-based
bitmaps in total.



Yeah, except CODE doesn't behave like the others. Would be best to
understand what it's requirements are before making the change. Maybe
CODE will need separate handling (so master will only feed VGA and
MIGRATION).


After implementing this patch set, I thought separating the wrapper functions 
for each dirty flag type might be an option.  Unifying everything makes it 
inefficient here.  But anyway, do you know somebody who has strong insight into 
this CODE_DIRTY_FLAG?

[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.

2010-03-16 Thread Anthony Liguori


On 03/16/2010 08:29 AM, Avi Kivity wrote:

On 03/16/2010 03:17 PM, Yoshiaki Tamura wrote:

Avi Kivity wrote:

On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Modifies wrapper functions for byte-based phys_ram_dirty bitmap to
bit-based phys_ram_dirty bitmap, and adds more wrapper functions to
prevent
direct access to the phys_ram_dirty bitmap.



+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t 
addr)

+{
+ unsigned long mask;
+ int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS;
+ int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1);
+ int ret = 0;
+
+ mask = 1UL offset;
+ if (phys_ram_vga_dirty[index] mask)
+ ret |= VGA_DIRTY_FLAG;
+ if (phys_ram_code_dirty[index] mask)
+ ret |= CODE_DIRTY_FLAG;
+ if (phys_ram_migration_dirty[index] mask)
+ ret |= MIGRATION_DIRTY_FLAG;
+
+ return ret;
}

static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
int dirty_flags)
{
- return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags;
+ return cpu_physical_memory_get_dirty_flags(addr) dirty_flags;
}


This turns one cacheline access into three. If the dirty bitmaps 
were in

an array, you could do

return dirty_bitmaps[dirty_index][addr  (TARGET_PAGE_BITS +
BITS_IN_LONG)]  mask;

with one cacheline access.


If I'm understanding the existing code correctly,
int dirty_flags can be combined, like VGA + MIGRATION.
If we only have to worry about a single dirty flag, I agree with your 
idea.


From a quick grep it seems flags are not combined, except for 
something strange with CODE_DIRTY_FLAG:


static void notdirty_mem_writel(void *opaque, target_phys_addr_t 
ram_addr,

uint32_t val)
{
int dirty_flags;
dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
if (!(dirty_flags  CODE_DIRTY_FLAG)) {
#if !defined(CONFIG_USER_ONLY)
tb_invalidate_phys_page_fast(ram_addr, 4);
dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
#endif
}
stl_p(qemu_get_ram_ptr(ram_addr), val);
dirty_flags |= (0xff  ~CODE_DIRTY_FLAG);
phys_ram_dirty[ram_addr  TARGET_PAGE_BITS] = dirty_flags;
/* we remove the notdirty callback only if the code has been
   flushed */
if (dirty_flags == 0xff)
tlb_set_dirty(cpu_single_env, cpu_single_env-mem_io_vaddr);
}


I can't say I understand what it does.


The semantics of CODE_DIRTY_FLAG are a little counter intuitive.  
CODE_DIRTY_FLAG means that we know that something isn't code so writes 
do not need checking for self modifying code.


notdirty_mem_write() is called for any ram that is in the virtual TLB 
that has not been updated yet and once a write has occurred, we can 
switch to faster access functions (provided we've invalidated any 
translation blocks).


That's why the check is if (!(dirty_flags & CODE_DIRTY_FLAG)), if it 
hasn't been set yet, we have to assume that it could be a TB so we need 
to invalidate it.  tb_invalidate_phys_page_fast() will set the 
CODE_DIRTY_FLAG if no code is present in that memory area which is why 
we fetch dirty_flags again.


We do the store, and then set the dirty bits to mark that the page is 
now dirty taking care to not change the CODE_DIRTY_FLAG bit.


At the very end, we check to see if CODE_DIRTY_FLAG is set, which indicates that 
we no longer need to trap writes.  If so, we call tlb_set_dirty() which 
will ultimately remove the notdirty callback in favor of a faster access 
mechanism.


With respect to the patch series, there should be no problem having a separate 
code bitmap that gets updated along with a main bitmap provided that the 
semantics of CODE_DIRTY_FLAG are preserved.



Sounds good to me.
So we're going to introduce 4 (VGA, CODE, MIGRATION, master) 
bit-based bitmaps in total.




Yeah, except CODE doesn't behave like the others.  Would be best to 
understand what it's requirements are before making the change.  Maybe 
CODE will need separate handling (so master will only feed VGA and 
MIGRATION).


Generally speaking, cpu_physical_memory_set_dirty() is called by the 
device model.  Any writes by the device model that results in 
self-modifying code are not going to have predictable semantics which is 
why it can set CODE_DIRTY_FLAG.


CODE_DIRTY_FLAG doesn't need to get updated from a master bitmap.  It 
should be treated as a separate bitmap that is strictly dealt with by 
the virtual TLB.


Regards,

Anthony Liguori

Re: [Qemu-devel] wake-on-lan IPMI implementation; real power-off and -no-shutdown

2010-03-16 Thread Luiz Capitulino

On Mon, 15 Mar 2010 15:55:26 +
Daniel P. Berrange berra...@redhat.com wrote:

 On Mon, Mar 15, 2010 at 04:01:27PM +0100, François Revol wrote:
  Hello,
  while working on a demonstrator for a green-IT project, to show
  scheduled machine shutdown and powering depending on various
  conditions, I wondered if I could use QEMU with wake-on-lan
  transparently, but it seems it's not implemented at all.
  
  I thought I could try to add support for it, and with -S it theoretically
  should be doable at least for the first boot, but the network packets
  do not go much further until the NIC is actually initialized, as most
  network layers use qemu_can_send_packet() which returns 0 if the
  machine is stopped.
  Hacking this function to return 1 seems to push the packet upward, but
  I couldn't find a single point where I could check for WOL packets,
  different -net subsystems using different code paths.
  
  Also, it seems -no-shutdown doesn't actually stop the emulation as
  said in the manual, it actually keeps the vm running (and using cpu),
  despite the OS trying to shutdown via ACPI. At least I tested so with
  Haiku (and acpi=true in kernel config), which properly exits QEMU
  without -no-shutdown.
 
 Hmm, I think -no-shutdown should at least stop the CPUs executing. It is
 not really useful on its own though. The app managing QEMU would want to
 use the new JSON based monitor to listen for the SHUTDOWN event to be
 emitted, so it can detect the shutdown completing & then take action it
 wants either reset the guest, or kill QEMU, etc

 If I'm not missing something, -no-shutdown calls vm_stop(), which calls
pause_all_vcpus().

[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.

2010-03-16 Thread Avi Kivity


On 03/16/2010 03:51 PM, Anthony Liguori wrote:

On 03/16/2010 08:29 AM, Avi Kivity wrote:

On 03/16/2010 03:17 PM, Yoshiaki Tamura wrote:

Avi Kivity wrote:

On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Modifies wrapper functions for byte-based phys_ram_dirty bitmap to
bit-based phys_ram_dirty bitmap, and adds more wrapper functions to
prevent
direct access to the phys_ram_dirty bitmap.



+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t 
addr)

+{
+ unsigned long mask;
+ int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS;
+ int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1);
+ int ret = 0;
+
+ mask = 1UL offset;
+ if (phys_ram_vga_dirty[index] mask)
+ ret |= VGA_DIRTY_FLAG;
+ if (phys_ram_code_dirty[index] mask)
+ ret |= CODE_DIRTY_FLAG;
+ if (phys_ram_migration_dirty[index] mask)
+ ret |= MIGRATION_DIRTY_FLAG;
+
+ return ret;
}

static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
int dirty_flags)
{
- return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags;
+ return cpu_physical_memory_get_dirty_flags(addr) dirty_flags;
}


This turns one cacheline access into three. If the dirty bitmaps 
were in

an array, you could do

return dirty_bitmaps[dirty_index][addr  (TARGET_PAGE_BITS +
BITS_IN_LONG)]  mask;

with one cacheline access.


If I'm understanding the existing code correctly,
int dirty_flags can be combined, like VGA + MIGRATION.
If we only have to worry about a single dirty flag, I agree with 
your idea.


From a quick grep it seems flags are not combined, except for 
something strange with CODE_DIRTY_FLAG:


static void notdirty_mem_writel(void *opaque, target_phys_addr_t 
ram_addr,

uint32_t val)
{
int dirty_flags;
dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
if (!(dirty_flags  CODE_DIRTY_FLAG)) {
#if !defined(CONFIG_USER_ONLY)
tb_invalidate_phys_page_fast(ram_addr, 4);
dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
#endif
}
stl_p(qemu_get_ram_ptr(ram_addr), val);
dirty_flags |= (0xff  ~CODE_DIRTY_FLAG);
phys_ram_dirty[ram_addr  TARGET_PAGE_BITS] = dirty_flags;
/* we remove the notdirty callback only if the code has been
   flushed */
if (dirty_flags == 0xff)
tlb_set_dirty(cpu_single_env, cpu_single_env-mem_io_vaddr);
}


I can't say I understand what it does.


The semantics of CODE_DIRTY_FLAG are a little counter intuitive.  
CODE_DIRTY_FLAG means that we know that something isn't code so writes 
do not need checking for self modifying code.


So the hardware equivalent is, when the Instruction TLB loads a page 
address, clear CODE_DIRTY_FLAG?




notdirty_mem_write() is called for any ram that is in the virtual TLB 
that has not been updated yet and once a write has occurred, we can 
switch to faster access functions (provided we've invalidated any 
translation blocks).


That's why the check is if (!(dirty_flags & CODE_DIRTY_FLAG)), if it 
hasn't been set yet, we have to assume that it could be a TB so we 
need to invalidate it.  tb_invalidate_phys_page_fast() will set the 
CODE_DIRTY_FLAG if no code is present in that memory area which is why 
we fetch dirty_flags again.


Ok.



We do the store, and then set the dirty bits to mark that the page is 
now dirty taking care to not change the CODE_DIRTY_FLAG bit.


At the very end, we check to see if CODE_DIRTY_FLAG is set, which indicates 
that we no longer need to trap writes.  If so, we call tlb_set_dirty() 
which will ultimately remove the notdirty callback in favor of a 
faster access mechanism.


With respect to the patch series, there should be no problem having a 
separate code bitmap that gets updated along with a main bitmap 
provided that the semantics of CODE_DIRTY_FLAG are preserved.



Sounds good to me.
So we're going to introduce 4 (VGA, CODE, MIGRATION, master) 
bit-based bitmaps in total.




Yeah, except CODE doesn't behave like the others.  Would be best to 
understand what it's requirements are before making the change.  
Maybe CODE will need separate handling (so master will only feed VGA 
and MIGRATION).


Generally speaking, cpu_physical_memory_set_dirty() is called by the 
device model.  Any writes by the device model that results in 
self-modifying code are not going to have predictable semantics which 
is why it can set CODE_DIRTY_FLAG.


CODE_DIRTY_FLAG doesn't need to get updated from a master bitmap.  It 
should be treated as a separate bitmap that is strictly dealt with by 
the virtual TLB.


Thanks.

--
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] wake-on-lan IPMI implementation; real power-off and -no-shutdown

2010-03-16 Thread François Revol

   Also, it seems -no-shutdown doesn't actually stop the emulation
   as
   said in the manual, it actually keeps the vm running (and using
   cpu),
   despite the OS trying to shutdown via ACPI. At least I tested so
   with
   Haiku (and acpi=true in kernel config), which properly exits QEMU
   without -no-shutdown.
 
  Hmm, I think -no-shutdown should at least stop the CPUs executing.
  It is
  not really useful on its own though. The app managing QEMU would
  want to
  use the new JSON based monitor to listen for the SHUTDOWN event to
  be
  emitted, so it can detect the shutdown completing  then take
  action it
  wants either reset the guest, or kill QEMU, etc

  If I'm not missing something, -no-shutdown calls vm_stop(), which
 calls
 pause_all_vcpus().

Oh indeed, info status shows the VM as paused.
I was misled because the GUI window was still open...

François.

[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.

2010-03-16 Thread Anthony Liguori


On 03/16/2010 08:57 AM, Avi Kivity wrote:

On 03/16/2010 03:51 PM, Anthony Liguori wrote:

On 03/16/2010 08:29 AM, Avi Kivity wrote:

On 03/16/2010 03:17 PM, Yoshiaki Tamura wrote:

Avi Kivity wrote:

On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote:

Modifies wrapper functions for byte-based phys_ram_dirty bitmap to
bit-based phys_ram_dirty bitmap, and adds more wrapper functions to
prevent
direct access to the phys_ram_dirty bitmap.



+
+static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t 
addr)

+{
+ unsigned long mask;
+ int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS;
+ int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1);
+ int ret = 0;
+
+ mask = 1UL offset;
+ if (phys_ram_vga_dirty[index] mask)
+ ret |= VGA_DIRTY_FLAG;
+ if (phys_ram_code_dirty[index] mask)
+ ret |= CODE_DIRTY_FLAG;
+ if (phys_ram_migration_dirty[index] mask)
+ ret |= MIGRATION_DIRTY_FLAG;
+
+ return ret;
}

static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
int dirty_flags)
{
- return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags;
+ return cpu_physical_memory_get_dirty_flags(addr) dirty_flags;
}


This turns one cacheline access into three. If the dirty bitmaps 
were in

an array, you could do

return dirty_bitmaps[dirty_index][addr  (TARGET_PAGE_BITS +
BITS_IN_LONG)]  mask;

with one cacheline access.


If I'm understanding the existing code correctly,
int dirty_flags can be combined, like VGA + MIGRATION.
If we only have to worry about a single dirty flag, I agree with 
your idea.


From a quick grep it seems flags are not combined, except for 
something strange with CODE_DIRTY_FLAG:


static void notdirty_mem_writel(void *opaque, target_phys_addr_t 
ram_addr,

uint32_t val)
{
int dirty_flags;
dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
if (!(dirty_flags  CODE_DIRTY_FLAG)) {
#if !defined(CONFIG_USER_ONLY)
tb_invalidate_phys_page_fast(ram_addr, 4);
dirty_flags = phys_ram_dirty[ram_addr  TARGET_PAGE_BITS];
#endif
}
stl_p(qemu_get_ram_ptr(ram_addr), val);
dirty_flags |= (0xff  ~CODE_DIRTY_FLAG);
phys_ram_dirty[ram_addr  TARGET_PAGE_BITS] = dirty_flags;
/* we remove the notdirty callback only if the code has been
   flushed */
if (dirty_flags == 0xff)
tlb_set_dirty(cpu_single_env, cpu_single_env-mem_io_vaddr);
}


I can't say I understand what it does.


The semantics of CODE_DIRTY_FLAG are a little counter intuitive.  
CODE_DIRTY_FLAG means that we know that something isn't code so 
writes do not need checking for self modifying code.


So the hardware equivalent is, when the Instruction TLB loads a page 
address, clear CODE_DIRTY_FLAG?


Yes, and that is what tlb_protect_code() does; it's called from 
tb_alloc_page(), which is what's called when a TB is created.


Regards,

Anthony Liguori

[Qemu-devel] [PULL] e100/pci fixes

2010-03-16 Thread Michael S. Tsirkin

The following changes since commit cb66ffcf9e298dc1bfc11682172ff9472bcd4495:
  Kevin Wolf (1):
qemu-img rebase: Document -f option

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mst/qemu.git pci

Michael S. Tsirkin (2):
  eepro100: address pci todo's, use pci_set_xx
  pcnet: make subsystem vendor id match hardware

 hw/eepro100.c |   94 +++-
 hw/pcnet.c|3 ++
 2 files changed, 35 insertions(+), 62 deletions(-)

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Anthony Liguori


On 03/16/2010 05:45 AM, Daniel P. Berrange wrote:

On Tue, Mar 16, 2010 at 12:38:02PM +0200, Avi Kivity wrote:
   

On 03/16/2010 12:31 PM, Daniel P. Berrange wrote:
 

Polling loops are an indication that something is wrong.

 

Except when people suggest they are the right answer, qcow high
watermark ;-P

   

I liked Anthony's suggestion of an lvm2 block format driver.  No polling.
 

Doesn't that require giving QEMU privileges to perform LVM operations which
implies QEMU having CAP_SYS_ADMIN  ?
   


If QEMU is able to resize an LVM partition, it needs to carry privileges.

I'm not sure how this can be done safely in a lesser privileged 
environment.  Presumably, you're over committing storage and there's not 
much you can do if the guests exhaust their storage all at once.


Regards,

Anthony Liguori


Daniel

[Qemu-devel] Re: [PATCHv4 09/12] vhost: vhost net support

2010-03-16 Thread Michael S. Tsirkin

On Mon, Mar 08, 2010 at 11:50:23AM +0530, Amit Shah wrote:
 On (Sat) Mar 06 2010 [21:06:35], Michael S. Tsirkin wrote:
  
+r = vhost_virtqueue_set_addr(dev, vq, idx, dev->log_enabled);
+if (r < 0) {
+r = -errno;
+goto fail_alloc;
+}
+if (!vdev->binding->guest_notifier || 
!vdev->binding->host_notifier) {
+fprintf(stderr, "binding does not support irqfd/queuefd\n");
+r = -ENOSYS;
+goto fail_alloc;
+}
   
   This could be checked much earlier on in the function; so that we avoid
   doing all that stuff above and the cleanup.
  
  Whatever order we put checks in, we'll have to undo stuff
  done beforehand on error.
 
 Not if you do this check before any ioctls or allocations.
 !vdev->binding->guest_notifier is not dependent on anything you do above
 it in this function, so just checking for this first thing in the
 function will not need any cleanup.
 
 
   Amit

Yes, but I think it's clearer to check the function just before
calling it. No?

[Qemu-devel] [PATCH 0/4] tcg-hppa git it working, v2

2010-03-16 Thread Richard Henderson

Changes since v2:
  * Fix cpu_signal_handler.

At this point the port passes all of the integer gcc tests for
i386-linux-uclibc.  Many of the fp tests fail because target-i386
is horribly confused about how to represent the fpu when not
being built on a i386 host.  I briefly tried to force the use
of the floatx80 format, but there's a whole tangle of follow-on
errors in cpu.h and the helper files.

I've been considering putting together the parts for an arm or
mips userland to avoid this problem entirely, but haven't quite
gotten that far.


r~


Richard Henderson (4):
  tcg-hppa: Fix const errors in hppa-dis.c.
  tcg-hppa: Fix 64-bit argument ordering.
  tcg-hppa: Finish the port.
  tcg-hppa: Compute is_write in cpu_signal_handler.

 configure |5 +-
 cpu-exec.c|   38 +-
 hppa-dis.c|4 +-
 tcg/hppa/tcg-target.c | 1846 +++--
 tcg/hppa/tcg-target.h |  142 +---
 tcg/tcg.c |   12 +-
 6 files changed, 1387 insertions(+), 660 deletions(-)

[Qemu-devel] [PATCH 1/4] tcg-hppa: Fix const errors in hppa-dis.c.

2010-03-16 Thread Richard Henderson

---
 hppa-dis.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hppa-dis.c b/hppa-dis.c
index 9d96d72..49f99c8 100644
--- a/hppa-dis.c
+++ b/hppa-dis.c
@@ -576,7 +576,7 @@ struct pa_opcode
 const char *name;
 unsigned long int match;   /* Bits that must be set...  */
 unsigned long int mask;/* ... in these bits. */
-char *args;
+const char *args;
 enum pa_arch arch;
 char flags;
 };
@@ -2753,7 +2753,7 @@ print_insn_hppa (bfd_vma memaddr, disassemble_info *info)
int sf = GET_FIELD (insn, 19, 20);
const char * const * source = float_format_names;
const char * const * dest = float_format_names;
-   char *t = "";
+   const char *t = "";
 
if (sub == 4)
  {
-- 
1.6.6.1

[Qemu-devel] [PATCH 4/4] tcg-hppa: Compute is_write in cpu_signal_handler.

2010-03-16 Thread Richard Henderson

---
 cpu-exec.c |   38 +++---
 1 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index bcfcda2..14204f4 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -1193,15 +1193,39 @@ int cpu_signal_handler(int host_signum, void *pinfo,
 {
 struct siginfo *info = pinfo;
 struct ucontext *uc = puc;
-unsigned long pc;
-int is_write;
+unsigned long pc = uc->uc_mcontext.sc_iaoq[0];
+uint32_t insn = *(uint32_t *)pc;
+int is_write = 0;
+
+/* XXX: need kernel patch to get write flag faster.  */
+switch (insn >> 26) {
+case 0x1a: /* STW */
+case 0x19: /* STH */
+case 0x18: /* STB */
+case 0x1b: /* STWM */
+is_write = 1;
+break;
+
+case 0x09: /* CSTWX, FSTWX, FSTWS */
+case 0x0b: /* CSTDX, FSTDX, FSTDS */
+/* Distinguish from coprocessor load ... */
+is_write = (insn >> 9) & 1;
+break;
+
+case 0x03:
+switch ((insn >> 6) & 15) {
+case 0xa: /* STWS */
+case 0x9: /* STHS */
+case 0x8: /* STBS */
+case 0xe: /* STWAS */
+case 0xc: /* STBYS */
+is_write = 1;
+}
+break;
+}
 
-pc = uc->uc_mcontext.sc_iaoq[0];
-/* FIXME: compute is_write */
-is_write = 0;
 return handle_cpu_signal(pc, (unsigned long)info->si_addr, 
- is_write,
- &uc->uc_sigmask, puc);
+ is_write, &uc->uc_sigmask, puc);
 }
 
 #else
-- 
1.6.6.1

[Qemu-devel] [PATCH 2/4] tcg-hppa: Fix 64-bit argument ordering.

2010-03-16 Thread Richard Henderson

---
 tcg/tcg.c |   12 +++-
 1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 1818868..d753149 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -596,7 +596,17 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned 
int flags,
 real_args++;
 }
 #endif
-#ifdef TCG_TARGET_WORDS_BIGENDIAN
+   /* If stack grows up, then we will be placing successive
+  arguments at lower addresses, which means we need to
+  reverse the order compared to how we would normally
+  treat either big or little-endian.  For those arguments
+  that will wind up in registers, this still works for
+  HPPA (the only current STACK_GROWSUP target) since the
+  argument registers are *also* allocated in decreasing
+  order.  If another such target is added, this logic may
+  have to get more complicated to differentiate between
+  stack arguments and register arguments.  */
+#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
 *gen_opparam_ptr++ = args[i] + 1;
 *gen_opparam_ptr++ = args[i];
 #else
-- 
1.6.6.1

[Qemu-devel] Re: KVM call agenda for Mar 16

2010-03-16 Thread Anthony Liguori


On 03/16/2010 10:23 AM, Daniel P. Berrange wrote:

In the context of the RHEV management application, iSCSI/SCSI Fibre are
providing the raw storage, with LVM VGs on top and the carving LVs for
the guests. In the common case the admin/app would monitor VG usage & LV
rate of increase to ensure extra space was available in the VG ahead of
it being needed. eg if the VG comes close to exhaustion then further LUNS
can be obtained and added as PVs to the LVM volume group. So you can't
guarantee that a VM won't stop on ENOSPC, but it is very unlikely if the
system is operating correctly.

As an added complication, since cluster-LVM isn't used, all LVM operations
have to be performed on a dedicated/exclusive storage host and then metadata
refreshed/propagated to other hosts running VMs. This last issue implies that
letting QEMU resize its LV would never be possible, even if it were not for
the permissions problem.
   


Sounds like a good argument for polling :-)

Regards,

Anthony Liguori


Regards,
Daniel

[Qemu-devel] [PATCH 0/5] target-alpha improvements

2010-03-16 Thread Richard Henderson

The major thing here is the addition of the CONST/PURE markers
to the helper functions.  In the process of studying the helper
implementations to see how each ought to get marked, there were
several follow-on improvements that caught my eye.


r~


Richard Henderson (5):
  target-alpha: Add flags markups to helpers.h.
  target-alpha: Implement cpys{,n,e} inline.
  target-alpha: Implement rs/rc properly.
  target-alpha: Implement cvtql inline.
  target-alpha: Implement cvtlq inline.

 linux-user/main.c|5 ++
 target-alpha/helper.h|  179 +
 target-alpha/op_helper.c |   73 +--
 target-alpha/translate.c |  163 ++
 4 files changed, 239 insertions(+), 181 deletions(-)

[Qemu-devel] [PATCH 1/5] target-alpha: Add flags markups to helpers.h.

2010-03-16 Thread Richard Henderson

Almost all alpha helpers are at least TCG_CALL_CONST
and a fair few are also TCG_CALL_PURE.

Signed-off-by: Richard Henderson r...@twiddle.net
---
 target-alpha/helper.h |  184 
 1 files changed, 92 insertions(+), 92 deletions(-)

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index 79cf375..a508077 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -1,9 +1,9 @@
 #include "def-helper.h"
 
 DEF_HELPER_2(excp, void, int, int)
-DEF_HELPER_0(load_pcc, i64)
-DEF_HELPER_0(rc, i64)
-DEF_HELPER_0(rs, i64)
+DEF_HELPER_FLAGS_0(load_pcc, TCG_CALL_CONST | TCG_CALL_PURE, i64)
+DEF_HELPER_FLAGS_0(rc, TCG_CALL_CONST, i64)
+DEF_HELPER_FLAGS_0(rs, TCG_CALL_CONST, i64)
 
 DEF_HELPER_2(addqv, i64, i64, i64)
 DEF_HELPER_2(addlv, i64, i64, i64)
@@ -11,98 +11,98 @@ DEF_HELPER_2(subqv, i64, i64, i64)
 DEF_HELPER_2(sublv, i64, i64, i64)
 DEF_HELPER_2(mullv, i64, i64, i64)
 DEF_HELPER_2(mulqv, i64, i64, i64)
-DEF_HELPER_2(umulh, i64, i64, i64)
-
-DEF_HELPER_1(ctpop, i64, i64)
-DEF_HELPER_1(ctlz, i64, i64)
-DEF_HELPER_1(cttz, i64, i64)
-
-DEF_HELPER_2(zap, i64, i64, i64)
-DEF_HELPER_2(zapnot, i64, i64, i64)
-
-DEF_HELPER_2(cmpbge, i64, i64, i64)
-
-DEF_HELPER_2(minub8, i64, i64, i64)
-DEF_HELPER_2(minsb8, i64, i64, i64)
-DEF_HELPER_2(minuw4, i64, i64, i64)
-DEF_HELPER_2(minsw4, i64, i64, i64)
-DEF_HELPER_2(maxub8, i64, i64, i64)
-DEF_HELPER_2(maxsb8, i64, i64, i64)
-DEF_HELPER_2(maxuw4, i64, i64, i64)
-DEF_HELPER_2(maxsw4, i64, i64, i64)
-DEF_HELPER_2(perr, i64, i64, i64)
-DEF_HELPER_1(pklb, i64, i64)
-DEF_HELPER_1(pkwb, i64, i64)
-DEF_HELPER_1(unpkbl, i64, i64)
-DEF_HELPER_1(unpkbw, i64, i64)
-
-DEF_HELPER_0(load_fpcr, i64)
-DEF_HELPER_1(store_fpcr, void, i64)
-
-DEF_HELPER_1(f_to_memory, i32, i64)
-DEF_HELPER_1(memory_to_f, i64, i32)
-DEF_HELPER_2(addf, i64, i64, i64)
-DEF_HELPER_2(subf, i64, i64, i64)
-DEF_HELPER_2(mulf, i64, i64, i64)
-DEF_HELPER_2(divf, i64, i64, i64)
-DEF_HELPER_1(sqrtf, i64, i64)
-
-DEF_HELPER_1(g_to_memory, i64, i64)
-DEF_HELPER_1(memory_to_g, i64, i64)
-DEF_HELPER_2(addg, i64, i64, i64)
-DEF_HELPER_2(subg, i64, i64, i64)
-DEF_HELPER_2(mulg, i64, i64, i64)
-DEF_HELPER_2(divg, i64, i64, i64)
-DEF_HELPER_1(sqrtg, i64, i64)
-
-DEF_HELPER_1(s_to_memory, i32, i64)
-DEF_HELPER_1(memory_to_s, i64, i32)
-DEF_HELPER_2(adds, i64, i64, i64)
-DEF_HELPER_2(subs, i64, i64, i64)
-DEF_HELPER_2(muls, i64, i64, i64)
-DEF_HELPER_2(divs, i64, i64, i64)
-DEF_HELPER_1(sqrts, i64, i64)
-
-DEF_HELPER_2(addt, i64, i64, i64)
-DEF_HELPER_2(subt, i64, i64, i64)
-DEF_HELPER_2(mult, i64, i64, i64)
-DEF_HELPER_2(divt, i64, i64, i64)
-DEF_HELPER_1(sqrtt, i64, i64)
-
-DEF_HELPER_2(cmptun, i64, i64, i64)
-DEF_HELPER_2(cmpteq, i64, i64, i64)
-DEF_HELPER_2(cmptle, i64, i64, i64)
-DEF_HELPER_2(cmptlt, i64, i64, i64)
-DEF_HELPER_2(cmpgeq, i64, i64, i64)
-DEF_HELPER_2(cmpgle, i64, i64, i64)
-DEF_HELPER_2(cmpglt, i64, i64, i64)
-
-DEF_HELPER_2(cpys, i64, i64, i64)
-DEF_HELPER_2(cpysn, i64, i64, i64)
-DEF_HELPER_2(cpyse, i64, i64, i64)
-
-DEF_HELPER_1(cvtts, i64, i64)
-DEF_HELPER_1(cvtst, i64, i64)
-DEF_HELPER_1(cvtqs, i64, i64)
-DEF_HELPER_1(cvtqt, i64, i64)
-DEF_HELPER_1(cvtqf, i64, i64)
-DEF_HELPER_1(cvtgf, i64, i64)
-DEF_HELPER_1(cvtgq, i64, i64)
-DEF_HELPER_1(cvtqg, i64, i64)
-DEF_HELPER_1(cvtlq, i64, i64)
-
-DEF_HELPER_1(cvttq, i64, i64)
-DEF_HELPER_1(cvttq_c, i64, i64)
-DEF_HELPER_1(cvttq_svic, i64, i64)
-
-DEF_HELPER_1(cvtql, i64, i64)
+DEF_HELPER_FLAGS_2(umulh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_1(ctpop, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(ctlz, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(cttz, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+
+DEF_HELPER_FLAGS_2(zap, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(zapnot, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(cmpbge, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(minub8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(minsb8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(minuw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(minsw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxub8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxsb8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxuw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxsw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(perr, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(pklb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(pkwb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(unpkbl, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(unpkbw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+
+DEF_HELPER_FLAGS_0(load_fpcr, TCG_CALL_CONST | TCG_CALL_PURE, i64)
+DEF_HELPER_FLAGS_1(store_fpcr,

[Qemu-devel] [PATCH 2/5] target-alpha: Implement cpys{, n, e} inline.

2010-03-16 Thread Richard Henderson

Signed-off-by: Richard Henderson r...@twiddle.net
---
 target-alpha/helper.h|4 --
 target-alpha/op_helper.c |   18 --
 target-alpha/translate.c |   78 +++--
 3 files changed, 74 insertions(+), 26 deletions(-)

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index a508077..8e11304 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -77,10 +77,6 @@ DEF_HELPER_FLAGS_2(cmpgeq, TCG_CALL_CONST | TCG_CALL_PURE, 
i64, i64, i64)
 DEF_HELPER_FLAGS_2(cmpgle, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(cmpglt, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
 
-DEF_HELPER_FLAGS_2(cpys, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(cpysn, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(cpyse, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-
 DEF_HELPER_FLAGS_1(cvtts, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvtst, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvtqs, TCG_CALL_CONST, i64, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index 4d2c2ee..2419dc4 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -921,24 +921,6 @@ uint64_t helper_sqrtt (uint64_t a)
 return float64_to_t(fr);
 }
 
-
-/* Sign copy */
-uint64_t helper_cpys(uint64_t a, uint64_t b)
-{
-return (a  0x8000ULL) | (b  ~0x8000ULL);
-}
-
-uint64_t helper_cpysn(uint64_t a, uint64_t b)
-{
-return ((~a)  0x8000ULL) | (b  ~0x8000ULL);
-}
-
-uint64_t helper_cpyse(uint64_t a, uint64_t b)
-{
-return (a  0xFFF0ULL) | (b  ~0xFFF0ULL);
-}
-
-
 /* Comparisons */
 uint64_t helper_cmptun (uint64_t a, uint64_t b)
 {
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 719b423..b677378 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -741,6 +741,80 @@ static inline void glue(gen_f, name)(DisasContext *ctx,
 \
 IEEE_INTCVT(cvtqs)
 IEEE_INTCVT(cvtqt)
 
+static void gen_cpys_internal(int ra, int rb, int rc, int inv_a, uint64_t mask)
+{
+TCGv va, vb, vmask;
+int za = 0, zb = 0;
+
+if (unlikely(rc == 31)) {
+return;
+}
+
+vmask = tcg_const_i64(mask);
+
+TCGV_UNUSED_I64(va);
+if (ra == 31) {
+if (inv_a) {
+va = vmask;
+} else {
+za = 1;
+}
+} else {
+va = tcg_temp_new_i64();
+tcg_gen_mov_i64(va, cpu_fir[ra]);
+if (inv_a) {
+tcg_gen_not_i64(va, va);
+}
+tcg_gen_and_i64(va, va, vmask);
+}
+
+TCGV_UNUSED_I64(vb);
+if (rb == 31) {
+zb = 1;
+} else {
+vb = tcg_temp_new_i64();
+tcg_gen_andc_i64(vb, cpu_fir[rb], vmask);
+}
+
+switch (za * 2 + zb) {
+case 0:
+tcg_gen_or_i64(cpu_fir[rc], va, vb);
+break;
+case 1:
+tcg_gen_mov_i64(cpu_fir[rc], va);
+break;
+case 2:
+tcg_gen_mov_i64(cpu_fir[rc], vb);
+break;
+case 3:
+tcg_gen_movi_i64(cpu_fir[rc], 0);
+break;
+}
+
+tcg_temp_free(vmask);
+if (ra != 31) {
+tcg_temp_free(va);
+}
+if (rb != 31) {
+tcg_temp_free(vb);
+}
+}
+
+static inline void gen_fcpys(int ra, int rb, int rc)
+{
+gen_cpys_internal(ra, rb, rc, 0, 0x8000ULL);
+}
+
+static inline void gen_fcpysn(int ra, int rb, int rc)
+{
+gen_cpys_internal(ra, rb, rc, 1, 0x8000ULL);
+}
+
+static inline void gen_fcpyse(int ra, int rb, int rc)
+{
+gen_cpys_internal(ra, rb, rc, 0, 0xFFF0ULL);
+}
+
 #define FARITH3(name)   \
 static inline void glue(gen_f, name)(int ra, int rb, int rc)\
 {   \
@@ -769,10 +843,6 @@ static inline void glue(gen_f, name)(int ra, int rb, int 
rc)\
 tcg_temp_free(vb);  \
 }   \
 }
-/* ??? Ought to expand these inline; simple masking operations.  */
-FARITH3(cpys)
-FARITH3(cpysn)
-FARITH3(cpyse)
 
 /* ??? VAX instruction qualifiers ignored.  */
 FARITH3(addf)
-- 
1.6.6.1

[Qemu-devel] [PATCH 4/5] target-alpha: Implement cvtql inline.

2010-03-16 Thread Richard Henderson

It's a simple mask and shift sequence.
Also, fix a typo in the actual masks used.

Signed-off-by: Richard Henderson r...@twiddle.net
---
 target-alpha/helper.h|4 
 target-alpha/op_helper.c |   20 
 target-alpha/translate.c |   45 +++--
 3 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index c378195..10c78d0 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -89,10 +89,6 @@ DEF_HELPER_FLAGS_1(cvttq, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvttq_c, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvttq_svic, TCG_CALL_CONST, i64, i64)
 
-DEF_HELPER_FLAGS_1(cvtql, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-DEF_HELPER_1(cvtql_v, i64, i64)
-DEF_HELPER_1(cvtql_sv, i64, i64)
-
 DEF_HELPER_FLAGS_1(setroundmode, TCG_CALL_CONST, void, i32)
 DEF_HELPER_FLAGS_1(setflushzero, TCG_CALL_CONST, void, i32)
 DEF_HELPER_FLAGS_0(fp_exc_clear, TCG_CALL_CONST, void)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index 84867b8..f9cd07a 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -1159,26 +1159,6 @@ uint64_t helper_cvtlq (uint64_t a)
 return (lo  0x3FFF) | (hi  0xc000);
 }
 
-uint64_t helper_cvtql (uint64_t a)
-{
-return ((a  0xC000)  32) | ((a  0x7FFF)  29);
-}
-
-uint64_t helper_cvtql_v (uint64_t a)
-{
-if ((int32_t)a != (int64_t)a)
-helper_excp(EXCP_ARITH, EXC_M_IOV);
-return helper_cvtql(a);
-}
-
-uint64_t helper_cvtql_sv (uint64_t a)
-{
-/* ??? I'm pretty sure there's nothing that /sv needs to do that /v
-   doesn't do.  The only thing I can think is that /sv is a valid
-   instruction merely for completeness in the ISA.  */
-return helper_cvtql_v(a);
-}
-
 /* PALcode support special instructions */
 #if !defined (CONFIG_USER_ONLY)
 void helper_hw_rei (void)
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 188e76c..cfdf441 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -597,6 +597,41 @@ static inline void gen_fp_exc_raise(int rc, int fn11)
 gen_fp_exc_raise_ignore(rc, fn11, fn11  QUAL_I ? 0 : float_flag_inexact);
 }
 
+static void gen_fcvtql(int rb, int rc)
+{
+if (unlikely(rc == 31)) {
+return;
+}
+if (unlikely(rb == 31)) {
+tcg_gen_movi_i64(cpu_fir[rc], 0);
+} else {
+TCGv tmp = tcg_temp_new();
+
+tcg_gen_andi_i64(tmp, cpu_fir[rb], 0xC000);
+tcg_gen_andi_i64(cpu_fir[rc], cpu_fir[rb], 0x3FFF);
+tcg_gen_shli_i64(tmp, tmp, 32);
+tcg_gen_shli_i64(cpu_fir[rc], cpu_fir[rc], 29);
+tcg_gen_or_i64(cpu_fir[rc], cpu_fir[rc], tmp);
+
+tcg_temp_free(tmp);
+}
+}
+
+static void gen_fcvtql_v(DisasContext *ctx, int rb, int rc)
+{
+if (rb != 31) {
+int lab = gen_new_label();
+TCGv tmp = tcg_temp_new();
+
+tcg_gen_ext_i32_i64(tmp, cpu_fir[rb]);
+tcg_gen_brcond_i64(TCG_COND_EQ, tmp, cpu_fir[rb], lab);
+gen_excp(ctx, EXCP_ARITH, EXC_M_IOV);
+
+gen_set_label(lab);
+}
+gen_fcvtql(rb, rc);
+}
+
 #define FARITH2(name)   \
 static inline void glue(gen_f, name)(int rb, int rc)\
 {   \
@@ -612,9 +647,6 @@ static inline void glue(gen_f, name)(int rb, int rc)\
 }   \
 }
 FARITH2(cvtlq)
-FARITH2(cvtql)
-FARITH2(cvtql_v)
-FARITH2(cvtql_sv)
 
 /* ??? VAX instruction qualifiers ignored.  */
 FARITH2(sqrtf)
@@ -2327,11 +2359,12 @@ static inline int translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0x130:
 /* CVTQL/V */
-gen_fcvtql_v(rb, rc);
-break;
 case 0x530:
 /* CVTQL/SV */
-gen_fcvtql_sv(rb, rc);
+/* ??? I'm pretty sure there's nothing that /sv needs to do that
+   /v doesn't do.  The only thing I can think is that /sv is a
+   valid instruction merely for completeness in the ISA.  */
+gen_fcvtql_v(ctx, rb, rc);
 break;
 default:
 goto invalid_opc;
-- 
1.6.6.1

[Qemu-devel] [PATCH 3/5] target-alpha: Implement rs/rc properly.

2010-03-16 Thread Richard Henderson

This is a per-cpu flag; there's no need for a spinlock of any kind.

We were also failing to manipulate the flag with $31 as a target reg
and failing to clear the flag on execution of a return-from-interrupt
instruction.

Signed-off-by: Richard Henderson r...@twiddle.net
---
 linux-user/main.c|5 +
 target-alpha/helper.h|2 --
 target-alpha/op_helper.c |   28 ++--
 target-alpha/translate.c |   19 +++
 4 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 4614e3c..d4a29cb 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2356,6 +2356,11 @@ void cpu_loop (CPUState *env)
 while (1) {
 trapnr = cpu_alpha_exec (env);
 
+   /* All of the traps imply a transition through PALcode, which
+  implies an REI instruction has been executed.  Which means
+  that the intr_flag should be cleared.  */
+   env-intr_flag = 0;
+
 switch (trapnr) {
 case EXCP_RESET:
 fprintf(stderr, Reset requested. Exit\n);
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index 8e11304..c378195 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -2,8 +2,6 @@
 
 DEF_HELPER_2(excp, void, int, int)
 DEF_HELPER_FLAGS_0(load_pcc, TCG_CALL_CONST | TCG_CALL_PURE, i64)
-DEF_HELPER_FLAGS_0(rc, TCG_CALL_CONST, i64)
-DEF_HELPER_FLAGS_0(rs, TCG_CALL_CONST, i64)
 
 DEF_HELPER_2(addqv, i64, i64, i64)
 DEF_HELPER_2(addlv, i64, i64, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index 2419dc4..84867b8 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -47,32 +47,6 @@ void helper_store_fpcr (uint64_t val)
 cpu_alpha_store_fpcr (env, val);
 }
 
-static spinlock_t intr_cpu_lock = SPIN_LOCK_UNLOCKED;
-
-uint64_t helper_rs(void)
-{
-uint64_t tmp;
-
-spin_lock(intr_cpu_lock);
-tmp = env-intr_flag;
-env-intr_flag = 1;
-spin_unlock(intr_cpu_lock);
-
-return tmp;
-}
-
-uint64_t helper_rc(void)
-{
-uint64_t tmp;
-
-spin_lock(intr_cpu_lock);
-tmp = env-intr_flag;
-env-intr_flag = 0;
-spin_unlock(intr_cpu_lock);
-
-return tmp;
-}
-
 uint64_t helper_addqv (uint64_t op1, uint64_t op2)
 {
 uint64_t tmp = op1;
@@ -1211,6 +1185,7 @@ void helper_hw_rei (void)
 {
 env-pc = env-ipr[IPR_EXC_ADDR]  ~3;
 env-ipr[IPR_EXC_ADDR] = env-ipr[IPR_EXC_ADDR]  1;
+env-intr_flag = 0;
 /* XXX: re-enable interrupts and memory mapping */
 }
 
@@ -1218,6 +1193,7 @@ void helper_hw_ret (uint64_t a)
 {
 env-pc = a  ~3;
 env-ipr[IPR_EXC_ADDR] = a  1;
+env-intr_flag = 0;
 /* XXX: re-enable interrupts and memory mapping */
 }
 
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index b677378..188e76c 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -1266,6 +1266,19 @@ static inline void gen_cmp(TCGCond cond, int ra, int rb, 
int rc, int islit,
 gen_set_label(l2);
 }
 
+static void gen_rx(int ra, int set)
+{
+TCGv_i32 tmp;
+
+if (ra != 31) {
+tcg_gen_ld8u_i64(cpu_ir[ra], cpu_env, offsetof(CPUState, intr_flag));
+}
+
+tmp = tcg_const_i32(set);
+tcg_gen_st8_i32(tmp, cpu_env, offsetof(CPUState, intr_flag));
+tcg_temp_free_i32(tmp);
+}
+
 static inline int translate_one(DisasContext *ctx, uint32_t insn)
 {
 uint32_t palcode;
@@ -2359,16 +2372,14 @@ static inline int translate_one(DisasContext *ctx, 
uint32_t insn)
 break;
 case 0xE000:
 /* RC */
-if (ra != 31)
-gen_helper_rc(cpu_ir[ra]);
+gen_rx(ra, 0);
 break;
 case 0xE800:
 /* ECB */
 break;
 case 0xF000:
 /* RS */
-if (ra != 31)
-gen_helper_rs(cpu_ir[ra]);
+gen_rx(ra, 1);
 break;
 case 0xF800:
 /* WH64 */
-- 
1.6.6.1

[Qemu-devel] [PATCH 5/5] target-alpha: Implement cvtlq inline.

2010-03-16 Thread Richard Henderson

It's a simple shift and mask sequence.

Signed-off-by: Richard Henderson r...@twiddle.net
---
 target-alpha/helper.h|1 -
 target-alpha/op_helper.c |7 ---
 target-alpha/translate.c |   21 -
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index 10c78d0..ccf6a2a 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -83,7 +83,6 @@ DEF_HELPER_FLAGS_1(cvtqf, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvtgf, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvtgq, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvtqg, TCG_CALL_CONST, i64, i64)
-DEF_HELPER_FLAGS_1(cvtlq, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
 
 DEF_HELPER_FLAGS_1(cvttq, TCG_CALL_CONST, i64, i64)
 DEF_HELPER_FLAGS_1(cvttq_c, TCG_CALL_CONST, i64, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index f9cd07a..a209130 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -1152,13 +1152,6 @@ uint64_t helper_cvtqg (uint64_t a)
 return float64_to_g(fr);
 }
 
-uint64_t helper_cvtlq (uint64_t a)
-{
-int32_t lo = a  29;
-int32_t hi = a  32;
-return (lo  0x3FFF) | (hi  0xc000);
-}
-
 /* PALcode support special instructions */
 #if !defined (CONFIG_USER_ONLY)
 void helper_hw_rei (void)
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index cfdf441..c52cac3 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -597,6 +597,26 @@ static inline void gen_fp_exc_raise(int rc, int fn11)
 gen_fp_exc_raise_ignore(rc, fn11, fn11  QUAL_I ? 0 : float_flag_inexact);
 }
 
+static void gen_fcvtlq(int rb, int rc)
+{
+if (unlikely(rc == 31)) {
+return;
+}
+if (unlikely(rb == 31)) {
+tcg_gen_movi_i64(cpu_fir[rc], 0);
+} else {
+TCGv tmp = tcg_temp_new();
+
+tcg_gen_shri_i64(tmp, cpu_fir[rb], 32);
+tcg_gen_shri_i64(cpu_fir[rc], cpu_fir[rb], 29);
+tcg_gen_andi_i64(tmp, tmp, 0xc000);
+tcg_gen_andi_i64(cpu_fir[rc], cpu_fir[rc], 0x3FFF);
+tcg_gen_or_i64(cpu_fir[rc], cpu_fir[rc], tmp);
+
+tcg_temp_free(tmp);
+}
+}
+
 static void gen_fcvtql(int rb, int rc)
 {
 if (unlikely(rc == 31)) {
@@ -646,7 +666,6 @@ static inline void glue(gen_f, name)(int rb, int rc)\
 tcg_temp_free(tmp); \
 }   \
 }
-FARITH2(cvtlq)
 
 /* ??? VAX instruction qualifiers ignored.  */
 FARITH2(sqrtf)
-- 
1.6.6.1

[Qemu-devel] [PATCH 0/4] tcg-hppa get it working, v2.1

2010-03-16 Thread Richard Henderson

Gah.  Left out --thread and -s options to format-patch.
Sorry about that.


r~



Richard Henderson (4):
  tcg-hppa: Fix const errors in hppa-dis.c.
  tcg-hppa: Fix 64-bit argument ordering.
  tcg-hppa: Finish the port.
  tcg-hppa: Compute is_write in cpu_signal_handler.

 configure |5 +-
 cpu-exec.c|   38 +-
 hppa-dis.c|4 +-
 tcg/hppa/tcg-target.c | 1846 +++--
 tcg/hppa/tcg-target.h |  142 +---
 tcg/tcg.c |   12 +-
 6 files changed, 1387 insertions(+), 660 deletions(-)

[Qemu-devel] [PATCH 1/4] tcg-hppa: Fix const errors in hppa-dis.c.

2010-03-16 Thread Richard Henderson

Signed-off-by: Richard Henderson r...@twiddle.net
---
 hppa-dis.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hppa-dis.c b/hppa-dis.c
index 9d96d72..49f99c8 100644
--- a/hppa-dis.c
+++ b/hppa-dis.c
@@ -576,7 +576,7 @@ struct pa_opcode
 const char *name;
 unsigned long int match;   /* Bits that must be set...  */
 unsigned long int mask;/* ... in these bits. */
-char *args;
+const char *args;
 enum pa_arch arch;
 char flags;
 };
@@ -2753,7 +2753,7 @@ print_insn_hppa (bfd_vma memaddr, disassemble_info *info)
int sf = GET_FIELD (insn, 19, 20);
const char * const * source = float_format_names;
const char * const * dest = float_format_names;
-   char *t = ;
+   const char *t = ;
 
if (sub == 4)
  {
-- 
1.6.6.1

[Qemu-devel] [PATCH 2/4] tcg-hppa: Fix 64-bit argument ordering.

2010-03-16 Thread Richard Henderson

Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/tcg.c |   12 +++-
 1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 1818868..d753149 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -596,7 +596,17 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned 
int flags,
 real_args++;
 }
 #endif
-#ifdef TCG_TARGET_WORDS_BIGENDIAN
+   /* If stack grows up, then we will be placing successive
+  arguments at lower addresses, which means we need to
+  reverse the order compared to how we would normally
+  treat either big or little-endian.  For those arguments
+  that will wind up in registers, this still works for
+  HPPA (the only current STACK_GROWSUP target) since the
+  argument registers are *also* allocated in decreasing
+  order.  If another such target is added, this logic may
+  have to get more complicated to differentiate between
+  stack arguments and register arguments.  */
+#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
 *gen_opparam_ptr++ = args[i] + 1;
 *gen_opparam_ptr++ = args[i];
 #else
-- 
1.6.6.1

[Qemu-devel] [PATCH 4/4] tcg-hppa: Compute is_write in cpu_signal_handler.

2010-03-16 Thread Richard Henderson

Signed-off-by: Richard Henderson r...@twiddle.net
---
 cpu-exec.c |   38 +++---
 1 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index bcfcda2..14204f4 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -1193,15 +1193,39 @@ int cpu_signal_handler(int host_signum, void *pinfo,
 {
 struct siginfo *info = pinfo;
 struct ucontext *uc = puc;
-unsigned long pc;
-int is_write;
+unsigned long pc = uc-uc_mcontext.sc_iaoq[0];
+uint32_t insn = *(uint32_t *)pc;
+int is_write = 0;
+
+/* XXX: need kernel patch to get write flag faster.  */
+switch (insn  26) {
+case 0x1a: /* STW */
+case 0x19: /* STH */
+case 0x18: /* STB */
+case 0x1b: /* STWM */
+is_write = 1;
+break;
+
+case 0x09: /* CSTWX, FSTWX, FSTWS */
+case 0x0b: /* CSTDX, FSTDX, FSTDS */
+/* Distinguish from coprocessor load ... */
+is_write = (insn  9)  1;
+break;
+
+case 0x03:
+switch ((insn  6)  15) {
+case 0xa: /* STWS */
+case 0x9: /* STHS */
+case 0x8: /* STBS */
+case 0xe: /* STWAS */
+case 0xc: /* STBYS */
+is_write = 1;
+}
+break;
+}
 
-pc = uc-uc_mcontext.sc_iaoq[0];
-/* FIXME: compute is_write */
-is_write = 0;
 return handle_cpu_signal(pc, (unsigned long)info-si_addr, 
- is_write,
- uc-uc_sigmask, puc);
+ is_write, uc-uc_sigmask, puc);
 }
 
 #else
-- 
1.6.6.1

[Qemu-devel] [PATCH QEMU] Transparent Hugepage Support #2

2010-03-16 Thread Andrea Arcangeli

From: Andrea Arcangeli aarca...@redhat.com

This allows proper alignment so that NPT/EPT can take advantage of the
Linux host backing the guest memory with hugepages. It also ensures
that, when KVM isn't used, the first 2M of guest physical memory is
backed by a large TLB entry. Finally, it uses madvise to notify the
kernel that this memory should be backed by hugepages (needed for both
KVM and QEMU), so that hugepages can also be used in embedded systems
without any memory waste; in the future this will also allow
khugepaged to prioritize the collapse of hugepages in the madvise
regions.

Ideally the max hugepage size provided by the transparent hugepage
support in the kernel should be exported by some sysfs file, but
there is no reason to expect an x86_64 host to have hugepages larger
than 2M, or to expect those to be supported by the kernel transparent
hugepage support in the short and medium term. So we can defer the
invention of a fixed kernel API until this happens; by that time we'll
surely have a better idea of the best way to provide that information
to userland, and adapting qemu to use it will only be a change of a
few lines, so there's no hurry to do it right now. Moreover, the code
below will remain optimal, and there is no risk of memory waste as
virtual memory is practically zero cost on 64bit archs.

NOTE: if the callers of qemu_ram_alloc change significantly, we may
later be required to pass a second parameter to qemu_ram_alloc that
tells it the first guest physical address that corresponds to the
sized memory block being allocated. I'd defer this change for later
too, as it may never be needed.

I verified this is more than enough to get the max benefit from the
kernel side feature.

cat /sys/kernel/debug/kvm/largepages 
301

Signed-off-by: Andrea Arcangeli aarca...@redhat.com
---
diff --git a/exec.c b/exec.c
index b0b6056..9552366 100644
--- a/exec.c
+++ b/exec.c
@@ -2733,11 +2733,30 @@ ram_addr_t qemu_ram_alloc(ram_addr_t size)
 PROT_EXEC|PROT_READ|PROT_WRITE,
 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 #else
-new_block-host = qemu_vmalloc(size);
+#if defined(__linux__)  defined(__x86_64__)
+#define MAX_TRANSPARENT_HUGEPAGE_SIZE (2*1024*1024)
+   if (size = MAX_TRANSPARENT_HUGEPAGE_SIZE)
+   /*
+* Align on the max transparent hugepage size so that
+* (gfn ^ pfn)  (HPAGE_SIZE-1) == 0 to allow KVM to
+* take advantage of hugepages with NPT/EPP or to
+* ensure the first 2M of the guest physical ram will
+* be mapped by the same hugetlb for QEMU (it is worth
+* it even without NPT/EPT).
+*/
+   new_block-host = qemu_memalign(MAX_TRANSPARENT_HUGEPAGE_SIZE,
+   size);
+   else
+#undef MAX_TRANSPARENT_HUGEPAGE_SIZE
+#endif 
+   new_block-host = qemu_vmalloc(size);
 #endif
 #ifdef MADV_MERGEABLE
 madvise(new_block-host, size, MADV_MERGEABLE);
 #endif
+#ifdef MADV_HUGEPAGE
+madvise(new_block-host, size, MADV_HUGEPAGE);
+#endif
 }
 new_block-offset = last_ram_offset;
 new_block-length = size;

[Qemu-devel] Re: [PATCH] pcnet: make subsystem vendor id match hardware

2010-03-16 Thread Jan Kiszka

Michael S. Tsirkin wrote:
 Real pcnet device (AT2450) apparently has subsystem
 device and vendor id set to 0, this is out of spec
 (which requires that vendor id is obtained from PCI SIG)
 but windows xp driver seems to need this in order
 to associate.
 
 qemu sets pci subsystem id to qumranet/qemu
 since d350d97d196a632b6c7493acf07a061017fc6f7d,
 debian does not yet have this patch.
 
 https://bugzilla.redhat.com/show_bug.cgi?id=521247
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 Cc: Gerd Hoffmann kra...@redhat.com
 Cc: Anthony Liguori aligu...@us.ibm.com
 ---
  hw/pcnet.c |3 +++
  1 files changed, 3 insertions(+), 0 deletions(-)
 
 diff --git a/hw/pcnet.c b/hw/pcnet.c
 index 44b5b31..12260be 100644
 --- a/hw/pcnet.c
 +++ b/hw/pcnet.c
 @@ -1997,6 +1997,9 @@ static int pci_pcnet_init(PCIDevice *pci_dev)
  pci_set_long(pci_conf + PCI_BASE_ADDRESS_0 + 4,
   PCI_BASE_ADDRESS_SPACE_MEMORY);
  
 +pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, 0x0);
 +pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, 0x0);
 +
  /* TODO: value must be 0 at RST# */
  pci_conf[PCI_INTERRUPT_PIN] = 1; // interrupt pin 0
  pci_conf[PCI_MIN_GNT] = 0x06;

No concerns from my side, still works here.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

Re: [Qemu-devel] [PATCH QEMU] Transparent Hugepage Support #2

2010-03-16 Thread Jamie Lokier

Andrea Arcangeli wrote:
 +  * take advantage of hugepages with NPT/EPP or to

Spelling: NPT/EPT?

-- Jamie

[Qemu-devel] [PATCHv5 00/11] vhost-net: upstream integration

2010-03-16 Thread Michael S. Tsirkin

Here's a patchset with vhost support for upstream qemu,
rebased to latest bits, and with all comments I'm aware of
addressed.

Please consider for merging.  Anthony, if you are still deliberating
some issues, maybe the series can be merged partially?  This will at
least reduce the amount of noise from reposting the large patchset.

Changes from v4:
  address amit's style comments: mostly renaming for clarity

Changes from v3:
  vhost: vhost net support: use typedef instead of struct name
  virtio: add set_status callback: fix up non-PCI bindings

Changes from v2:
  Addressed style comments
  Detect mapping changes and abort
  Unmap ring on cleanup

Changes from v1:
  Addressed style comments
  Migration fixes.
  Gracefully fail with non-tap backends.

Michael S. Tsirkin (11):
  tap: add interface to get device fd
  kvm: add API to set ioeventfd
  notifier: event notifier implementation
  virtio: notifier support + APIs for queue fields
  virtio: add set_status callback
  virtio: move typedef to qemu-common
  virtio-pci: fill in notifier support
  vhost: vhost net support
  tap: add vhost/vhostfd options
  tap: add API to retrieve vhost net header
  virtio-net: vhost net support

 Makefile.target  |3 +
 configure|   36 +++
 hw/event_notifier.c  |   62 +
 hw/event_notifier.h  |   16 ++
 hw/s390-virtio-bus.c |2 +-
 hw/syborg_virtio.c   |2 +-
 hw/vhost.c   |  706 ++
 hw/vhost.h   |   48 
 hw/vhost_net.c   |  198 ++
 hw/vhost_net.h   |   19 ++
 hw/virtio-net.c  |   71 +-
 hw/virtio-pci.c  |   68 +-
 hw/virtio.c  |   80 ++-
 hw/virtio.h  |   28 ++-
 kvm-all.c|   22 ++
 kvm.h|   16 ++
 net.c|8 +
 net/tap.c|   43 +++
 net/tap.h|5 +
 qemu-common.h|2 +
 qemu-options.hx  |4 +-
 21 files changed, 1429 insertions(+), 10 deletions(-)
 create mode 100644 hw/event_notifier.c
 create mode 100644 hw/event_notifier.h
 create mode 100644 hw/vhost.c
 create mode 100644 hw/vhost.h
 create mode 100644 hw/vhost_net.c
 create mode 100644 hw/vhost_net.h

[Qemu-devel] [PATCHv5 01/11] tap: add interface to get device fd

2010-03-16 Thread Michael S. Tsirkin

Will be used by vhost to attach/detach to backend.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 net/tap.c |7 +++
 net/tap.h |2 ++
 2 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/net/tap.c b/net/tap.c
index 7a7320c..fc59fd4 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -269,6 +269,13 @@ static void tap_poll(VLANClientState *nc, bool enable)
 tap_write_poll(s, enable);
 }
 
+int tap_get_fd(VLANClientState *nc)
+{
+TAPState *s = DO_UPCAST(TAPState, nc, nc);
+assert(nc-info-type == NET_CLIENT_TYPE_TAP);
+return s-fd;
+}
+
 /* fd support */
 
 static NetClientInfo net_tap_info = {
diff --git a/net/tap.h b/net/tap.h
index 538a562..a244b28 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -48,4 +48,6 @@ int tap_probe_vnet_hdr(int fd);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int 
ufo);
 
+int tap_get_fd(VLANClientState *vc);
+
 #endif /* QEMU_NET_TAP_H */
-- 
1.7.0.18.g0d53a5

[Qemu-devel] [PATCHv5 03/11] notifier: event notifier implementation

2010-03-16 Thread Michael S. Tsirkin

Event notifiers are slightly generalized eventfd descriptors. The
current implementation depends on eventfd because vhost is the only
user, and vhost depends on eventfd anyway, but a stub is provided for
the non-eventfd case.

We'll be able to further generalize this when another user comes along
and we see how to best do this.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 Makefile.target |1 +
 hw/event_notifier.c |   62 +++
 hw/event_notifier.h |   16 +
 qemu-common.h   |1 +
 4 files changed, 80 insertions(+), 0 deletions(-)
 create mode 100644 hw/event_notifier.c
 create mode 100644 hw/event_notifier.h

diff --git a/Makefile.target b/Makefile.target
index ab3c438..004a703 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -175,6 +175,7 @@ obj-y = vl.o async.o monitor.o pci.o pci_host.o pcie_host.o 
machine.o gdbstub.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-pci.o 
virtio-serial-bus.o
+obj-y += event_notifier.o
 obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
diff --git a/hw/event_notifier.c b/hw/event_notifier.c
new file mode 100644
index 000..13f3656
--- /dev/null
+++ b/hw/event_notifier.c
@@ -0,0 +1,62 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin m...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include hw.h
+#include event_notifier.h
+#ifdef CONFIG_EVENTFD
+#include sys/eventfd.h
+#endif
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+#ifdef CONFIG_EVENTFD
+int fd = eventfd(!!active, EFD_NONBLOCK | EFD_CLOEXEC);
+if (fd  0)
+return -errno;
+e-fd = fd;
+return 0;
+#else
+return -ENOSYS;
+#endif
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+close(e-fd);
+}
+
+int event_notifier_get_fd(EventNotifier *e)
+{
+return e-fd;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+uint64_t value;
+int r = read(e-fd, value, sizeof(value));
+return r == sizeof(value);
+}
+
+int event_notifier_test(EventNotifier *e)
+{
+uint64_t value;
+int r = read(e-fd, value, sizeof(value));
+if (r == sizeof(value)) {
+/* restore previous value. */
+int s = write(e-fd, value, sizeof(value));
+/* never blocks because we use EFD_SEMAPHORE.
+ * If we didn't we'd get EAGAIN on overflow
+ * and we'd have to write code to ignore it. */
+assert(s == sizeof(value));
+}
+return r == sizeof(value);
+}
diff --git a/hw/event_notifier.h b/hw/event_notifier.h
new file mode 100644
index 000..24117ea
--- /dev/null
+++ b/hw/event_notifier.h
@@ -0,0 +1,16 @@
+#ifndef QEMU_EVENT_NOTIFIER_H
+#define QEMU_EVENT_NOTIFIER_H
+
+#include qemu-common.h
+
+struct EventNotifier {
+   int fd;
+};
+
+int event_notifier_init(EventNotifier *, int active);
+void event_notifier_cleanup(EventNotifier *);
+int event_notifier_get_fd(EventNotifier *);
+int event_notifier_test_and_clear(EventNotifier *);
+int event_notifier_test(EventNotifier *);
+
+#endif
diff --git a/qemu-common.h b/qemu-common.h
index 805be1a..f12a8f5 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -227,6 +227,7 @@ typedef struct uWireSlave uWireSlave;
 typedef struct I2SCodec I2SCodec;
 typedef struct DeviceState DeviceState;
 typedef struct SSIBus SSIBus;
+typedef struct EventNotifier EventNotifier;
 
 typedef uint64_t pcibus_t;
 
-- 
1.7.0.18.g0d53a5

[Qemu-devel] [PATCHv5 04/11] virtio: notifier support + APIs for queue fields

2010-03-16 Thread Michael S. Tsirkin

vhost needs physical addresses for ring and other queue fields,
so add APIs for these. In particular, add binding API to set
host/guest notifiers.  Will be used by vhost.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio.c |   80 ++-
 hw/virtio.h |   18 -
 2 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/hw/virtio.c b/hw/virtio.c
index 7c020a3..f54129f 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -73,6 +73,9 @@ struct VirtQueue
 int inuse;
 uint16_t vector;
 void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
+VirtIODevice *vdev;
+EventNotifier guest_notifier;
+EventNotifier host_notifier;
 };
 
 /* virt queue functions */
@@ -592,6 +595,12 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int 
queue_size,
 return vdev-vq[i];
 }
 
+void virtio_irq(VirtQueue *vq)
+{
+vq-vdev-isr |= 0x01;
+virtio_notify_vector(vq-vdev, vq-vector);
+}
+
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
 /* Always notify when queue is empty (when feature acknowledge) */
@@ -714,8 +723,10 @@ VirtIODevice *virtio_common_init(const char *name, 
uint16_t device_id,
 vdev-queue_sel = 0;
 vdev-config_vector = VIRTIO_NO_VECTOR;
 vdev-vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
-for(i = 0; i  VIRTIO_PCI_QUEUE_MAX; i++)
+for(i = 0; i  VIRTIO_PCI_QUEUE_MAX; i++) {
 vdev-vq[i].vector = VIRTIO_NO_VECTOR;
+vdev-vq[i].vdev = vdev;
+}
 
 vdev-name = name;
 vdev-config_len = config_size;
@@ -733,3 +744,70 @@ void virtio_bind_device(VirtIODevice *vdev, const 
VirtIOBindings *binding,
 vdev-binding = binding;
 vdev-binding_opaque = opaque;
 }
+
+target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
+{
+return vdev-vq[n].vring.desc;
+}
+
+target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
+{
+return vdev-vq[n].vring.avail;
+}
+
+target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
+{
+return vdev-vq[n].vring.used;
+}
+
+target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
+{
+return vdev-vq[n].vring.desc;
+}
+
+target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
+{
+return sizeof(VRingDesc) * vdev-vq[n].vring.num;
+}
+
+target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
+{
+return offsetof(VRingAvail, ring) +
+sizeof(u_int64_t) * vdev-vq[n].vring.num;
+}
+
+target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n)
+{
+return offsetof(VRingUsed, ring) +
+sizeof(VRingUsedElem) * vdev-vq[n].vring.num;
+}
+
+target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
+{
+return vdev-vq[n].vring.used - vdev-vq[n].vring.desc +
+   virtio_queue_get_used_size(vdev, n);
+}
+
+uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
+{
+return vdev-vq[n].last_avail_idx;
+}
+
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
+{
+vdev-vq[n].last_avail_idx = idx;
+}
+
+VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
+{
+return vdev-vq + n;
+}
+
+EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
+{
+return vq-guest_notifier;
+}
+EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
+{
+return vq-host_notifier;
+}
diff --git a/hw/virtio.h b/hw/virtio.h
index 3baa2a3..a074a65 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -19,6 +19,7 @@
 #include qdev.h
 #include sysemu.h
 #include block_int.h
+#include event_notifier.h
 
 /* from Linux's linux/virtio_config.h */
 
@@ -89,6 +90,8 @@ typedef struct {
 int (*load_config)(void * opaque, QEMUFile *f);
 int (*load_queue)(void * opaque, int n, QEMUFile *f);
 unsigned (*get_features)(void * opaque);
+int (*guest_notifier)(void * opaque, int n, bool assigned);
+int (*host_notifier)(void * opaque, int n, bool assigned);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 64
@@ -181,5 +184,18 @@ void virtio_net_exit(VirtIODevice *vdev);
DEFINE_PROP_BIT(indirect_desc, _state, _field, \
VIRTIO_RING_F_INDIRECT_DESC, true)
 
-
+target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
+target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
+target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n);
+target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n);
+target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n);
+target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n);
+target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n);
+target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n);
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t

[Qemu-devel] [PATCHv5 05/11] virtio: add set_status callback

2010-03-16 Thread Michael S. Tsirkin

vhost net backend needs to be notified when
frontend status changes. Add a callback,
similar to set_features.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/s390-virtio-bus.c |2 +-
 hw/syborg_virtio.c   |2 +-
 hw/virtio-pci.c  |5 +++--
 hw/virtio.h  |9 +
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index 9fc01e9..3efbaab 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -242,7 +242,7 @@ void s390_virtio_device_update_status(VirtIOS390Device *dev)
 VirtIODevice *vdev = dev-vdev;
 uint32_t features;
 
-vdev-status = ldub_phys(dev-dev_offs + VIRTIO_DEV_OFFS_STATUS);
+virtio_set_status(vdev, ldub_phys(dev-dev_offs + VIRTIO_DEV_OFFS_STATUS));
 
 /* Update guest supported feature bitmap */
 
diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 65239a0..abf0370 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -149,7 +149,7 @@ static void syborg_virtio_writel(void *opaque, 
target_phys_addr_t offset,
 virtio_queue_notify(vdev, value);
 break;
 case SYBORG_VIRTIO_STATUS:
-vdev-status = value  0xFF;
+virtio_set_status(vdev, value  0xFF);
 if (vdev-status == 0)
 virtio_reset(vdev);
 break;
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 799f664..ee67a8a 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -206,7 +206,7 @@ static void virtio_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
 virtio_queue_notify(vdev, val);
 break;
 case VIRTIO_PCI_STATUS:
-vdev-status = val  0xFF;
+virtio_set_status(vdev, val  0xFF);
 if (vdev-status == 0) {
 virtio_reset(proxy-vdev);
 msix_unuse_all_vectors(proxy-pci_dev);
@@ -377,7 +377,8 @@ static void virtio_write_config(PCIDevice *pci_dev, 
uint32_t address,
 
 if (PCI_COMMAND == address) {
 if (!(val  PCI_COMMAND_MASTER)) {
-proxy-vdev-status = ~VIRTIO_CONFIG_S_DRIVER_OK;
+virtio_set_status(proxy-vdev,
+  proxy-vdev-status  
~VIRTIO_CONFIG_S_DRIVER_OK);
 }
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index a074a65..5b07176 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -115,12 +115,21 @@ struct VirtIODevice
 void (*get_config)(VirtIODevice *vdev, uint8_t *config);
 void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
 void (*reset)(VirtIODevice *vdev);
+void (*set_status)(VirtIODevice *vdev, uint8_t val);
 VirtQueue *vq;
 const VirtIOBindings *binding;
 void *binding_opaque;
 uint16_t device_id;
 };
 
+static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val)
+{
+if (vdev-set_status) {
+vdev-set_status(vdev, val);
+}
+vdev-status = val;
+}
+
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
 void (*handle_output)(VirtIODevice *,
   VirtQueue *));
-- 
1.7.0.18.g0d53a5

[Qemu-devel] [PATCHv5 06/11] virtio: move typedef to qemu-common

2010-03-16 Thread Michael S. Tsirkin

make it possible to use type without header include,
simplifying header dependencies.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio.h   |1 -
 qemu-common.h |1 +
 2 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/virtio.h b/hw/virtio.h
index 5b07176..4a4131d 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -69,7 +69,6 @@ static inline target_phys_addr_t 
vring_align(target_phys_addr_t addr,
 }
 
 typedef struct VirtQueue VirtQueue;
-typedef struct VirtIODevice VirtIODevice;
 
 #define VIRTQUEUE_MAX_SIZE 1024
 
diff --git a/qemu-common.h b/qemu-common.h
index f12a8f5..90ca3b8 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -228,6 +228,7 @@ typedef struct I2SCodec I2SCodec;
 typedef struct DeviceState DeviceState;
 typedef struct SSIBus SSIBus;
 typedef struct EventNotifier EventNotifier;
+typedef struct VirtIODevice VirtIODevice;
 
 typedef uint64_t pcibus_t;
 
-- 
1.7.0.18.g0d53a5

[Qemu-devel] [PATCHv5 07/11] virtio-pci: fill in notifier support

2010-03-16 Thread Michael S. Tsirkin

Support host/guest notifiers in virtio-pci.
Host notifiers work only with kvm (they are built on the kvm
ioeventfd API); that's okay because vhost relies on kvm anyway.

Note on kvm usage: kvm ioeventfd API
is implemented on non-kvm systems as well,
this is the reason we don't need if (kvm_enabled())
around it.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio-pci.c |   63 +++
 1 files changed, 63 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index ee67a8a..a7e1bcb 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -24,6 +24,7 @@
 #include net.h
 #include block_int.h
 #include loader.h
+#include kvm.h
 
 /* from Linux's linux/virtio_pci.h */
 
@@ -392,6 +393,66 @@ static unsigned virtio_pci_get_features(void *opaque)
 return proxy-host_features;
 }
 
+static void virtio_pci_guest_notifier_read(void *opaque)
+{
+VirtQueue *vq = opaque;
+EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+if (event_notifier_test_and_clear(n)) {
+virtio_irq(vq);
+}
+}
+
+static int virtio_pci_guest_notifier(void *opaque, int n, bool assign)
+{
+VirtIOPCIProxy *proxy = opaque;
+VirtQueue *vq = virtio_get_queue(proxy-vdev, n);
+EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
+
+if (assign) {
+int r = event_notifier_init(notifier, 0);
+if (r  0) {
+return r;
+}
+qemu_set_fd_handler(event_notifier_get_fd(notifier),
+virtio_pci_guest_notifier_read, NULL, vq);
+} else {
+qemu_set_fd_handler(event_notifier_get_fd(notifier),
+NULL, NULL, NULL);
+event_notifier_cleanup(notifier);
+}
+
+return 0;
+}
+
+static int virtio_pci_host_notifier(void *opaque, int n, bool assign)
+{
+VirtIOPCIProxy *proxy = opaque;
+VirtQueue *vq = virtio_get_queue(proxy-vdev, n);
+EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
+int r;
+if (assign) {
+r = event_notifier_init(notifier, 1);
+if (r  0) {
+return r;
+}
+r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+   proxy-addr + VIRTIO_PCI_QUEUE_NOTIFY,
+   n, assign);
+if (r  0) {
+event_notifier_cleanup(notifier);
+}
+} else {
+r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
+   proxy-addr + VIRTIO_PCI_QUEUE_NOTIFY,
+   n, assign);
+if (r  0) {
+return r;
+}
+event_notifier_cleanup(notifier);
+}
+return r;
+}
+
 static const VirtIOBindings virtio_pci_bindings = {
 .notify = virtio_pci_notify,
 .save_config = virtio_pci_save_config,
@@ -399,6 +460,8 @@ static const VirtIOBindings virtio_pci_bindings = {
 .save_queue = virtio_pci_save_queue,
 .load_queue = virtio_pci_load_queue,
 .get_features = virtio_pci_get_features,
+.host_notifier = virtio_pci_host_notifier,
+.guest_notifier = virtio_pci_guest_notifier,
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
-- 
1.7.0.18.g0d53a5

[Qemu-devel] [PATCHv5 09/11] tap: add vhost/vhostfd options

2010-03-16 Thread Michael S. Tsirkin

This adds a vhost boolean option to tap, to enable the vhost net accelerator.
The default is off for now; in the long term we can switch the default to on
once we know it's stable.

vhostfd option can be used by management, to pass in the fd. Assigning
vhostfd implies vhost=on.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 net.c   |8 
 net/tap.c   |   29 +
 qemu-options.hx |4 +++-
 3 files changed, 40 insertions(+), 1 deletions(-)

diff --git a/net.c b/net.c
index e47f727..48d9fb0 100644
--- a/net.c
+++ b/net.c
@@ -976,6 +976,14 @@ static const struct {
 .name = vnet_hdr,
 .type = QEMU_OPT_BOOL,
 .help = enable the IFF_VNET_HDR flag on the tap interface
+}, {
+.name = vhost,
+.type = QEMU_OPT_BOOL,
+.help = enable vhost-net network accelerator,
+}, {
+.name = vhostfd,
+.type = QEMU_OPT_STRING,
+.help = file descriptor of an already opened vhost net 
device,
 },
 #endif /* _WIN32 */
 { /* end of list */ }
diff --git a/net/tap.c b/net/tap.c
index fc59fd4..19c4fa2 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -41,6 +41,8 @@
 
 #include net/tap-linux.h
 
+#include hw/vhost_net.h
+
 /* Maximum GSO packet size (64k) plus plenty of room for
  * the ethernet and virtio_net headers
  */
@@ -57,6 +59,7 @@ typedef struct TAPState {
 unsigned int has_vnet_hdr : 1;
 unsigned int using_vnet_hdr : 1;
 unsigned int has_ufo: 1;
+VHostNetState *vhost_net;
 } TAPState;
 
 static int launch_script(const char *setup_script, const char *ifname, int fd);
@@ -252,6 +255,10 @@ static void tap_cleanup(VLANClientState *nc)
 {
 TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
+if (s-vhost_net) {
+vhost_net_cleanup(s-vhost_net);
+}
+
 qemu_purge_queued_packets(nc);
 
 if (s-down_script[0])
@@ -307,6 +314,7 @@ static TAPState *net_tap_fd_init(VLANState *vlan,
 s-has_ufo = tap_probe_has_ufo(s-fd);
 tap_set_offload(s-nc, 0, 0, 0, 0, 0);
 tap_read_poll(s, 1);
+s-vhost_net = NULL;
 return s;
 }
 
@@ -456,5 +464,26 @@ int net_init_tap(QemuOpts *opts, Monitor *mon, const char 
*name, VLANState *vlan
 }
 }
 
+if (qemu_opt_get_bool(opts, vhost, !!qemu_opt_get(opts, vhostfd))) {
+int vhostfd, r;
+if (qemu_opt_get(opts, vhostfd)) {
+r = net_handle_fd_param(mon, qemu_opt_get(opts, vhostfd));
+if (r == -1) {
+return -1;
+}
+vhostfd = r;
+} else {
+vhostfd = -1;
+}
+s-vhost_net = vhost_net_init(s-nc, vhostfd);
+if (!s-vhost_net) {
+qemu_error(vhost-net requested but could not be initialized\n);
+return -1;
+}
+} else if (qemu_opt_get(opts, vhostfd)) {
+qemu_error(vhostfd= is not valid without vhost\n);
+return -1;
+}
+
 return 0;
 }
diff --git a/qemu-options.hx b/qemu-options.hx
index fd50add..4d9f4da 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -895,7 +895,7 @@ DEF(net, HAS_ARG, QEMU_OPTION_net,
 -net tap[,vlan=n][,name=str],ifname=name\n
 connect the host TAP network interface to VLAN 'n'\n
 #else
--net 
tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off]\n
+-net 
tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h]\n
 connect the host TAP network interface to VLAN 'n' and 
use the\n
 network scripts 'file' (default= DEFAULT_NETWORK_SCRIPT 
)\n
 and 'dfile' (default= DEFAULT_NETWORK_DOWN_SCRIPT )\n
@@ -905,6 +905,8 @@ DEF(net, HAS_ARG, QEMU_OPTION_net,
 default of 'sndbuf=1048576' can be disabled using 
'sndbuf=0')\n
 use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap 
flag\n
 use vnet_hdr=on to make the lack of IFF_VNET_HDR support 
an error condition\n
+use vhost=on to enable experimental in kernel 
accelerator\n
+use 'vhostfd=h' to connect to an already opened vhost net 
device\n
 #endif
 -net 
socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n
 connect the vlan 'n' to another VLAN using a socket 
connection\n
-- 
1.7.0.18.g0d53a5

[Qemu-devel] [PATCHv5 10/11] tap: add API to retrieve vhost net header

2010-03-16 Thread Michael S. Tsirkin

will be used by virtio-net for vhost net support

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 net/tap.c |7 +++
 net/tap.h |3 +++
 2 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/net/tap.c b/net/tap.c
index 19c4fa2..35c05d7 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -487,3 +487,10 @@ int net_init_tap(QemuOpts *opts, Monitor *mon, const char 
*name, VLANState *vlan
 
 return 0;
 }
+
+VHostNetState *tap_get_vhost_net(VLANClientState *nc)
+{
+TAPState *s = DO_UPCAST(TAPState, nc, nc);
+assert(nc-info-type == NET_CLIENT_TYPE_TAP);
+return s-vhost_net;
+}
diff --git a/net/tap.h b/net/tap.h
index a244b28..b8cec83 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -50,4 +50,7 @@ void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, 
int ecn, int ufo);
 
 int tap_get_fd(VLANClientState *vc);
 
+struct vhost_net;
+struct vhost_net *tap_get_vhost_net(VLANClientState *vc);
+
 #endif /* QEMU_NET_TAP_H */
-- 
1.7.0.18.g0d53a5

[Qemu-devel] [PATCHv5 11/11] virtio-net: vhost net support

2010-03-16 Thread Michael S. Tsirkin

This connects virtio-net to vhost net backend.
The code is structured in a way analogous to what we have with vnet
header capability in tap.

We start/stop backend on driver start/stop as
well as on save and vm start (for migration).

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio-net.c |   71 +-
 1 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 5c0093e..9ddd58c 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -17,6 +17,7 @@
 #include net/tap.h
 #include qemu-timer.h
 #include virtio-net.h
+#include vhost_net.h
 
 #define VIRTIO_NET_VM_VERSION11
 
@@ -47,6 +48,8 @@ typedef struct VirtIONet
 uint8_t nomulti;
 uint8_t nouni;
 uint8_t nobcast;
+uint8_t vhost_started;
+VMChangeStateEntry *vmstate;
 struct {
 int in_use;
 int first_multi;
@@ -114,6 +117,10 @@ static void virtio_net_reset(VirtIODevice *vdev)
 n-nomulti = 0;
 n-nouni = 0;
 n-nobcast = 0;
+if (n-vhost_started) {
+vhost_net_stop(tap_get_vhost_net(n-nic-nc.peer), vdev);
+n-vhost_started = 0;
+}
 
 /* Flush any MAC and VLAN filter table state */
 n-mac_table.in_use = 0;
@@ -172,7 +179,14 @@ static uint32_t virtio_net_get_features(VirtIODevice 
*vdev, uint32_t features)
 features = ~(0x1  VIRTIO_NET_F_HOST_UFO);
 }
 
-return features;
+if (!n-nic-nc.peer ||
+n-nic-nc.peer-info-type != NET_CLIENT_TYPE_TAP) {
+return features;
+}
+if (!tap_get_vhost_net(n-nic-nc.peer)) {
+return features;
+}
+return vhost_net_get_features(tap_get_vhost_net(n-nic-nc.peer), 
features);
 }
 
 static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
@@ -698,6 +712,12 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
 {
 VirtIONet *n = opaque;
 
+if (n-vhost_started) {
+/* TODO: should we really stop the backend?
+ * If we don't, it might keep writing to memory. */
+vhost_net_stop(tap_get_vhost_net(n-nic-nc.peer), n-vdev);
+n-vhost_started = 0;
+}
 virtio_save(n-vdev, f);
 
 qemu_put_buffer(f, n-mac, ETH_ALEN);
@@ -810,7 +830,6 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int 
version_id)
 qemu_mod_timer(n-tx_timer,
qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
 }
-
 return 0;
 }
 
@@ -830,6 +849,47 @@ static NetClientInfo net_virtio_info = {
 .link_status_changed = virtio_net_set_link_status,
 };
 
+static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
+{
+VirtIONet *n = to_virtio_net(vdev);
+if (!n-nic-nc.peer) {
+return;
+}
+if (n-nic-nc.peer-info-type != NET_CLIENT_TYPE_TAP) {
+return;
+}
+
+if (!tap_get_vhost_net(n-nic-nc.peer)) {
+return;
+}
+if (!!n-vhost_started == !!(status  VIRTIO_CONFIG_S_DRIVER_OK)) {
+return;
+}
+if (status  VIRTIO_CONFIG_S_DRIVER_OK) {
+int r = vhost_net_start(tap_get_vhost_net(n-nic-nc.peer), vdev);
+if (r  0) {
+fprintf(stderr, unable to start vhost net: %d: 
+falling back on userspace virtio\n, -r);
+} else {
+n-vhost_started = 1;
+}
+} else {
+vhost_net_stop(tap_get_vhost_net(n-nic-nc.peer), vdev);
+n-vhost_started = 0;
+}
+}
+
+static void virtio_net_vmstate_change(void *opaque, int running, int reason)
+{
+VirtIONet *n = opaque;
+if (!running) {
+return;
+}
+/* This is called when vm is started, it will start vhost backend if
+ * appropriate e.g. after migration. */
+virtio_net_set_status(n-vdev, n-vdev.status);
+}
+
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
 {
 VirtIONet *n;
@@ -845,6 +905,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf 
*conf)
 n-vdev.set_features = virtio_net_set_features;
 n-vdev.bad_features = virtio_net_bad_features;
 n-vdev.reset = virtio_net_reset;
+n-vdev.set_status = virtio_net_set_status;
 n-rx_vq = virtio_add_queue(n-vdev, 256, virtio_net_handle_rx);
 n-tx_vq = virtio_add_queue(n-vdev, 256, virtio_net_handle_tx);
 n-ctrl_vq = virtio_add_queue(n-vdev, 64, virtio_net_handle_ctrl);
@@ -867,6 +928,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf 
*conf)
 
 register_savevm(virtio-net, virtio_net_id++, VIRTIO_NET_VM_VERSION,
 virtio_net_save, virtio_net_load, n);
+n-vmstate = qemu_add_vm_change_state_handler(virtio_net_vmstate_change, 
n);
 
 return n-vdev;
 }
@@ -874,6 +936,11 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf 
*conf)
 void virtio_net_exit(VirtIODevice *vdev)
 {
 VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
+qemu_del_vm_change_state_handler(n-vmstate);
+
+if (n-vhost_started) {
+vhost_net_stop(tap_get_vhost_net(n-nic-nc.peer), vdev);
+}

[Qemu-devel] [PATCHv5 08/11] vhost: vhost net support

2010-03-16 Thread Michael S. Tsirkin

This adds vhost net device support in qemu. Will be tied to tap device
and virtio by following patches.  Raw backend is currently missing,
will be worked on/submitted separately.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 Makefile.target |2 +
 configure   |   36 +++
 hw/vhost.c  |  706 +++
 hw/vhost.h  |   48 
 hw/vhost_net.c  |  198 
 hw/vhost_net.h  |   19 ++
 6 files changed, 1009 insertions(+), 0 deletions(-)
 create mode 100644 hw/vhost.c
 create mode 100644 hw/vhost.h
 create mode 100644 hw/vhost_net.c
 create mode 100644 hw/vhost_net.h

diff --git a/Makefile.target b/Makefile.target
index 004a703..ea5207c 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -176,6 +176,8 @@ obj-y = vl.o async.o monitor.o pci.o pci_host.o pcie_host.o 
machine.o gdbstub.o
 # need to fix this properly
 obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-pci.o 
virtio-serial-bus.o
 obj-y += event_notifier.o
+obj-y += vhost_net.o
+obj-$(CONFIG_VHOST_NET) += vhost.o
 obj-y += rwhandler.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
diff --git a/configure b/configure
index d728799..ebfc774 100755
--- a/configure
+++ b/configure
@@ -87,6 +87,7 @@ libs_softmmu=
 libs_tools=
 audio_pt_int=
 audio_win_int=
+audio_win_int=
 
 # parse CC options first
 for opt do
@@ -263,6 +264,7 @@ vnc_tls=
 vnc_sasl=
 xen=
 linux_aio=
+vhost_net=
 
 gprof=no
 debug_tcg=no
@@ -651,6 +653,10 @@ for opt do
   ;;
   --enable-docs) docs=yes
   ;;
+  --disable-vhost-net) vhost_net=no
+  ;;
+  --enable-vhost-net) vhost_net=yes
+  ;;
   *) echo ERROR: unknown option $opt; show_help=yes
   ;;
   esac
@@ -1498,6 +1504,32 @@ EOF
 fi
 
 ##
+# test for vhost net
+
+if test $vhost_net != no; then
+if test $kvm != no; then
+cat  $TMPC EOF
+#include linux/vhost.h
+int main(void) { return 0; }
+EOF
+if compile_prog $kvm_cflags  ; then
+vhost_net=yes
+else
+if $vhost_net == yes ; then
+feature_not_found vhost-net
+fi
+vhost_net=no
+fi
+else
+if $vhost_net == yes ; then
+echo -e NOTE: vhost-net feature requires KVM (--enable-kvm).
+feature_not_found vhost-net
+fi
+vhost_net=no
+fi
+fi
+
+##
 # pthread probe
 PTHREADLIBS_LIST=-lpthread -lpthreadGC2
 
@@ -1968,6 +2000,7 @@ echo fdt support   $fdt
 echo preadv support$preadv
 echo fdatasync $fdatasync
 echo uuid support  $uuid
+echo vhost-net support $vhost_net
 
 if test $sdl_too_old = yes; then
 echo - Your SDL version is too old - please upgrade to have SDL support
@@ -2492,6 +2525,9 @@ case $target_arch2 in
   if test $kvm_para = yes; then
 echo CONFIG_KVM_PARA=y  $config_target_mak
   fi
+  if test $vhost_net = yes ; then
+echo CONFIG_VHOST_NET=y  $config_target_mak
+  fi
 fi
 esac
 if test $target_bigendian = yes ; then
diff --git a/hw/vhost.c b/hw/vhost.c
new file mode 100644
index 000..2e93f33
--- /dev/null
+++ b/hw/vhost.c
@@ -0,0 +1,706 @@
+/*
+ * vhost support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin m...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include linux/vhost.h
+#include sys/ioctl.h
+#include sys/eventfd.h
+#include vhost.h
+#include hw/hw.h
+/* For range_get_last */
+#include pci.h
+
+static void vhost_dev_sync_region(struct vhost_dev *dev,
+  uint64_t mfirst, uint64_t mlast,
+  uint64_t rfirst, uint64_t rlast)
+{
+uint64_t start = MAX(mfirst, rfirst);
+uint64_t end = MIN(mlast, rlast);
+vhost_log_chunk_t *from = dev-log + start / VHOST_LOG_CHUNK;
+vhost_log_chunk_t *to = dev-log + end / VHOST_LOG_CHUNK + 1;
+uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
+
+assert(end / VHOST_LOG_CHUNK  dev-log_size);
+assert(start / VHOST_LOG_CHUNK  dev-log_size);
+if (end  start) {
+return;
+}
+for (;from  to; ++from) {
+vhost_log_chunk_t log;
+int bit;
+/* We first check with non-atomic: much cheaper,
+ * and we expect non-dirty to be the common case. */
+if (!*from) {
+continue;
+}
+/* Data must be read atomically. We don't really
+ * need the barrier semantics of __sync
+ * builtins, but it's easier to use them than
+ * roll our own. */
+log = __sync_fetch_and_and(from, 0);
+while ((bit = sizeof(log)  sizeof(int) ?
+ffsll(log) : ffs(log))) {
+bit -= 1;
+cpu_physical_memory_set_dirty(addr + bit *

[Qemu-devel] [PATCHv5 02/11] kvm: add API to set ioeventfd

2010-03-16 Thread Michael S. Tsirkin

Comment on kvm usage: rather than requiring users to write if (kvm_enabled())
and/or ifdefs, this patch adds an API that is defined as a stub function
on non-kvm builds, and that checks kvm_enabled() internally when running
without kvm.

While the rest of the qemu code still uses if (kvm_enabled()), I think this
approach is cleaner, and we should convert the rest of the code to it in the
long term.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 kvm-all.c |   22 ++
 kvm.h |   16 
 2 files changed, 38 insertions(+), 0 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index 534ead0..f427f73 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1153,3 +1153,25 @@ int kvm_set_signal_mask(CPUState *env, const sigset_t 
*sigset)
 
 return r;
 }
+
+#ifdef KVM_IOEVENTFD
+int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool 
assign)
+{
+struct kvm_ioeventfd kick = {
+.datamatch = val,
+.addr = addr,
+.len = 2,
+.flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO,
+.fd = fd,
+};
+int r;
+if (!kvm_enabled())
+return -ENOSYS;
+if (!assign)
+kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
+r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, kick);
+if (r  0)
+return r;
+return 0;
+}
+#endif
diff --git a/kvm.h b/kvm.h
index fd8d0c1..2dfcb15 100644
--- a/kvm.h
+++ b/kvm.h
@@ -14,10 +14,16 @@
 #ifndef QEMU_KVM_H
 #define QEMU_KVM_H
 
+#include stdbool.h
+#include errno.h
 #include config.h
 #include qemu-queue.h
 
 #ifdef CONFIG_KVM
+#include linux/kvm.h
+#endif
+
+#ifdef CONFIG_KVM
 extern int kvm_allowed;
 
 #define kvm_enabled() (kvm_allowed)
@@ -161,4 +167,14 @@ static inline void cpu_synchronize_post_init(CPUState *env)
 }
 }
 
+#if defined(KVM_IOEVENTFD)  defined(CONFIG_KVM)
+int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool 
assign);
+#else
+static inline
+int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign)
+{
+return -ENOSYS;
+}
+#endif
+
 #endif
-- 
1.7.0.18.g0d53a5

Re: [Qemu-devel] wake-on-lan IPMI implementation; real power-off and -no-shutdown

2010-03-16 Thread Jamie Lokier

 The semantics of -no-shutdown are awful.
 
 I'd personally prefer to see the option deprecated and a new set of 
 options introduced with clearer semantics.
 
 Currently, -no-shutdown does too many things.  It affects reboot 
 behaviour, shutdown behaviour, the behavior of the SDL close button.  
 Each of these things should be individual tunables.

I'm not sure about -no-shutdown, but I've had some problems with -no-reboot,
which I use for semi-automated OS installations.

I use -no-reboot so that when the guest does a reboot during
installation, as they invariably do one or more times, QEMU exits, my
scripts do things like eject the CD/floppy, or change it for the next in
sequence, and modify the guest's installed files to add virtio
drivers, install extra files, edit boot scripts and whatever else is
useful, and then restart QEMU.

The guest thinks it's just rebooted, but it has the virtualisation
goodies in place to run better.

Unfortunately with an MS-DOS 5.00 guest, -no-reboot does not
work.  It fails to exit QEMU; instead it just reboots.  I guess that
means a QJSON event would not be sent either.

For my use case, it would be even better if guest reboot paused the
guest and sent a QJSON event instead of having to use -no-reboot.
Then I wouldn't have to close and restart the VNC client repeatedly
during installs.

Now that we have ways to choose what kind of events and actions are
triggered by the QEMU watchdog device, it would be nice to fit guest
reboot (perhaps even the different types of reboot) / host-forced
reboot / guest powerdown / host-forced powerdown (like holding down
the power button for 5 seconds on a real PC) into the same/similar
framework as the watchdog, with same/similar event types and action
choices.

-- Jamie

Re: [Qemu-devel] wake-on-lan IPMI implementation; real power-off and -no-shutdown

2010-03-16 Thread Daniel P. Berrange

On Tue, Mar 16, 2010 at 05:28:51PM +, Jamie Lokier wrote:
  The semantics of -no-shutdown are awful.
  
  I'd personally prefer to see the option deprecated and a new set of 
  options introduced with clearer semantics.
  
  Currently, -no-shutdown does too many things.  It affects reboot 
  behaviour, shutdown behaviour, the behavior of the SDL close button.  
  Each of these things should be individual tunables.
 
 I'm not sure about -no-shutdown, but I've had some problems with -no-reboot,
 which I use for semi-automated OS installations.
 
 I use -no-reboot so that when the guest does a reboot during
 installation, as they invariably do one or more times, QEMU exits, my
 scripts do things like eject the CD/floppy, or change it for the next in
 sequence, and modify the guest's installed files to add virtio
 drivers, install extra files, edit boot scripts and whatever else is
 useful, and then restart QEMU.
 
 The guest thinks it's just rebooted, but it has the virtualisation
 goodies in place to run better.

That's the way libvirt / virt-manager does provisioning too.

 Unfortunately with an MS-DOS 5.00 guest, -no-reboot does not
 work.  It fails to exit QEMU; instead it just reboots.  I guess that
 means a QJSON event would not be sent either.
 
 For my use case, it would be even better if guest reboot paused the
 guest and sent a QJSON event instead of having to use -no-reboot.
 Then I wouldn't have to close and restart the VNC client repeatedly
 during installs.

That would be nice!

 Now that we have ways to choose what kind of events and actions are
 triggered by the QEMU watchdog device, it would be nice to fit guest
 reboot (perhaps even the different types of reboot) / host-forced
 reboot / guest powerdown / host-forced powerdown (like holding down
 the power button for 5 seconds on a real PC) into the same/similar
 framework as the watchdog, with same/similar event types and action
 choices.

Agreed, it'd be good to have a more generalized method of controlling
the lifecycle actions in QEMU.

Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

Re: [Qemu-devel] Re: [PATCH, RFC] Replace assert(0) with abort() or cpu_abort()

2010-03-16 Thread Jamie Lokier

Paolo Bonzini wrote:
 On 03/15/2010 07:36 PM, Markus Armbruster wrote:
 Please don't tell me that user emulators make abort() return.  abort()
 is declared __noreturn__, and the optimizer may well rely on that.
 
 If the user programs make a signal (SIGABRT, SIG_IGN) call, I suppose 
 abort() will return.

On Linux, man abort says:

   If  the SIGABRT signal is ignored, or caught by a handler that returns,
   the abort() function will still terminate the process.  It does this by
   restoring the default disposition for SIGABRT and then raising the sig‐
   nal for a second time.

However I have a suspicion that I've seen abort() return on some
other OS in the distant past, maybe SunOS 4.

I wouldn't rely on abort() always terminating the process on all OSes.

-- Jamie

Re: [Qemu-devel] Re: [PATCH, RFC] Replace assert(0) with abort() or cpu_abort()

2010-03-16 Thread Jamie Lokier

Markus Armbruster wrote:
 Paolo Bonzini pbonz...@redhat.com writes:
 
  On 03/15/2010 07:36 PM, Markus Armbruster wrote:
  Please don't tell me that user emulators make abort() return.  abort()
  is declared __noreturn__, and the optimizer may well rely on that.
 
  If the user programs make a signal (SIGABRT, SIG_IGN) call, I
  suppose abort() will return.
 
 A program doing that gets what it asks for, and richly deserves it.

A guest program is also allowed to trap SIGABRT with a signal handler,
and that does have some uses.  E.g. cleaning up temporary files and
shmem segments following a crash when calling 3rd party code.

Whatever the guest does with SIGABRT, it should not result in _QEMU_
crashing - whether due to abort() returning, or QEMU's control flow
jumping to the guest's signal handler from an unexpected location.

-- Jamie

[Qemu-devel] [PATCH] qemu-io: fix aio help texts

2010-03-16 Thread Christoph Hellwig

Fix a few typos in the helptexts for the various aio commands.

Signed-off-by: Christoph Hellwig h...@lst.de

Index: qemu/qemu-io.c
===
--- qemu.orig/qemu-io.c 2010-03-16 19:07:43.089009269 +0100
+++ qemu/qemu-io.c  2010-03-16 19:08:36.597005148 +0100
@@ -904,8 +904,8 @@ aio_read_help(void)
 \n
  Reads a segment of the currently open file, optionally dumping it to the\n
  standard output stream (with -v option) for subsequent inspection.\n
- The read is performed asynchronously and should the aio_flush command \n
- should be used to ensure all outstanding aio requests have been completed\n
+ The read is performed asynchronously and the aio_flush command must be\n
+ used to ensure all outstanding aio requests have been completed\n
  -C, -- report statistics in a machine parsable format\n
  -P, -- use a pattern to verify read data\n
  -v, -- dump buffer to standard output\n
@@ -1003,8 +1003,8 @@ aio_write_help(void)
 \n
  Writes into a segment of the currently open file, using a buffer\n
  filled with a set pattern (0xcdcdcdcd).\n
- The write is performed asynchronously and should the aio_flush command \n
- should be used to ensure all outstanding aio requests have been completed\n
+ The write is performed asynchronously and the aio_flush command must be\n
+ used to ensure all outstanding aio requests have been completed\n
  -P, -- use different pattern to fill file\n
  -C, -- report statistics in a machine parsable format\n
  -q, -- quite mode, do not show I/O statistics\n
@@ -1095,7 +1095,7 @@ aio_flush_f(int argc, char **argv)
 static const cmdinfo_t aio_flush_cmd = {
.name   = aio_flush,
.cfunc  = aio_flush_f,
-   .oneline= completes all outstanding aio requets
+   .oneline= completes all outstanding aio requests
 };
 
 static int

[Qemu-devel] [PATCH] [Also for STABLE-0.12] Don't check for bus master for old guests

2010-03-16 Thread Alexander Graf

Older Linux guests don't activate the bus master enable bit. So for those we
can just try to be clever and track if they set the DEVICE_OK bit even though
bus mastering is still disabled.

Under that condition we can disable the windows safety check. With that logic
in place both guests should work just fine. Without it, PCI hotplug breaks
virtio-net in Linux < 2.6.34 guests.

Signed-off-by: Alexander Graf ag...@suse.de
CC: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio-pci.c |   25 -
 1 files changed, 24 insertions(+), 1 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 3594152..4fc4b3c 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -76,6 +76,10 @@
  * 12 is historical, and due to x86 page size. */
 #define VIRTIO_PCI_QUEUE_ADDR_SHIFT12
 
+/* We can catch some guest bugs inside here so we continue supporting older
+   guests. */
+#define VIRTIO_PCI_BUG_BUS_MASTER  (1  0)
+
 /* QEMU doesn't strictly need write barriers since everything runs in
  * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
  * KVM or if kqemu gets SMP support.
@@ -87,6 +91,7 @@
 typedef struct {
 PCIDevice pci_dev;
 VirtIODevice *vdev;
+uint32_t bugs;
 uint32_t addr;
 uint32_t class_code;
 uint32_t nvectors;
@@ -138,6 +143,13 @@ static int virtio_pci_load_config(void * opaque, QEMUFile 
*f)
 if (proxy-vdev-config_vector != VIRTIO_NO_VECTOR) {
 return msix_vector_use(proxy-pci_dev, proxy-vdev-config_vector);
 }
+
+/* Try to find out if the guest has bus master disabled, but is
+   in ready state. Then we have a buggy guest OS. */
+if (!(proxy-vdev-status  VIRTIO_CONFIG_S_DRIVER_OK) 
+!(proxy-pci_dev.config[PCI_COMMAND]  PCI_COMMAND_MASTER)) {
+proxy-bugs |= VIRTIO_PCI_BUG_BUS_MASTER;
+}
 return 0;
 }
 
@@ -162,6 +174,7 @@ static void virtio_pci_reset(DeviceState *d)
 VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
 virtio_reset(proxy-vdev);
 msix_reset(proxy-pci_dev);
+proxy-bugs = 0;
 }
 
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -205,6 +218,14 @@ static void virtio_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
 virtio_reset(proxy-vdev);
 msix_unuse_all_vectors(proxy-pci_dev);
 }
+
+/* Linux before 2.6.34 sets the device as OK without enabling
+   the PCI device bus master bit. In this case we need to disable
+   some safety checks. */
+if ((val  VIRTIO_CONFIG_S_DRIVER_OK) 
+!(proxy-pci_dev.config[PCI_COMMAND]  PCI_COMMAND_MASTER)) {
+proxy-bugs |= VIRTIO_PCI_BUG_BUS_MASTER;
+}
 break;
 case VIRTIO_MSI_CONFIG_VECTOR:
 msix_vector_unuse(proxy-pci_dev, vdev-config_vector);
@@ -372,7 +393,9 @@ static void virtio_write_config(PCIDevice *pci_dev, 
uint32_t address,
 
 if (PCI_COMMAND == address) {
 if (!(val  PCI_COMMAND_MASTER)) {
-proxy-vdev-status = ~VIRTIO_CONFIG_S_DRIVER_OK;
+if (!(proxy-bugs  VIRTIO_PCI_BUG_BUS_MASTER)) {
+proxy-vdev-status = ~VIRTIO_CONFIG_S_DRIVER_OK;
+}
 }
 }
 
-- 
1.6.0.2

Re: [Qemu-devel] wake-on-lan IPMI implementation; real power-off and -no-shutdown

2010-03-16 Thread François Revol

 I use -no-reboot so that when the guest does a reboot during
 installation, as they invariably do one or more times, QEMU exits, my
 scripts do things like eject the CD/floppy, or change it for the next in
 sequence, and modify the guest's installed files to add virtio
 drivers, install extra files, edit boot scripts and whatever else is
 useful, and then restart QEMU.

 The guest thinks it's just rebooted, but it has the virtualisation
 goodies in place to run better.

 Unfortunately with an MS-DOS 5.00 guest, -no-reboot does not
 work.  It fails to exit QEMU; instead it just reboots.  I guess that
 means a QJSON event would not be sent either.

I suppose it uses a weird way to reboot, there are many (PS/2
controller, calling the BIOS entry point...).

François.

[Qemu-devel] [PULL v2] Convert device_add to QObject / QError

2010-03-16 Thread Markus Armbruster

Anthony ran into conflicts and asked me to rebase and send out a pull
request.

Complete list of conflicts:

* qdev: Improve diagnostics for bad property values
  commit 6bf38816df80a3b50529119c5458b151b3e2c728

  Adds two new errors to qdev_prop_parse(), which need conversion to
  QError.  Resolution straightforward, just needs new
  QERR_PROPERTY_VALUE_IN_USE, QERR_PROPERTY_VALUE_NOT_FOUND.

* scsi: Make device scsi-disk reject /dev/sg*
  commit 32bb404a6a4d726dfd691f75704f08257ce65ffe

  Adds a qemu_error() use, which needs to be changed to error_report().

* slirp: check system() success
  commit 24ac07dec7f23c58dc48aa7754f872781b386d46

  Context changed.  Resolution trivial.


The following changes since commit 0aef4261ac0ec9089ade0e3a92f986cb4ba7317e:
  Aurelien Jarno (1):
target-ppc: fix evsrwu and evsrws (second try)

are available in the git repository at:

  git://repo.or.cz/qemu/armbru.git qerror

Markus Armbruster (52):
  usb: Remove disabled monitor_printf() in usb_read_file()
  savevm: Fix -loadvm to report errors to stderr, not the monitor
  pc: Fix error reporting for -boot once
  pc: Factor common code out of pc_boot_set() and cmos_init()
  tools: Remove unused cur_mon from qemu-tool.c
  monitor: Separate default monitor and current monitor cleanly
  block: Simplify usb_msd_initfn() test for can read bdrv key
  monitor: Factor monitor_set_error() out of qemu_error_internal()
      error: Move qemu_error() & friends from monitor.c to own file
      error: Simplify error sink setup
      error: Move qemu_error & friends into their own header
  error: New error_printf() and error_vprintf()
  error: Don't abuse qemu_error() for non-error in qdev_device_help()
  error: Don't abuse qemu_error() for non-error in qbus_find()
  error: Don't abuse qemu_error() for non-error in scsi_hot_add()
  error: Replace qemu_error() by error_report()
  error: Rename qemu_error_new() to qerror_report()
  error: Infrastructure to track locations for error reporting
  error: Include the program name in error messages to stderr
  error: Track locations in configuration files
  QemuOpts: Fix qemu_config_parse() to catch file read errors
  error: Track locations on command line
  qdev: Fix -device and device_add to handle unsuitable bus gracefully
  qdev: Factor qdev_create_from_info() out of qdev_create()
  qdev: Hide no_user devices from users
  qdev: Hide ptr properties from users
  monitor: New monitor_cur_is_qmp()
  error: Let converted handlers print in human monitor
  error: Polish human-readable error descriptions
  error: New QERR_PROPERTY_NOT_FOUND
  error: New QERR_PROPERTY_VALUE_BAD
  error: New QERR_PROPERTY_VALUE_IN_USE
  error: New QERR_PROPERTY_VALUE_NOT_FOUND
  qdev: convert setting device properties to QError
  qdev: Relax parsing of bus option
  error: New QERR_BUS_NOT_FOUND
  error: New QERR_DEVICE_MULTIPLE_BUSSES
  error: New QERR_DEVICE_NO_BUS
  qdev: Convert qbus_find() to QError
  error: New error_printf_unless_qmp()
  error: New QERR_BAD_BUS_FOR_DEVICE
  error: New QERR_BUS_NO_HOTPLUG
  error: New QERR_DEVICE_INIT_FAILED
  error: New QERR_NO_BUS_FOR_DEVICE
      Revert "qdev: Use QError for 'device not found' error"
  error: Convert do_device_add() to QError
  qemu-option: Functions to convert to/from QDict
  qemu-option: Move the implied first name into QemuOptsList
      qemu-option: Rename find_list() to qemu_find_opts() & external linkage
  monitor: New argument type 'O'
  monitor: Use argument type 'O' for device_add
  monitor: convert do_device_add() to QObject

 Makefile.target|1 +
 audio/audio.c  |4 +-
 hw/pc.c|   35 ++
 hw/pci-hotplug.c   |   14 +-
 hw/pci.c   |   14 +-
 hw/qdev-properties.c   |   36 ++---
 hw/qdev.c  |  236 --
 hw/qdev.h  |2 +-
 hw/scsi-bus.c  |4 +-
 hw/scsi-disk.c |7 +-
 hw/scsi-generic.c  |9 +-
 hw/usb-bus.c   |4 +-
 hw/usb-msd.c   |4 +-
 hw/usb-net.c   |2 +-
 hw/usb-serial.c|9 +-
 hw/virtio-net.c|5 +-
 hw/virtio-pci.c|4 +-
 hw/virtio-serial-bus.c |2 +-
 monitor.c  |  337 +---
 monitor.h  |7 +
 net.c  |   32 +++---
 net/dump.c |5 +-
 net/slirp.c|   28 ++--
 net/socket.c   |   12 +-
 net/tap-bsd.c  |7 +-
 net/tap-linux.c|9 +-
 net/tap-solaris.c  |4 +-
 net/tap-win32.c|2 +-
 net/tap.c  |3 +-
 qemu-config.c  |   56 +---
 qemu-config.h  |3 +-
 qemu-error.c   |  227 
 qemu-error.h   |   47 +++

1 2 >

1 - 100 of 135 matches

Mail list logo