[Qemu-devel] KVM call agenda for Mar 16
Please send in any agenda items you are interested in covering. thanks, -chris
[Qemu-devel] [trivial one-liner] be more specific in -mem-path error messages
The error message QEMU gives when hugetlbfs is not accessible is cryptic at best: "mkstemp: Permission denied". Make it a bit more specific instead: "unable to create backing store for hugepages: Permission denied". Thanks! /mjt diff --git a/exec.c b/exec.c index 891e0ee..985bdde 100644 --- a/exec.c +++ b/exec.c @@ -2569,5 +2569,5 @@ static void *file_ram_alloc(ram_addr_t memory, const char *path) fd = mkstemp(filename); if (fd < 0) { - perror("mkstemp"); + perror("unable to create backing store for hugepages"); free(filename); return NULL;
[Qemu-devel] Re: [PATCH, RFC] Replace assert(0) with abort() or cpu_abort()
On 03/15/2010 07:36 PM, Markus Armbruster wrote: Please don't tell me that user emulators make abort() return. abort() is declared __noreturn__, and the optimizer may well rely on that. If the user programs make a signal (SIGABRT, SIG_IGN) call, I suppose abort() will return. Paolo
Re: [Qemu-devel] Re: [PATCH, RFC] Replace assert(0) with abort() or cpu_abort()
Paolo Bonzini <pbonz...@redhat.com> writes: On 03/15/2010 07:36 PM, Markus Armbruster wrote: Please don't tell me that user emulators make abort() return. abort() is declared __noreturn__, and the optimizer may well rely on that. If the user programs make a signal (SIGABRT, SIG_IGN) call, I suppose abort() will return. A program doing that gets what it asks for, and richly deserves it.
[Qemu-devel] [PATCH -v2 02/22] virtio-9p: Implement P9_TVERSION for 9P
From: Anthony Liguori aligu...@us.ibm.com [ki...@linux.vnet.ibm.com: malloc to qemu_malloc coversion] Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p.c | 263 +++- 1 files changed, 261 insertions(+), 2 deletions(-) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index 115c93b..53b3d78 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -111,10 +111,269 @@ static void free_pdu(V9fsState *s, V9fsPDU *pdu) } } +static void v9fs_string_free(V9fsString *str) +{ +free(str-data); +str-data = NULL; +str-size = 0; +} + +static size_t pdu_unpack(void *dst, V9fsPDU *pdu, size_t offset, size_t size) +{ +struct iovec *sg = pdu-elem.out_sg; +BUG_ON((offset + size) sg[0].iov_len); +memcpy(dst, sg[0].iov_base + offset, size); +return size; +} + +/* FIXME i can do this with less variables */ +static size_t pdu_pack(V9fsPDU *pdu, size_t offset, const void *src, size_t size) +{ +struct iovec *sg = pdu-elem.in_sg; +size_t off = 0; +size_t copied = 0; +int i = 0; + +for (i = 0; size i pdu-elem.in_num; i++) { +size_t len; + +if (offset = off offset (off + sg[i].iov_len)) { +len = MIN(sg[i].iov_len - (offset - off), size); +memcpy(sg[i].iov_base + (offset - off), src, len); +size -= len; +offset += len; +off = offset; +copied += len; +src += len; +} else { +off += sg[i].iov_len; +} +} + +return copied; +} + +static int pdu_copy_sg(V9fsPDU *pdu, size_t offset, int rx, struct iovec *sg) +{ +size_t pos = 0; +int i, j; +struct iovec *src_sg; +unsigned int num; + +if (rx) { +src_sg = pdu-elem.in_sg; +num = pdu-elem.in_num; +} else { +src_sg = pdu-elem.out_sg; +num = pdu-elem.out_num; +} + +j = 0; +for (i = 0; i num; i++) { +if (offset = pos) { +sg[j].iov_base = src_sg[i].iov_base; +sg[j].iov_len = src_sg[i].iov_len; +j++; +} else if (offset (src_sg[i].iov_len + pos)) { +sg[j].iov_base = src_sg[i].iov_base; +sg[j].iov_len = src_sg[i].iov_len; +sg[j].iov_base += (offset - pos); +sg[j].iov_len -= (offset - pos); 
+j++; +} +pos += src_sg[i].iov_len; +} + +return j; +} + +static size_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) +{ +size_t old_offset = offset; +va_list ap; +int i; + +va_start(ap, fmt); +for (i = 0; fmt[i]; i++) { + switch (fmt[i]) { + case 'b': { + int8_t *valp = va_arg(ap, int8_t *); + offset += pdu_unpack(valp, pdu, offset, sizeof(*valp)); + break; + } + case 'w': { + int16_t *valp = va_arg(ap, int16_t *); + offset += pdu_unpack(valp, pdu, offset, sizeof(*valp)); + break; + } + case 'd': { + int32_t *valp = va_arg(ap, int32_t *); + offset += pdu_unpack(valp, pdu, offset, sizeof(*valp)); + break; + } + case 'q': { + int64_t *valp = va_arg(ap, int64_t *); + offset += pdu_unpack(valp, pdu, offset, sizeof(*valp)); + break; + } + case 'v': { + struct iovec *iov = va_arg(ap, struct iovec *); + int *iovcnt = va_arg(ap, int *); + *iovcnt = pdu_copy_sg(pdu, offset, 0, iov); + break; + } + case 's': { + V9fsString *str = va_arg(ap, V9fsString *); + offset += pdu_unmarshal(pdu, offset, w, str-size); + /* FIXME: sanity check str-size */ + str-data = qemu_malloc(str-size + 1); + offset += pdu_unpack(str-data, pdu, offset, str-size); + str-data[str-size] = 0; + break; + } + case 'Q': { + V9fsQID *qidp = va_arg(ap, V9fsQID *); + offset += pdu_unmarshal(pdu, offset, bdq, + qidp-type, qidp-version, qidp-path); + break; + } + case 'S': { + V9fsStat *statp = va_arg(ap, V9fsStat *); + offset += pdu_unmarshal(pdu, offset, wwdQdddqsddd, + statp-size, statp-type, statp-dev, + statp-qid, statp-mode, statp-atime, + statp-mtime, statp-length, + statp-name, statp-uid, statp-gid, + statp-muid, statp-extension, + statp-n_uid, statp-n_gid, + statp-n_muid); + break; + } + default: + break; + } +} + +va_end(ap); + +return offset - old_offset; +} + +static size_t pdu_marshal(V9fsPDU *pdu,
[Qemu-devel] [PATCH -v2 01/22] virtio-9p: Add a virtio 9p device to qemu
From: Anthony Liguori aligu...@us.ibm.com This patch doesn't implement the 9p protocol handling code. It adds a simple device which dumps the protocol data. Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- Makefile.target |1 + hw/virtio-9p-debug.c | 442 ++ hw/virtio-9p.c | 275 +++ hw/virtio-9p.h | 70 hw/virtio-pci.c | 25 +++ hw/virtio.h |1 + 6 files changed, 814 insertions(+), 0 deletions(-) create mode 100644 hw/virtio-9p-debug.c create mode 100644 hw/virtio-9p.c create mode 100644 hw/virtio-9p.h diff --git a/Makefile.target b/Makefile.target index 320f807..33f9fcb 100644 --- a/Makefile.target +++ b/Makefile.target @@ -172,6 +172,7 @@ obj-y = vl.o async.o monitor.o pci.o pci_host.o pcie_host.o machine.o gdbstub.o # virtio has to be here due to weird dependency between PCI and virtio-net. # need to fix this properly obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-pci.o virtio-serial-bus.o +obj-y += virtio-9p.o virtio-9p-debug.o obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_ISA_MMIO) += isa_mmio.o diff --git a/hw/virtio-9p-debug.c b/hw/virtio-9p-debug.c new file mode 100644 index 000..9230659 --- /dev/null +++ b/hw/virtio-9p-debug.c @@ -0,0 +1,442 @@ +/* + * Virtio 9p PDU debug + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Anthony Liguori aligu...@us.ibm.com + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ +#include virtio.h +#include pc.h +#include virtio-9p.h + +#include assert.h +#include sys/uio.h + +#define BUG_ON(cond) assert(!(cond)) + +extern int dotu; +static FILE *llogfile; + +static struct iovec *get_sg(V9fsPDU *pdu, int rx) +{ +if (rx) +return pdu-elem.in_sg; +return pdu-elem.out_sg; +} + +static void pprint_int8(V9fsPDU *pdu, int rx, size_t *offsetp, +const char *name) +{ +struct iovec *sg = get_sg(pdu, rx); +size_t offset = *offsetp; +int8_t value; + +BUG_ON((offset + sizeof(value)) sg[0].iov_len); + +memcpy(value, sg[0].iov_base + offset, sizeof(value)); +offset += sizeof(value); + +fprintf(llogfile, %s=0x%x, name, value); + +*offsetp = offset; +} + +static void pprint_int16(V9fsPDU *pdu, int rx, size_t *offsetp, +const char *name) +{ +struct iovec *sg = get_sg(pdu, rx); +size_t offset = *offsetp; +int16_t value; + +BUG_ON((offset + sizeof(value)) sg[0].iov_len); + +memcpy(value, sg[0].iov_base + offset, sizeof(value)); +offset += sizeof(value); + +fprintf(llogfile, %s=0x%x, name, value); + +*offsetp = offset; +} + +static void pprint_int32(V9fsPDU *pdu, int rx, size_t *offsetp, +const char *name) +{ +struct iovec *sg = get_sg(pdu, rx); +size_t offset = *offsetp; +int32_t value; + +BUG_ON((offset + sizeof(value)) sg[0].iov_len); + +memcpy(value, sg[0].iov_base + offset, sizeof(value)); +offset += sizeof(value); + +fprintf(llogfile, %s=0x%x, name, value); + +*offsetp = offset; +} + +static void pprint_int64(V9fsPDU *pdu, int rx, size_t *offsetp, +const char *name) +{ +struct iovec *sg = get_sg(pdu, rx); +size_t offset = *offsetp; +int64_t value; + +BUG_ON((offset + sizeof(value)) sg[0].iov_len); + +memcpy(value, sg[0].iov_base + offset, sizeof(value)); +offset += sizeof(value); + +fprintf(llogfile, %s=0x% PRIx64, name, value); + +*offsetp = offset; +} + +static void pprint_str(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) +{ +struct iovec *sg = get_sg(pdu, rx); +size_t offset = *offsetp; +int16_t size; +size_t result; + 
+BUG_ON((offset + 2) sg[0].iov_len); +memcpy(size, sg[0].iov_base + offset, 2); +offset += 2; + +BUG_ON((offset + size) sg[0].iov_len); +fprintf(llogfile, %s=, name); +result = fwrite(sg[0].iov_base + offset, 1, size, llogfile); +BUG_ON(result != size); +offset += size; + +*offsetp = offset; +} + +static void pprint_qid(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) +{ +fprintf(llogfile, %s={, name); +pprint_int8(pdu, rx, offsetp, type); +pprint_int32(pdu, rx, offsetp, , version); +pprint_int64(pdu, rx, offsetp, , path); +fprintf(llogfile, }); +} + +static void pprint_stat(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) +{ +fprintf(llogfile, %s={, name); +pprint_int16(pdu, rx, offsetp, size); +pprint_int16(pdu, rx, offsetp, , type); +pprint_int32(pdu, rx, offsetp, , dev); +pprint_qid(pdu, rx, offsetp, , qid); +pprint_int32(pdu, rx, offsetp, , mode); +pprint_int32(pdu, rx, offsetp, , atime); +pprint_int32(pdu, rx, offsetp, , mtime); +pprint_int64(pdu,
[Qemu-devel] [PATCH -V2 00/22] virtio-9p: paravirtual file system passthrough
Hi, This patch series adds a paravirtual file system passthrough mechanism to QEMU based on the 9P protocol. With the current implementation, all I/O is implemented in the VCPU thread. We've modified the protocol handlers so that we can support dispatching I/O in a thread pool. The actual thread pool implementation will be posted later. This patch set should work with any recent Linux kernel, as virtio-9p has been supported for a few kernel releases now. The export directory is specified using the following QEMU option: -device virtio-9p-pci,share_path=/mnt/,mount_tag=v_mnt The mount_tag is used to identify the mount point in the kernel. It will be available in the Linux kernel via the /sys/devices/virtio-pci/virtio1/mount_tag file. Changes from V1: a) The fsstress test suite runs successfully with the patches. That should indicate the patches are stable enough to be merged. b) Added proper error handling to all posix_* calls. c) Fixed code to follow the QEMU coding style. d) Other bug fixes, most of which are folded back into the original patches. e) Rebased to qemu master 0aef4261ac0ec9089ade0e3a92f986cb4ba7317e -aneesh
[Qemu-devel] [PATCH -v2 03/22] virtio-9p: Implement P9_TATTACH
From: Anthony Liguori aligu...@us.ibm.com [jv...@linux.vnet.ibm.com: Added qemu_vasprintf] Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- Makefile.target |2 +- hw/virtio-9p-local.c | 84 +++ hw/virtio-9p.c | 155 +++--- hw/virtio-9p.h | 33 +++ qemu-common.h|1 + qemu-malloc.c|5 ++ 6 files changed, 270 insertions(+), 10 deletions(-) create mode 100644 hw/virtio-9p-local.c diff --git a/Makefile.target b/Makefile.target index 33f9fcb..97f32a9 100644 --- a/Makefile.target +++ b/Makefile.target @@ -172,7 +172,7 @@ obj-y = vl.o async.o monitor.o pci.o pci_host.o pcie_host.o machine.o gdbstub.o # virtio has to be here due to weird dependency between PCI and virtio-net. # need to fix this properly obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-pci.o virtio-serial-bus.o -obj-y += virtio-9p.o virtio-9p-debug.o +obj-y += virtio-9p.o virtio-9p-debug.o virtio-9p-local.o obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_ISA_MMIO) += isa_mmio.o diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c new file mode 100644 index 000..1d2523b --- /dev/null +++ b/hw/virtio-9p-local.c @@ -0,0 +1,84 @@ +/* + * Virtio 9p Posix callback + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Anthony Liguori aligu...@us.ibm.com + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ +#include virtio.h +#include pc.h +#include qemu_socket.h +#include virtio-9p.h +#include sys/uio.h +#include arpa/inet.h +#include assert.h +#include pwd.h +#include grp.h +#include sys/socket.h +#include sys/un.h + +static const char *base_path; + +static const char *rpath(const char *path) +{ +/* FIXME: so wrong... 
*/ +static char buffer[4096]; +snprintf(buffer, sizeof(buffer), %s/%s, base_path, path); +return buffer; +} + +static int local_lstat(void *opaque, const char *path, struct stat *stbuf) +{ +return lstat(rpath(path), stbuf); +} + +static int local_setuid(void *opaque, uid_t uid) +{ +struct passwd *pw; +gid_t groups[33]; +int ngroups; +static uid_t cur_uid = -1; + +if (cur_uid == uid) +return 0; + +if (setreuid(0, 0)) +return -1; + +pw = getpwuid(uid); +if (pw == NULL) +return -1; + +ngroups = 33; +if (getgrouplist(pw-pw_name, pw-pw_gid, groups, ngroups) == -1) +return -1; + +if (setgroups(ngroups, groups)) +return -1; + +if (setregid(-1, pw-pw_gid)) +return -1; + +if (setreuid(-1, uid)) +return -1; + +cur_uid = uid; + +return 0; +} + +static V9fsPosixFileOperations ops = { +.lstat = local_lstat, +.setuid = local_setuid, +}; + +V9fsPosixFileOperations *virtio_9p_init_local(const char *path) +{ +base_path = path; +return ops; +} diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index 53b3d78..fdff589 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -82,6 +82,7 @@ typedef struct V9fsState V9fsPDU pdus[MAX_REQ]; V9fsPDU *free_pdu; V9fsFidState *fid_list; +V9fsPosixFileOperations *ops; char *root; uid_t uid; } V9fsState; @@ -91,6 +92,123 @@ int debug_9p_pdu = 1; extern void pprint_pdu(V9fsPDU *pdu); +static int posix_lstat(V9fsState *s, V9fsString *path, struct stat *stbuf) +{ +return s-ops-lstat(s-ops-opaque, path-data, stbuf); +} + +static int posix_setuid(V9fsState *s, uid_t uid) +{ +return s-ops-setuid(s-ops-opaque, uid); +} + +static void v9fs_string_free(V9fsString *str) +{ +qemu_free(str-data); +str-data = NULL; +str-size = 0; +} + +static void v9fs_string_sprintf(V9fsString *str, const char *fmt, ...) 
+{ +va_list ap; +int err; + +v9fs_string_free(str); + +va_start(ap, fmt); +err = qemu_vasprintf(str-data, fmt, ap); +BUG_ON(err == -1); +va_end(ap); + +str-size = err; +} + +static V9fsFidState *lookup_fid(V9fsState *s, int32_t fid) +{ +V9fsFidState *f; + +for (f = s-fid_list; f; f = f-next) { +if (f-fid == fid) { +posix_setuid(s, f-uid); +return f; +} +} + +return NULL; +} + +static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid) +{ +V9fsFidState *f; + +f = lookup_fid(s, fid); +if (f) +return NULL; + +f = qemu_mallocz(sizeof(V9fsFidState)); +BUG_ON(f == NULL); + +f-fid = fid; +f-fd = -1; +f-dir = NULL; + +f-next = s-fid_list; +s-fid_list = f; + +return f; +} + +#define P9_QID_TYPE_DIR0x80 +#define P9_QID_TYPE_SYMLINK0x02 + +#define P9_STAT_MODE_DIR 0x8000 +#define P9_STAT_MODE_APPEND0x4000 +#define P9_STAT_MODE_EXCL 0x2000 +#define P9_STAT_MODE_MOUNT 0x1000 +#define P9_STAT_MODE_AUTH 0x0800 +#define
[Qemu-devel] [PATCH -v2 04/22] virtio-9p: Implement P9_TSTAT
From: Anthony Liguori aligu...@us.ibm.com This get the mount to work on the guest [ki...@linux.vnet.ibm.com: malloc to qemu_malloc conversion] Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Gautham R Shenoy e...@in.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p-local.c |7 ++ hw/virtio-9p.c | 169 +- 2 files changed, 174 insertions(+), 2 deletions(-) diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index 1d2523b..aefb5a8 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -72,9 +72,16 @@ static int local_setuid(void *opaque, uid_t uid) return 0; } +static ssize_t local_readlink(void *opaque, const char *path, +char *buf, size_t bufsz) +{ +return readlink(rpath(path), buf, bufsz); +} + static V9fsPosixFileOperations ops = { .lstat = local_lstat, .setuid = local_setuid, +.readlink = local_readlink, }; V9fsPosixFileOperations *virtio_9p_init_local(const char *path) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index fdff589..de5f6b0 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -102,6 +102,21 @@ static int posix_setuid(V9fsState *s, uid_t uid) return s-ops-setuid(s-ops-opaque, uid); } +static ssize_t posix_readlink(V9fsState *s, V9fsString *path, V9fsString *buf) +{ +ssize_t len; + +buf-data = qemu_malloc(1024); + +len = s-ops-readlink(s-ops-opaque, path-data, buf-data, 1024 - 1); +if (len -1) { +buf-size = len; +buf-data[len] = 0; +} + +return len; +} + static void v9fs_string_free(V9fsString *str) { qemu_free(str-data); @@ -109,6 +124,11 @@ static void v9fs_string_free(V9fsString *str) str-size = 0; } +static void v9fs_string_null(V9fsString *str) +{ +v9fs_string_free(str); +} + static void v9fs_string_sprintf(V9fsString *str, const char *fmt, ...) { va_list ap; @@ -124,6 +144,11 @@ static void v9fs_string_sprintf(V9fsString *str, const char *fmt, ...) 
str-size = err; } +static size_t v9fs_string_size(V9fsString *str) +{ +return str-size; +} + static V9fsFidState *lookup_fid(V9fsState *s, int32_t fid) { V9fsFidState *f; @@ -437,6 +462,15 @@ static size_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) return offset - old_offset; } +static void v9fs_stat_free(V9fsStat *stat) +{ +v9fs_string_free(stat-name); +v9fs_string_free(stat-uid); +v9fs_string_free(stat-gid); +v9fs_string_free(stat-muid); +v9fs_string_free(stat-extension); +} + static void complete_pdu(V9fsState *s, V9fsPDU *pdu, ssize_t len) { int8_t id = pdu-id + 1; /* Response */ @@ -472,6 +506,88 @@ static void complete_pdu(V9fsState *s, V9fsPDU *pdu, ssize_t len) free_pdu(s, pdu); } +static uint32_t stat_to_v9mode(const struct stat *stbuf) +{ +uint32_t mode; + +mode = stbuf-st_mode 0777; +if (S_ISDIR(stbuf-st_mode)) +mode |= P9_STAT_MODE_DIR; + +if (dotu) { +if (S_ISLNK(stbuf-st_mode)) +mode |= P9_STAT_MODE_SYMLINK; +if (S_ISSOCK(stbuf-st_mode)) +mode |= P9_STAT_MODE_SOCKET; +if (S_ISFIFO(stbuf-st_mode)) +mode |= P9_STAT_MODE_NAMED_PIPE; +if (S_ISBLK(stbuf-st_mode) || S_ISCHR(stbuf-st_mode)) +mode |= P9_STAT_MODE_DEVICE; +if (stbuf-st_mode S_ISUID) +mode |= P9_STAT_MODE_SETUID; +if (stbuf-st_mode S_ISGID) +mode |= P9_STAT_MODE_SETGID; +if (stbuf-st_mode S_ISVTX) +mode |= P9_STAT_MODE_SETVTX; +} + +return mode; +} + +static void stat_to_v9stat(V9fsState *s, V9fsString *name, +const struct stat *stbuf, +V9fsStat *v9stat) +{ +int err; +const char *str; + +memset(v9stat, 0, sizeof(*v9stat)); + +stat_to_qid(stbuf, v9stat-qid); +v9stat-mode = stat_to_v9mode(stbuf); +v9stat-atime = stbuf-st_atime; +v9stat-mtime = stbuf-st_mtime; +v9stat-length = stbuf-st_size; + +v9fs_string_null(v9stat-uid); +v9fs_string_null(v9stat-gid); +v9fs_string_null(v9stat-muid); + +if (dotu) { +v9stat-n_uid = stbuf-st_uid; +v9stat-n_gid = stbuf-st_gid; +v9stat-n_muid = 0; + +v9fs_string_null(v9stat-extension); + +if (v9stat-mode P9_STAT_MODE_SYMLINK) { +err = 
posix_readlink(s, name, v9stat-extension); +BUG_ON(err == -1); +v9stat-extension.data[err] = 0; +v9stat-extension.size = err; +} else if (v9stat-mode P9_STAT_MODE_DEVICE) { +v9fs_string_sprintf(v9stat-extension, %c %u %u, +S_ISCHR(stbuf-st_mode) ? 'c' : 'b', +major(stbuf-st_rdev), minor(stbuf-st_rdev)); +} +} + +str = strrchr(name-data, '/'); +if (str) +str += 1; +else +str = name-data; + +
[Qemu-devel] [PATCH -v2 08/22] virtio-9p: Implement P9_TCLUNK
From: Anthony Liguori aligu...@us.ibm.com This patch gets ls -al to work Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p.c | 16 ++-- 1 files changed, 14 insertions(+), 2 deletions(-) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index 9bc0a57..3ac6255 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -1303,8 +1303,20 @@ out: static void v9fs_clunk(V9fsState *s, V9fsPDU *pdu) { -if (debug_9p_pdu) -pprint_pdu(pdu); +int32_t fid; +size_t offset = 7; +int err; + +pdu_unmarshal(pdu, offset, d, fid); + +err = free_fid(s, fid); +if (err 0) +goto out; + +offset = 7; +err = offset; +out: +complete_pdu(s, pdu, err); } static void v9fs_write(V9fsState *s, V9fsPDU *pdu) -- 1.7.0.2.273.gc2413
[Qemu-devel] [PATCH -v2 05/22] virtio-9p: Implement P9_TWALK
From: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Gautham R Shenoy e...@in.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p-local.c | 12 +++ hw/virtio-9p.c | 219 +- 2 files changed, 229 insertions(+), 2 deletions(-) diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index aefb5a8..409f5b0 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -78,10 +78,22 @@ static ssize_t local_readlink(void *opaque, const char *path, return readlink(rpath(path), buf, bufsz); } +static int local_close(void *opaque, int fd) +{ +return close(fd); +} + +static int local_closedir(void *opaque, DIR *dir) +{ +return closedir(dir); +} + static V9fsPosixFileOperations ops = { .lstat = local_lstat, .setuid = local_setuid, .readlink = local_readlink, +.close = local_close, +.closedir = local_closedir, }; V9fsPosixFileOperations *virtio_9p_init_local(const char *path) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index de5f6b0..784d399 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -117,6 +117,22 @@ static ssize_t posix_readlink(V9fsState *s, V9fsString *path, V9fsString *buf) return len; } +static int posix_close(V9fsState *s, int fd) +{ +return s-ops-close(s-ops-opaque, fd); +} + +static int posix_closedir(V9fsState *s, DIR *dir) +{ +return s-ops-closedir(s-ops-opaque, dir); +} + +static void v9fs_string_init(V9fsString *str) +{ +str-data = NULL; +str-size = 0; +} + static void v9fs_string_free(V9fsString *str) { qemu_free(str-data); @@ -144,6 +160,12 @@ static void v9fs_string_sprintf(V9fsString *str, const char *fmt, ...) 
str-size = err; } +static void v9fs_string_copy(V9fsString *lhs, V9fsString *rhs) +{ +v9fs_string_free(lhs); +v9fs_string_sprintf(lhs, %s, rhs-data); +} + static size_t v9fs_string_size(V9fsString *str) { return str-size; @@ -184,6 +206,31 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid) return f; } +static int free_fid(V9fsState *s, int32_t fid) +{ +V9fsFidState **fidpp, *fidp; + +for (fidpp = s-fid_list; *fidpp; fidpp = (*fidpp)-next) { +if ((*fidpp)-fid == fid) +break; +} + +if (*fidpp == NULL) +return -ENOENT; + +fidp = *fidpp; +*fidpp = fidp-next; + +if (fidp-fd != -1) +posix_close(s, fidp-fd); +if (fidp-dir) +posix_closedir(s, fidp-dir); +v9fs_string_free(fidp-path); +qemu_free(fidp); + +return 0; +} + #define P9_QID_TYPE_DIR0x80 #define P9_QID_TYPE_SYMLINK0x02 @@ -689,10 +736,178 @@ out: qemu_free(vs); } +typedef struct V9fsWalkState { +V9fsPDU *pdu; +size_t offset; +int32_t fid; +int32_t newfid; +int16_t nwnames; +int name_idx; +V9fsQID *qids; +V9fsFidState *fidp; +V9fsFidState *newfidp; +V9fsString path; +V9fsString *wnames; +struct stat stbuf; +} V9fsWalkState; + +static void v9fs_walk_complete(V9fsState *s, V9fsWalkState *vs, int err) +{ +complete_pdu(s, vs-pdu, err); + +if(vs-nwnames) { +for (vs-name_idx = 0; vs-name_idx vs-nwnames; vs-name_idx++) +v9fs_string_free(vs-wnames[vs-name_idx]); + +qemu_free(vs-wnames); +qemu_free(vs-qids); +} +} + +static void v9fs_walk_marshal(V9fsWalkState *vs) +{ +int i; +vs-offset = 7; +vs-offset += pdu_marshal(vs-pdu, vs-offset, w, vs-nwnames); + +for (i = 0; i vs-nwnames; i++) +vs-offset += pdu_marshal(vs-pdu, vs-offset, Q, vs-qids[i]); +} + +static void v9fs_walk_post_newfid_lstat(V9fsState *s, V9fsWalkState *vs, +int err) +{ +if (err == -1) { +free_fid(s, vs-newfid); +v9fs_string_free(vs-path); +err = -ENOENT; +goto out; +} + +stat_to_qid(vs-stbuf, vs-qids[vs-name_idx]); + +vs-name_idx++; +if (vs-name_idx vs-nwnames) { +v9fs_string_sprintf(vs-path, %s/%s, vs-newfidp-path.data, 
+vs-wnames[vs-name_idx].data); +v9fs_string_copy(vs-newfidp-path, vs-path); + +err = posix_lstat(s, vs-newfidp-path, vs-stbuf); +v9fs_walk_post_newfid_lstat(s, vs, err); +return; +} + +v9fs_string_free(vs-path); +v9fs_walk_marshal(vs); +err = vs-offset; +out: +v9fs_walk_complete(s, vs, err); +} + +static void v9fs_walk_post_oldfid_lstat(V9fsState *s, V9fsWalkState *vs, +int err) +{ +if (err == -1) { +v9fs_string_free(vs-path); +err = -ENOENT; +goto out; +} + +stat_to_qid(vs-stbuf, vs-qids[vs-name_idx]); +vs-name_idx++; +if (vs-name_idx vs-nwnames) { + +v9fs_string_sprintf(vs-path, %s/%s, +vs-fidp-path.data, vs-wnames[vs-name_idx].data); +
[Qemu-devel] [PATCH -v2 09/22] virtio-9p: Implement P9_TWRITE
From: Anthony Liguori aligu...@us.ibm.com This gets write to file to work Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Venkateswararao Jujjuri jv...@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p-local.c |7 hw/virtio-9p.c | 97 - 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index d77ecc2..c5d1db3 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -129,6 +129,12 @@ static off_t local_lseek(void *opaque, int fd, off_t offset, int whence) return lseek(fd, offset, whence); } +static ssize_t local_writev(void *opaque, int fd, const struct iovec *iov, + int iovcnt) +{ +return writev(fd, iov, iovcnt); +} + static V9fsPosixFileOperations ops = { .lstat = local_lstat, .setuid = local_setuid, @@ -143,6 +149,7 @@ static V9fsPosixFileOperations ops = { .seekdir = local_seekdir, .readv = local_readv, .lseek = local_lseek, +.writev = local_writev, }; V9fsPosixFileOperations *virtio_9p_init_local(const char *path) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index 3ac6255..bc26d66 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -168,6 +168,12 @@ static off_t posix_lseek(V9fsState *s, int fd, off_t offset, int whence) return s-ops-lseek(s-ops-opaque, fd, offset, whence); } +static int posix_writev(V9fsState *s, int fd, const struct iovec *iov, + int iovcnt) +{ +return s-ops-writev(s-ops-opaque, fd, iov, iovcnt); +} + static void v9fs_string_init(V9fsString *str) { str-data = NULL; @@ -1319,10 +1325,97 @@ out: complete_pdu(s, pdu, err); } +typedef struct V9fsWriteState { +V9fsPDU *pdu; +size_t offset; +int32_t fid; +int32_t len; +int32_t count; +int32_t total; +int64_t off; +V9fsFidState *fidp; +struct iovec iov[128]; /* FIXME: bad, bad, bad */ +struct iovec *sg; +int cnt; +} V9fsWriteState; + +static void v9fs_write_post_writev(V9fsState *s, V9fsWriteState *vs, + ssize_t err) +{ +BUG_ON(vs-len 0); +vs-total += vs-len; +vs-sg = 
adjust_sg(vs-sg, vs-len, vs-cnt); +if (vs-total vs-count vs-len 0) { +do { +if (0) +print_sg(vs-sg, vs-cnt); +vs-len = posix_writev(s, vs-fidp-fd, vs-sg, vs-cnt); +} while (vs-len == -1 errno == EINTR); +v9fs_write_post_writev(s, vs, err); +} +vs-offset += pdu_marshal(vs-pdu, vs-offset, d, vs-total); + +err = vs-offset; +complete_pdu(s, vs-pdu, err); +qemu_free(vs); +} + +static void v9fs_write_post_lseek(V9fsState *s, V9fsWriteState *vs, ssize_t err) +{ +BUG_ON(err == -1); + +vs-sg = cap_sg(vs-sg, vs-count, vs-cnt); + +if (vs-total vs-count) { +do { +if (0) +print_sg(vs-sg, vs-cnt); +vs-len = posix_writev(s, vs-fidp-fd, vs-sg, vs-cnt); +} while (vs-len == -1 errno == EINTR); + +v9fs_write_post_writev(s, vs, err); +return; +} + +complete_pdu(s, vs-pdu, err); +qemu_free(vs); +} + static void v9fs_write(V9fsState *s, V9fsPDU *pdu) { -if (debug_9p_pdu) -pprint_pdu(pdu); +V9fsWriteState *vs; +ssize_t err; + +vs = qemu_malloc(sizeof(*vs)); + +vs-pdu = pdu; +vs-offset = 7; +vs-sg = vs-iov; +vs-total = 0; +vs-len = 0; + +pdu_unmarshal(vs-pdu, vs-offset, dqdv, vs-fid, vs-off, vs-count, +vs-sg, vs-cnt); + +vs-fidp = lookup_fid(s, vs-fid); +if (vs-fidp == NULL) { +err = -EINVAL; +goto out; +} + +if (vs-fidp-fd == -1) { +err = -EINVAL; +goto out; +} + +err = posix_lseek(s, vs-fidp-fd, vs-off, SEEK_SET); + +v9fs_write_post_lseek(s, vs, err); +return; + +out: +complete_pdu(s, vs-pdu, err); +qemu_free(vs); } static void v9fs_create(V9fsState *s, V9fsPDU *pdu) -- 1.7.0.2.273.gc2413
[Qemu-devel] [PATCH -v2 06/22] virtio-9p: Implement P9_TOPEN
From: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Gautham R Shenoy e...@in.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p-local.c | 12 hw/virtio-9p.c | 145 +++-- 2 files changed, 151 insertions(+), 6 deletions(-) diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index 409f5b0..d8cb70d 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -88,12 +88,24 @@ static int local_closedir(void *opaque, DIR *dir) return closedir(dir); } +static int local_open(void *opaque, const char *path, int flags) +{ +return open(rpath(path), flags); +} + +static DIR *local_opendir(void *opaque, const char *path) +{ +return opendir(rpath(path)); +} + static V9fsPosixFileOperations ops = { .lstat = local_lstat, .setuid = local_setuid, .readlink = local_readlink, .close = local_close, .closedir = local_closedir, +.open = local_open, +.opendir = local_opendir, }; V9fsPosixFileOperations *virtio_9p_init_local(const char *path) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index 784d399..f1df0b9 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -127,6 +127,16 @@ static int posix_closedir(V9fsState *s, DIR *dir) return s-ops-closedir(s-ops-opaque, dir); } +static int posix_open(V9fsState *s, V9fsString *path, int flags) +{ +return s-ops-open(s-ops-opaque, path-data, flags); +} + +static DIR *posix_opendir(V9fsState *s, V9fsString *path) +{ +return s-ops-opendir(s-ops-opaque, path-data); +} + static void v9fs_string_init(V9fsString *str) { str-data = NULL; @@ -910,15 +920,138 @@ out: v9fs_walk_complete(s, vs, err); } -static void v9fs_clunk(V9fsState *s, V9fsPDU *pdu) +typedef struct V9fsOpenState { +V9fsPDU *pdu; +size_t offset; +int32_t fid; +int8_t mode; +V9fsFidState *fidp; +V9fsQID qid; +struct stat stbuf; + +} V9fsOpenState; + +enum { +Oread = 0x00, +Owrite = 0x01, +Ordwr = 0x02, +Oexec = 0x03, +Oexcl = 0x04, +Otrunc = 0x10, +Orexec = 0x20, +Orclose= 0x40, +Oappend= 0x80, 
+}; + +static int omode_to_uflags(int8_t mode) { -if (debug_9p_pdu) -pprint_pdu(pdu); +int ret = 0; + +switch (mode 3) { +case Oread: +ret = O_RDONLY; +break; +case Ordwr: +ret = O_RDWR; +break; +case Owrite: +ret = O_WRONLY; +break; +case Oexec: +ret = O_RDONLY; +break; +} + +if (mode Otrunc) +ret |= O_TRUNC; + +if (mode Oappend) +ret |= O_APPEND; + +if (mode Oexcl) +ret |= O_EXCL; + +return ret; +} + +static void v9fs_open_post_opendir(V9fsState *s, V9fsOpenState *vs, int err) +{ +if (vs-fidp-dir == NULL) { +err = -errno; +goto out; +} + +vs-offset += pdu_marshal(vs-pdu, vs-offset, Qd, vs-qid, 0); +err = vs-offset; +out: +complete_pdu(s, vs-pdu, err); +qemu_free(vs); + } -static void v9fs_open(V9fsState *s, V9fsPDU *pdu) -{if (debug_9p_pdu) -pprint_pdu(pdu); +static void v9fs_open_post_open(V9fsState *s, V9fsOpenState *vs, int err) +{ +if (vs-fidp-fd == -1) { +err = -errno; +goto out; +} + +vs-offset += pdu_marshal(vs-pdu, vs-offset, Qd, vs-qid, 0); +err = vs-offset; +out: +complete_pdu(s, vs-pdu, err); +qemu_free(vs); +} + +static void v9fs_open_post_lstat(V9fsState *s, V9fsOpenState *vs, int err) +{ +BUG_ON(err == -1); + +stat_to_qid(vs-stbuf, vs-qid); + +if (S_ISDIR(vs-stbuf.st_mode)) { +vs-fidp-dir = posix_opendir(s, vs-fidp-path); +v9fs_open_post_opendir(s, vs, err); +} else { +vs-fidp-fd = posix_open(s, vs-fidp-path, +omode_to_uflags(vs-mode)); +v9fs_open_post_open(s, vs, err); +} + +} + +static void v9fs_open(V9fsState *s, V9fsPDU *pdu) +{ + +V9fsOpenState *vs; +ssize_t err = 0; + + +vs = qemu_malloc(sizeof(*vs)); +vs-pdu = pdu; +vs-offset = 7; + +pdu_unmarshal(vs-pdu, vs-offset, db, vs-fid, vs-mode); + +vs-fidp = lookup_fid(s, vs-fid); +if (vs-fidp == NULL) { +err = -ENOENT; +goto out; +} + +err = posix_lstat(s, vs-fidp-path, vs-stbuf); + +v9fs_open_post_lstat(s, vs, err); +return; +out: +complete_pdu(s, pdu, err); +qemu_free(vs); +} + +static void v9fs_clunk(V9fsState *s, V9fsPDU *pdu) +{ +if (debug_9p_pdu) + pprint_pdu(pdu); } static void 
v9fs_read(V9fsState *s, V9fsPDU *pdu) -- 1.7.0.2.273.gc2413
[Qemu-devel] [PATCH -v2 07/22] virtio-9p: Implement P9_TREAD
From: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Venkateswararao Jujjuri jv...@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p-local.c | 37 hw/virtio-9p.c | 253 +- 2 files changed, 287 insertions(+), 3 deletions(-) diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index d8cb70d..d77ecc2 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -98,6 +98,37 @@ static DIR *local_opendir(void *opaque, const char *path) return opendir(rpath(path)); } +static void local_rewinddir(void *opaque, DIR *dir) +{ +return rewinddir(dir); +} + +static off_t local_telldir(void *opaque, DIR *dir) +{ +return telldir(dir); +} + +static struct dirent *local_readdir(void *opaque, DIR *dir) +{ +return readdir(dir); +} + +static void local_seekdir(void *opaque, DIR *dir, off_t off) +{ +return seekdir(dir, off); +} + +static ssize_t local_readv(void *opaque, int fd, const struct iovec *iov, + int iovcnt) +{ +return readv(fd, iov, iovcnt); +} + +static off_t local_lseek(void *opaque, int fd, off_t offset, int whence) +{ +return lseek(fd, offset, whence); +} + static V9fsPosixFileOperations ops = { .lstat = local_lstat, .setuid = local_setuid, @@ -106,6 +137,12 @@ static V9fsPosixFileOperations ops = { .closedir = local_closedir, .open = local_open, .opendir = local_opendir, +.rewinddir = local_rewinddir, +.telldir = local_telldir, +.readdir = local_readdir, +.seekdir = local_seekdir, +.readv = local_readv, +.lseek = local_lseek, }; V9fsPosixFileOperations *virtio_9p_init_local(const char *path) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index f1df0b9..9bc0a57 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -137,6 +137,37 @@ static DIR *posix_opendir(V9fsState *s, V9fsString *path) return s-ops-opendir(s-ops-opaque, path-data); } +static void posix_rewinddir(V9fsState *s, DIR *dir) +{ +return s-ops-rewinddir(s-ops-opaque, dir); +} + +static off_t 
posix_telldir(V9fsState *s, DIR *dir) +{ +return s-ops-telldir(s-ops-opaque, dir); +} + +static struct dirent *posix_readdir(V9fsState *s, DIR *dir) +{ +return s-ops-readdir(s-ops-opaque, dir); +} + +static void posix_seekdir(V9fsState *s, DIR *dir, off_t off) +{ +return s-ops-seekdir(s-ops-opaque, dir, off); +} + +static int posix_readv(V9fsState *s, int fd, const struct iovec *iov, + int iovcnt) +{ +return s-ops-readv(s-ops-opaque, fd, iov, iovcnt); +} + +static off_t posix_lseek(V9fsState *s, int fd, off_t offset, int whence) +{ +return s-ops-lseek(s-ops-opaque, fd, offset, whence); +} + static void v9fs_string_init(V9fsString *str) { str-data = NULL; @@ -1048,14 +1079,230 @@ out: qemu_free(vs); } -static void v9fs_clunk(V9fsState *s, V9fsPDU *pdu) +static struct iovec *adjust_sg(struct iovec *sg, int len, int *iovcnt) { -if (debug_9p_pdu) - pprint_pdu(pdu); +while (len *iovcnt) { +if (len sg-iov_len) { +sg-iov_len -= len; +sg-iov_base += len; +len = 0; +} else { +len -= sg-iov_len; +sg++; +*iovcnt -= 1; +} +} + +return sg; +} + +static struct iovec *cap_sg(struct iovec *sg, int cap, int *cnt) +{ +int i; +int total = 0; + +for (i = 0; i *cnt; i++) { +if ((total + sg[i].iov_len) cap) { +sg[i].iov_len -= ((total + sg[i].iov_len) - cap); +i++; +break; +} +total += sg[i].iov_len; +} + +*cnt = i; + +return sg; +} + +static void print_sg(struct iovec *sg, int cnt) +{ +int i; + +printf(sg[%d]: {, cnt); +for (i = 0; i cnt; i++) { +if (i) +printf(, ); +printf((%p, %zd), sg[i].iov_base, sg[i].iov_len); +} +printf(}\n); +} + +typedef struct V9fsReadState { +V9fsPDU *pdu; +size_t offset; +int32_t fid; +int32_t count; +int32_t total; +int64_t off; +V9fsFidState *fidp; +struct iovec iov[128]; /* FIXME: bad, bad, bad */ +struct iovec *sg; +off_t dir_pos; +struct dirent *dent; +struct stat stbuf; +V9fsString name; +V9fsStat v9stat; +int32_t len; +int32_t cnt; +int32_t max_count; +} V9fsReadState; + +static void v9fs_read_post_readdir(V9fsState *, V9fsReadState *, ssize_t ); + 
+static void v9fs_read_post_seekdir(V9fsState *s, V9fsReadState *vs, ssize_t err) +{ +v9fs_stat_free(vs-v9stat); +v9fs_string_free(vs-name); +vs-offset += pdu_marshal(vs-pdu, vs-offset, d, vs-count); +vs-offset += vs-count; +err = vs-offset; +complete_pdu(s, vs-pdu, err); +qemu_free(vs); +return; +} + +static void v9fs_read_post_dir_lstat(V9fsState *s, V9fsReadState *vs, +
[Qemu-devel] [PATCH -v2 11/22] virtio-9p: Implement P9_TWSTAT
From: Anthony Liguori aligu...@us.ibm.com This gets file and directory creation to work [jv...@linux.vnet.ibm.com: strdup to qemu_strdup conversion] Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Gautham R Shenoy e...@in.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p-local.c | 42 + hw/virtio-9p.c | 236 +- 2 files changed, 274 insertions(+), 4 deletions(-) diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index cdae5c0..829e79a 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -207,6 +207,44 @@ static int local_link(void *opaque, const char *oldpath, const char *newpath) return err; } +static int local_truncate(void *opaque, const char *path, off_t size) +{ +return truncate(rpath(path), size); +} + +static int local_rename(void *opaque, const char *oldpath, + const char *newpath) +{ +char *tmp; +int err; + +tmp = qemu_strdup(rpath(oldpath)); +if (tmp == NULL) + return -1; + +err = rename(tmp, rpath(newpath)); +if (err == -1) { + int serrno = errno; + qemu_free(tmp); + errno = serrno; +} else + qemu_free(tmp); + +return err; + +} + +static int local_chown(void *opaque, const char *path, uid_t uid, gid_t gid) +{ +return chown(rpath(path), uid, gid); +} + +static int local_utime(void *opaque, const char *path, + const struct utimbuf *buf) +{ +return utime(rpath(path), buf); +} + static V9fsPosixFileOperations ops = { .lstat = local_lstat, .setuid = local_setuid, @@ -230,6 +268,10 @@ static V9fsPosixFileOperations ops = { .open2 = local_open2, .symlink = local_symlink, .link = local_link, +.truncate = local_truncate, +.rename = local_rename, +.chown = local_chown, +.utime = local_utime, }; V9fsPosixFileOperations *virtio_9p_init_local(const char *path) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index 067cc85..c8995a3 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -215,6 +215,28 @@ static int posix_link(V9fsState *s, V9fsString *oldpath, V9fsString *newpath) return 
s-ops-link(s-ops-opaque, oldpath-data, newpath-data); } +static int posix_truncate(V9fsState *s, V9fsString *path, off_t size) +{ +return s-ops-truncate(s-ops-opaque, path-data, size); +} + +static int posix_rename(V9fsState *s, V9fsString *oldpath, + V9fsString *newpath) +{ +return s-ops-rename(s-ops-opaque, oldpath-data, newpath-data); +} + +static int posix_chown(V9fsState *s, V9fsString *path, uid_t uid, gid_t gid) +{ +return s-ops-chown(s-ops-opaque, path-data, uid, gid); +} + +static int posix_utime(V9fsState *s, V9fsString *path, + const struct utimbuf *buf) +{ +return s-ops-utime(s-ops-opaque, path-data, buf); +} + static void v9fs_string_init(V9fsString *str) { str-data = NULL; @@ -398,7 +420,8 @@ static size_t pdu_unpack(void *dst, V9fsPDU *pdu, size_t offset, size_t size) } /* FIXME i can do this with less variables */ -static size_t pdu_pack(V9fsPDU *pdu, size_t offset, const void *src, size_t size) +static size_t pdu_pack(V9fsPDU *pdu, size_t offset, const void *src, + size_t size) { struct iovec *sg = pdu-elem.in_sg; size_t off = 0; @@ -1613,7 +1636,8 @@ static void v9fs_create_post_lstat(V9fsState *s, V9fsCreateState *vs, int err) uint32_t major, minor; mode_t nmode = 0; -if (sscanf(vs-extension.data, %c %u %u, ctype, major, minor) != 3) { +if (sscanf(vs-extension.data, %c %u %u, ctype, major, + minor) != 3) { err = -errno; v9fs_post_create(s, vs, err); } @@ -1698,10 +1722,214 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu) pprint_pdu(pdu); } +static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension) +{ +mode_t ret; + +ret = mode 0777; +if (mode P9_STAT_MODE_DIR) +ret |= S_IFDIR; + +if (dotu) { +if (mode P9_STAT_MODE_SYMLINK) +ret |= S_IFLNK; +if (mode P9_STAT_MODE_SOCKET) +ret |= S_IFSOCK; +if (mode P9_STAT_MODE_NAMED_PIPE) +ret |= S_IFIFO; +if (mode P9_STAT_MODE_DEVICE) { +if (extension extension-data[0] == 'c') +ret |= S_IFCHR; +else +ret |= S_IFBLK; +} +} + +if (!(ret~0777)) +ret |= S_IFREG; + +if (mode P9_STAT_MODE_SETUID) +ret 
|= S_ISUID; +if (mode P9_STAT_MODE_SETGID) +ret |= S_ISGID; +if (mode P9_STAT_MODE_SETVTX) +ret |= S_ISVTX; + +return ret; +} + +typedef struct V9fsWstatState +{ +V9fsPDU *pdu; +size_t offset; +int32_t fid; +int16_t unused; +V9fsStat v9stat; +
[Qemu-devel] [PATCH -v2 14/22] virtio-9p: Add multiple mount point support
This patch adds a mount tag name to the 9p config space. This tag should uniquely identify the mount point and should be used in the mount command as the device name. The QEMU command line for specifying a 9p share directory now becomes: -device virtio-9p-pci,share_path=/mnt/,mount_tag=v_mnt -device virtio-9p-pci,share_path=/tmp/,mount_tag=v_tmp NOTE: We now limit the tag name to 32 characters because of a virtio config space limitation. Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/9p.h | 26 ++ hw/virtio-9p-local.c | 101 - hw/virtio-9p.c | 206 - hw/virtio-9p.h | 141 --- hw/virtio-pci.c |8 +- hw/virtio.h |3 +- 6 files changed, 296 insertions(+), 189 deletions(-) create mode 100644 hw/9p.h diff --git a/hw/9p.h b/hw/9p.h new file mode 100644 index 000..f0ff45b --- /dev/null +++ b/hw/9p.h @@ -0,0 +1,26 @@ +/* + * Virtio 9p + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_9P_H +#define QEMU_9P_H + +#include stdbool.h + +typedef struct V9fsConf +{ +char *share_path; +/* tag name for the device */ +char *tag; +} V9fsConf; + +#endif diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index dca6175..4dd6b22 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -22,22 +22,20 @@ #include sys/socket.h #include sys/un.h -static const char *base_path; - -static const char *rpath(const char *path) +static const char *rpath(V9fsState *s, const char *path) { /* FIXME: so wrong... 
*/ static char buffer[4096]; -snprintf(buffer, sizeof(buffer), %s/%s, base_path, path); +snprintf(buffer, sizeof(buffer), %s/%s, s-fs_root, path); return buffer; } -static int local_lstat(void *opaque, const char *path, struct stat *stbuf) +static int local_lstat(V9fsState *s, const char *path, struct stat *stbuf) { -return lstat(rpath(path), stbuf); +return lstat(rpath(s, path), stbuf); } -static int local_setuid(void *opaque, uid_t uid) +static int local_setuid(V9fsState *s, uid_t uid) { struct passwd *pw; gid_t groups[33]; @@ -72,86 +70,86 @@ static int local_setuid(void *opaque, uid_t uid) return 0; } -static ssize_t local_readlink(void *opaque, const char *path, -char *buf, size_t bufsz) +static ssize_t local_readlink(V9fsState *s, const char *path, + char *buf, size_t bufsz) { -return readlink(rpath(path), buf, bufsz); +return readlink(rpath(s, path), buf, bufsz); } -static int local_close(void *opaque, int fd) +static int local_close(V9fsState *s, int fd) { return close(fd); } -static int local_closedir(void *opaque, DIR *dir) +static int local_closedir(V9fsState *s, DIR *dir) { return closedir(dir); } -static int local_open(void *opaque, const char *path, int flags) +static int local_open(V9fsState *s, const char *path, int flags) { -return open(rpath(path), flags); +return open(rpath(s, path), flags); } -static DIR *local_opendir(void *opaque, const char *path) +static DIR *local_opendir(V9fsState *s, const char *path) { -return opendir(rpath(path)); +return opendir(rpath(s, path)); } -static void local_rewinddir(void *opaque, DIR *dir) +static void local_rewinddir(V9fsState *s, DIR *dir) { return rewinddir(dir); } -static off_t local_telldir(void *opaque, DIR *dir) +static off_t local_telldir(V9fsState *s, DIR *dir) { return telldir(dir); } -static struct dirent *local_readdir(void *opaque, DIR *dir) +static struct dirent *local_readdir(V9fsState *s, DIR *dir) { return readdir(dir); } -static void local_seekdir(void *opaque, DIR *dir, off_t off) +static 
void local_seekdir(V9fsState *s, DIR *dir, off_t off) { return seekdir(dir, off); } -static ssize_t local_readv(void *opaque, int fd, const struct iovec *iov, +static ssize_t local_readv(V9fsState *s, int fd, const struct iovec *iov, int iovcnt) { return readv(fd, iov, iovcnt); } -static off_t local_lseek(void *opaque, int fd, off_t offset, int whence) +static off_t local_lseek(V9fsState *s, int fd, off_t offset, int whence) { return lseek(fd, offset, whence); } -static ssize_t local_writev(void *opaque, int fd, const struct iovec *iov, +static ssize_t local_writev(V9fsState *s, int fd, const struct iovec *iov, int iovcnt) { return writev(fd, iov, iovcnt); } -static int local_chmod(void *opaque, const char *path, mode_t mode) +static int local_chmod(V9fsState *s, const char *path, mode_t mode) { -return chmod(rpath(path), mode); +return chmod(rpath(s, path), mode); } -static
[Qemu-devel] [PATCH -v2 18/22] virtio-9p: Fix sg usage in the code
The sg list can contain more than one element, and we need to use the right element when we are doing the marshaling and unmarshaling of data. This patch also abstracts out the pack/unpack interface and makes sure we use one function for doing both. Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p-debug.c | 83 + hw/virtio-9p.c | 57 -- hw/virtio-9p.h |9 + 3 files changed, 98 insertions(+), 51 deletions(-) diff --git a/hw/virtio-9p-debug.c b/hw/virtio-9p-debug.c index 9230659..ee222db 100644 --- a/hw/virtio-9p-debug.c +++ b/hw/virtio-9p-debug.c @@ -29,92 +29,121 @@ static struct iovec *get_sg(V9fsPDU *pdu, int rx) return pdu-elem.out_sg; } +static int get_sg_count(V9fsPDU *pdu, int rx) +{ +if (rx) +return pdu-elem.in_num; +return pdu-elem.out_num; + +} + static void pprint_int8(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) { -struct iovec *sg = get_sg(pdu, rx); +size_t copied; +int count = get_sg_count(pdu, rx); size_t offset = *offsetp; +struct iovec *sg = get_sg(pdu, rx); int8_t value; -BUG_ON((offset + sizeof(value)) sg[0].iov_len); +copied = do_pdu_unpack(value, sg, count, offset, sizeof(value)); -memcpy(value, sg[0].iov_base + offset, sizeof(value)); +BUG_ON(copied != sizeof(value)); offset += sizeof(value); - fprintf(llogfile, %s=0x%x, name, value); - *offsetp = offset; } static void pprint_int16(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) { +size_t copied; +int count = get_sg_count(pdu, rx); struct iovec *sg = get_sg(pdu, rx); size_t offset = *offsetp; int16_t value; -BUG_ON((offset + sizeof(value)) sg[0].iov_len); -memcpy(value, sg[0].iov_base + offset, sizeof(value)); -offset += sizeof(value); +copied = do_pdu_unpack(value, sg, count, offset, sizeof(value)); +BUG_ON(copied != sizeof(value)); +offset += sizeof(value); fprintf(llogfile, %s=0x%x, name, value); - *offsetp = offset; } static void pprint_int32(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) { +size_t copied; +int count = get_sg_count(pdu, rx); 
struct iovec *sg = get_sg(pdu, rx); size_t offset = *offsetp; int32_t value; -BUG_ON((offset + sizeof(value)) sg[0].iov_len); -memcpy(value, sg[0].iov_base + offset, sizeof(value)); -offset += sizeof(value); +copied = do_pdu_unpack(value, sg, count, offset, sizeof(value)); +BUG_ON(copied != sizeof(value)); +offset += sizeof(value); fprintf(llogfile, %s=0x%x, name, value); - *offsetp = offset; } static void pprint_int64(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) { +size_t copied; +int count = get_sg_count(pdu, rx); struct iovec *sg = get_sg(pdu, rx); size_t offset = *offsetp; int64_t value; -BUG_ON((offset + sizeof(value)) sg[0].iov_len); -memcpy(value, sg[0].iov_base + offset, sizeof(value)); -offset += sizeof(value); +copied = do_pdu_unpack(value, sg, count, offset, sizeof(value)); +BUG_ON(copied != sizeof(value)); +offset += sizeof(value); fprintf(llogfile, %s=0x% PRIx64, name, value); - *offsetp = offset; } static void pprint_str(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) { +int sg_count = get_sg_count(pdu, rx); struct iovec *sg = get_sg(pdu, rx); size_t offset = *offsetp; -int16_t size; +uint16_t tmp_size, size; size_t result; +size_t copied = 0; +int i = 0; -BUG_ON((offset + 2) sg[0].iov_len); -memcpy(size, sg[0].iov_base + offset, 2); -offset += 2; +/* get the size */ +copied = do_pdu_unpack(tmp_size, sg, sg_count, offset, sizeof(tmp_size)); +BUG_ON(copied != sizeof(tmp_size)); +size = le16_to_cpupu(tmp_size); +offset += copied; -BUG_ON((offset + size) sg[0].iov_len); fprintf(llogfile, %s=, name); -result = fwrite(sg[0].iov_base + offset, 1, size, llogfile); -BUG_ON(result != size); -offset += size; - -*offsetp = offset; +for (i = 0; size i sg_count; i++) { +size_t len; +if (offset = sg[i].iov_len) { +/* skip this sg */ +offset -= sg[i].iov_len; +continue; +} else { +len = MIN(sg[i].iov_len - offset, size); +result = fwrite(sg[i].iov_base + offset, 1, len, llogfile); +BUG_ON(result != len); +size -= len; +copied += len; +if (size) 
{ +offset = 0; +continue; +} +} +} +*offsetp += copied; } static void pprint_qid(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index
[Qemu-devel] [PATCH -v2 12/22] virtio-9p: Implement P9_TREMOVE
From: Anthony Liguori aligu...@us.ibm.com This gets file deletion to work [mo...@in.ibm.com: Fix truncate to use the relative path] Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Gautham R Shenoy e...@in.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p-local.c |7 ++ hw/virtio-9p.c | 54 - 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index 829e79a..dca6175 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -245,6 +245,12 @@ static int local_utime(void *opaque, const char *path, return utime(rpath(path), buf); } +static int local_remove(void *opaque, const char *path) +{ +return remove(rpath(path)); +} + + static V9fsPosixFileOperations ops = { .lstat = local_lstat, .setuid = local_setuid, @@ -272,6 +278,7 @@ static V9fsPosixFileOperations ops = { .rename = local_rename, .chown = local_chown, .utime = local_utime, +.remove = local_remove, }; V9fsPosixFileOperations *virtio_9p_init_local(const char *path) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index c8995a3..4478e57 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -237,6 +237,11 @@ static int posix_utime(V9fsState *s, V9fsString *path, return s-ops-utime(s-ops-opaque, path-data, buf); } +static int posix_remove(V9fsState *s, V9fsString *path) +{ +return s-ops-remove(s-ops-opaque, path-data); +} + static void v9fs_string_init(V9fsString *str) { str-data = NULL; @@ -1716,10 +1721,55 @@ static void v9fs_flush(V9fsState *s, V9fsPDU *pdu) pprint_pdu(pdu); } +typedef struct V9fsRemoveState { +V9fsPDU *pdu; +size_t offset; +int32_t fid; +V9fsFidState *fidp; +} V9fsRemoveState; + +static void v9fs_remove_post_remove(V9fsState *s, V9fsRemoveState *vs, +int err) +{ +if (err) { +err = -errno; +goto out; +} + +err = free_fid(s, vs-fid); +if (err 0) +goto out; + +err = vs-offset; +out: +complete_pdu(s, vs-pdu, err); +qemu_free(vs); +} + static void v9fs_remove(V9fsState *s, V9fsPDU *pdu) { 
-if (debug_9p_pdu) -pprint_pdu(pdu); +V9fsRemoveState *vs; +int err = 0; + +vs = qemu_malloc(sizeof(*vs)); +vs-pdu = pdu; +vs-offset = 7; + +pdu_unmarshal(vs-pdu, vs-offset, d, vs-fid); + +vs-fidp = lookup_fid(s, vs-fid); +if (vs-fidp == NULL) { +err = -EINVAL; +goto out; +} + +err = posix_remove(s, vs-fidp-path); +v9fs_remove_post_remove(s, vs, err); +return; + +out: +complete_pdu(s, pdu, err); +qemu_free(vs); } static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension) -- 1.7.0.2.273.gc2413
[Qemu-devel] [PATCH -v2 17/22] Implement sync support in 9p server
From: M. Mohan Kumar mo...@in.ibm.com When wstat is called with stat field values set to 'don't touch' pattern, 9p Server interprets it as a request to guarantee that the contents of the associated file are committed to stable storage before the Rwstat message is returned. Implement this feature in the server side. [jv...@linux.vnet.ibm.com: Proper error handling and state friendliness] Signed-off-by: M. Mohan Kumar mo...@in.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p-local.c |6 ++ hw/virtio-9p.c | 45 + hw/virtio-9p.h |1 + 3 files changed, 52 insertions(+), 0 deletions(-) diff --git a/hw/virtio-9p-local.c b/hw/virtio-9p-local.c index 4dd6b22..4584bf6 100644 --- a/hw/virtio-9p-local.c +++ b/hw/virtio-9p-local.c @@ -249,6 +249,11 @@ static int local_remove(V9fsState *s, const char *path) } +static int local_fsync(V9fsState *s, int fd) +{ +return fsync(fd); +} + static V9fsPosixFileOperations ops = { .lstat = local_lstat, .setuid = local_setuid, @@ -277,6 +282,7 @@ static V9fsPosixFileOperations ops = { .chown = local_chown, .utime = local_utime, .remove = local_remove, +.fsync = local_fsync, }; V9fsPosixFileOperations *virtio_9p_init_local(const char *path) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index 3ddf2b9..3a5b3f0 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -172,6 +172,11 @@ static int posix_remove(V9fsState *s, V9fsString *path) return s-ops-remove(s, path-data); } +static int posix_fsync(V9fsState *s, int fd) +{ +return s-ops-fsync(s, fd); +} + static void v9fs_string_init(V9fsString *str) { str-data = NULL; @@ -1889,6 +1894,39 @@ out: qemu_free(vs); } +static void v9fs_wstat_post_fsync(V9fsState *s, V9fsWstatState *vs, int err) +{ +if (err == -1) { +err = -errno; +} +v9fs_stat_free(vs-v9stat); +complete_pdu(s, vs-pdu, err); +qemu_free(vs); +} + +static int donttouch_stat(V9fsStat *stat) +{ +if (stat-type == -1 + stat-dev == -1 + stat-qid.type == -1 + stat-qid.version == -1 + stat-qid.path == -1 + 
stat-mode == -1 + stat-atime == -1 + stat-mtime == -1 + stat-length == -1 + !stat-name.size + !stat-uid.size + !stat-gid.size + !stat-muid.size + stat-n_uid == -1 + stat-n_gid == -1 + stat-n_muid == -1) + return 1; + else + return 0; +} + static void v9fs_wstat(V9fsState *s, V9fsPDU *pdu) { V9fsWstatState *vs; @@ -1906,6 +1944,13 @@ static void v9fs_wstat(V9fsState *s, V9fsPDU *pdu) goto out; } +/* do we need to sync the file? */ +if (donttouch_stat(vs-v9stat)) { +err = posix_fsync(s, vs-fidp-fd); +v9fs_wstat_post_fsync(s, vs, err); +return; +} + if (vs-v9stat.mode != -1) { if (vs-v9stat.mode P9_STAT_MODE_DIR vs-fidp-dir == NULL) { err = -EIO; diff --git a/hw/virtio-9p.h b/hw/virtio-9p.h index 3fc88a4..da0aa64 100644 --- a/hw/virtio-9p.h +++ b/hw/virtio-9p.h @@ -182,6 +182,7 @@ typedef struct V9fsPosixFileOpertions int (*fstat)(V9fsState *, int, struct stat *); int (*rename)(V9fsState *, const char *, const char *); int (*truncate)(V9fsState *, const char *, off_t); +int (*fsync)(V9fsState *, int); void *opaque; } V9fsPosixFileOperations; -- 1.7.0.2.273.gc2413
[Qemu-devel] [PATCH -v2 21/22] virtio-9p: Remove unnecessary definition of fid
We already have fid as a part of V9fsFidState so use that instead of defining another variable Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com Reviewed-by: Venkateswararao Jujjuri jv...@linux.vnet.ibm.com --- hw/virtio-9p.c | 62 ++- 1 files changed, 29 insertions(+), 33 deletions(-) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index 3ce26ca..c8ab6b6 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -770,7 +770,6 @@ out: typedef struct V9fsStatState { V9fsPDU *pdu; size_t offset; -int32_t fid; V9fsStat v9stat; V9fsFidState *fidp; struct stat stbuf; @@ -798,6 +797,7 @@ out: static void v9fs_stat(V9fsState *s, V9fsPDU *pdu) { +int32_t fid; V9fsStatState *vs; ssize_t err = 0; @@ -807,9 +807,9 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu) memset(vs-v9stat, 0, sizeof(vs-v9stat)); -pdu_unmarshal(vs-pdu, vs-offset, d, vs-fid); +pdu_unmarshal(vs-pdu, vs-offset, d, fid); -vs-fidp = lookup_fid(s, vs-fid); +vs-fidp = lookup_fid(s, fid); if (vs-fidp == NULL) { err = -ENOENT; goto out; @@ -828,8 +828,6 @@ out: typedef struct V9fsWalkState { V9fsPDU *pdu; size_t offset; -int32_t fid; -int32_t newfid; int16_t nwnames; int name_idx; V9fsQID *qids; @@ -867,7 +865,7 @@ static void v9fs_walk_post_newfid_lstat(V9fsState *s, V9fsWalkState *vs, int err) { if (err == -1) { -free_fid(s, vs-newfid); +free_fid(s, vs-newfidp-fid); v9fs_string_free(vs-path); err = -ENOENT; goto out; @@ -924,6 +922,7 @@ out: static void v9fs_walk(V9fsState *s, V9fsPDU *pdu) { +int32_t fid, newfid; V9fsWalkState *vs; int err = 0; int i; @@ -934,8 +933,8 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu) vs-qids = NULL; vs-offset = 7; -vs-offset += pdu_unmarshal(vs-pdu, vs-offset, ddw, vs-fid, -vs-newfid, vs-nwnames); +vs-offset += pdu_unmarshal(vs-pdu, vs-offset, ddw, fid, +newfid, vs-nwnames); if(vs-nwnames) { vs-wnames = qemu_mallocz(sizeof(vs-wnames[0]) * vs-nwnames); @@ -948,14 +947,14 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu) } } -vs-fidp = lookup_fid(s, vs-fid); 
+vs-fidp = lookup_fid(s, fid); if (vs-fidp == NULL) { err = -ENOENT; goto out; } /* FIXME: is this really valid? */ -if (vs-fid == vs-newfid) { +if (fid == newfid) { v9fs_string_init(vs-path); vs-name_idx = 0; @@ -969,7 +968,7 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu) return; } } else { -vs-newfidp = alloc_fid(s, vs-newfid); +vs-newfidp = alloc_fid(s, newfid); if (vs-newfidp == NULL) { err = -EINVAL; goto out; @@ -1000,7 +999,6 @@ out: typedef struct V9fsOpenState { V9fsPDU *pdu; size_t offset; -int32_t fid; int8_t mode; V9fsFidState *fidp; V9fsQID qid; @@ -1105,7 +1103,7 @@ out: static void v9fs_open(V9fsState *s, V9fsPDU *pdu) { - +int32_t fid; V9fsOpenState *vs; ssize_t err = 0; @@ -1114,9 +1112,9 @@ static void v9fs_open(V9fsState *s, V9fsPDU *pdu) vs-pdu = pdu; vs-offset = 7; -pdu_unmarshal(vs-pdu, vs-offset, db, vs-fid, vs-mode); +pdu_unmarshal(vs-pdu, vs-offset, db, fid, vs-mode); -vs-fidp = lookup_fid(s, vs-fid); +vs-fidp = lookup_fid(s, fid); if (vs-fidp == NULL) { err = -ENOENT; goto out; @@ -1183,7 +1181,6 @@ static void print_sg(struct iovec *sg, int cnt) typedef struct V9fsReadState { V9fsPDU *pdu; size_t offset; -int32_t fid; int32_t count; int32_t total; int64_t off; @@ -1346,6 +1343,7 @@ out: static void v9fs_read(V9fsState *s, V9fsPDU *pdu) { +int32_t fid; V9fsReadState *vs; ssize_t err = 0; @@ -1356,9 +1354,9 @@ static void v9fs_read(V9fsState *s, V9fsPDU *pdu) vs-len = 0; vs-count = 0; -pdu_unmarshal(vs-pdu, vs-offset, dqd, vs-fid, vs-off, vs-count); +pdu_unmarshal(vs-pdu, vs-offset, dqd, fid, vs-off, vs-count); -vs-fidp = lookup_fid(s, vs-fid); +vs-fidp = lookup_fid(s, fid); if (vs-fidp == NULL) { err = -EINVAL; goto out; @@ -1407,7 +1405,6 @@ out: typedef struct V9fsWriteState { V9fsPDU *pdu; size_t offset; -int32_t fid; int32_t len; int32_t count; int32_t total; @@ -1476,6 +1473,7 @@ out: static void v9fs_write(V9fsState *s, V9fsPDU *pdu) { +int32_t fid; V9fsWriteState *vs; ssize_t err; @@ -1487,10 +1485,10 @@ static void 
v9fs_write(V9fsState *s, V9fsPDU *pdu) vs-total = 0; vs-len = 0; -pdu_unmarshal(vs-pdu, vs-offset, dqdv, vs-fid, vs-off,
[Qemu-devel] [PATCH -v2 22/22] virtio-9p: Update existing fid path on rename
We need to make sure that we update the path component of existing fids when we rename a file. The client is not expected to clunk the fids pointing to the old name. If we don't update them, any operation on an old unopened fid will point to the old name and will fail. Add a BUG_ON to make sure that when we clone a fid, we don't have an open descriptor attached to the fid. We also need to make sure that when we open a fid, the specified fid is not already open. Capture that case by adding a BUG_ON. Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p.c | 39 +++ 1 files changed, 39 insertions(+), 0 deletions(-) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index c8ab6b6..9aa4b72 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -955,6 +955,9 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu) /* FIXME: is this really valid? */ if (fid == newfid) { + +BUG_ON(vs-fidp-fd != -1); +BUG_ON(vs-fidp-dir); v9fs_string_init(vs-path); vs-name_idx = 0; @@ -1120,6 +1123,9 @@ static void v9fs_open(V9fsState *s, V9fsPDU *pdu) goto out; } +BUG_ON(vs-fidp-fd != -1); +BUG_ON(vs-fidp-dir); + err = posix_lstat(s, vs-fidp-path, vs-stbuf); v9fs_open_post_lstat(s, vs, err); @@ -1877,8 +1883,19 @@ out: qemu_free(vs); } +static void v9fs_fix_path(V9fsString *dst, V9fsString *src, int len) +{ +V9fsString str; +v9fs_string_init(str); +v9fs_string_copy(str, dst); +v9fs_string_sprintf(dst, %s%s, src-data, str.data+len); +v9fs_string_free(str); +} + + static void v9fs_wstat_post_chown(V9fsState *s, V9fsWstatState *vs, int err) { +V9fsFidState *fidp; if (err 0) { goto out; } @@ -1905,6 +1922,28 @@ static void v9fs_wstat_post_chown(V9fsState *s, V9fsWstatState *vs, int err) if (strcmp(new_name, vs-fidp-path.data) != 0) { if (posix_rename(s, vs-fidp-path, vs-nname)) { err = -errno; +} else { +/* + * Fixup fid's pointing to the old name to + * start pointing to the new name + */ +for (fidp = s-fid_list; fidp; fidp = fidp-next) { + +if (vs-fidp == fidp) { +/* + * we 
replace name of this fid towards the end + * so that our below strcmp will work + */ +continue; +} +if (!strncmp(vs-fidp-path.data, fidp-path.data, + strlen(vs-fidp-path.data))) { +/* replace the name */ +v9fs_fix_path(fidp-path, vs-nname, + strlen(vs-fidp-path.data)); +} +} +v9fs_string_copy(vs-fidp-path, vs-nname); } } } -- 1.7.0.2.273.gc2413
[Qemu-devel] [PATCH -v2 15/22] virtio-9p: Use little endian format on virtio
We need to use platform independent data format as part of protocol data. 9P uses little endian format on wire Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p.c | 34 +++--- 1 files changed, 23 insertions(+), 11 deletions(-) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index e095916..e8a9eeb 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -426,23 +426,32 @@ static size_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) for (i = 0; fmt[i]; i++) { switch (fmt[i]) { case 'b': { - int8_t *valp = va_arg(ap, int8_t *); + uint8_t *valp = va_arg(ap, uint8_t *); offset += pdu_unpack(valp, pdu, offset, sizeof(*valp)); break; } case 'w': { - int16_t *valp = va_arg(ap, int16_t *); - offset += pdu_unpack(valp, pdu, offset, sizeof(*valp)); + uint16_t val, *valp; + valp = va_arg(ap, uint16_t *); + val = le16_to_cpupu(valp); + offset += pdu_unpack(val, pdu, offset, sizeof(val)); + *valp = val; break; } case 'd': { - int32_t *valp = va_arg(ap, int32_t *); - offset += pdu_unpack(valp, pdu, offset, sizeof(*valp)); + uint32_t val, *valp; + valp = va_arg(ap, uint32_t *); + val = le32_to_cpupu(valp); + offset += pdu_unpack(val, pdu, offset, sizeof(val)); + *valp = val; break; } case 'q': { - int64_t *valp = va_arg(ap, int64_t *); - offset += pdu_unpack(valp, pdu, offset, sizeof(*valp)); + uint64_t val, *valp; + valp = va_arg(ap, uint64_t *); + val = le64_to_cpup(valp); + offset += pdu_unpack(val, pdu, offset, sizeof(val)); + *valp = val; break; } case 'v': { @@ -498,22 +507,25 @@ static size_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) 
for (i = 0; fmt[i]; i++) { switch (fmt[i]) { case 'b': { - int8_t val = va_arg(ap, int); + uint8_t val = va_arg(ap, int); offset += pdu_pack(pdu, offset, val, sizeof(val)); break; } case 'w': { - int16_t val = va_arg(ap, int); + uint16_t val; + cpu_to_le16w(val, va_arg(ap, int)); offset += pdu_pack(pdu, offset, val, sizeof(val)); break; } case 'd': { - int32_t val = va_arg(ap, int); + uint32_t val; + cpu_to_le32w(val, va_arg(ap, uint32_t)); offset += pdu_pack(pdu, offset, val, sizeof(val)); break; } case 'q': { - int64_t val = va_arg(ap, int64_t); + uint64_t val; + cpu_to_le64w(val, va_arg(ap, uint64_t)); offset += pdu_pack(pdu, offset, val, sizeof(val)); break; } -- 1.7.0.2.273.gc2413
[Qemu-devel] [PATCH -v2 19/22] virtio-9p: Get the correct count values from the pdu
From: Venkateswararao Jujjuri jv...@linux.vnet.ibm.com PDU contain little endian format for integer values. So we need to make sure we map them to host format. Also the count value can be in another sg offset other than 0. Use the right functions to get the count value Signed-off-by: Venkateswararao Jujjuri jv...@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p-debug.c | 29 +++-- 1 files changed, 19 insertions(+), 10 deletions(-) diff --git a/hw/virtio-9p-debug.c b/hw/virtio-9p-debug.c index ee222db..e8ede8e 100644 --- a/hw/virtio-9p-debug.c +++ b/hw/virtio-9p-debug.c @@ -181,20 +181,25 @@ static void pprint_stat(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) static void pprint_strs(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) { +int sg_count = get_sg_count(pdu, rx); struct iovec *sg = get_sg(pdu, rx); size_t offset = *offsetp; -int16_t count, i; +uint16_t tmp_count, count, i; +size_t copied = 0; fprintf(llogfile, %s={, name); -BUG_ON((offset + 2) sg[0].iov_len); -memcpy(count, sg[0].iov_base + offset, 2); -offset += 2; +/* Get the count */ +copied = do_pdu_unpack(tmp_count, sg, sg_count, offset, sizeof(tmp_count)); +BUG_ON(copied != sizeof(tmp_count)); +count = le16_to_cpupu(tmp_count); +offset += copied; for (i = 0; i count; i++) { char str[512]; -if (i) +if (i) { fprintf(llogfile, , ); +} snprintf(str, sizeof(str), [%d], i); pprint_str(pdu, rx, offset, str); } @@ -206,20 +211,24 @@ static void pprint_strs(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) static void pprint_qids(V9fsPDU *pdu, int rx, size_t *offsetp, const char *name) { +int sg_count = get_sg_count(pdu, rx); struct iovec *sg = get_sg(pdu, rx); size_t offset = *offsetp; -int16_t count, i; +uint16_t tmp_count, count, i; +size_t copied = 0; fprintf(llogfile, %s={, name); -BUG_ON((offset + 2) sg[0].iov_len); -memcpy(count, sg[0].iov_base + offset, 2); -offset += 2; +copied = do_pdu_unpack(tmp_count, sg, sg_count, offset, 
sizeof(tmp_count)); +BUG_ON(copied != sizeof(tmp_count)); +count = le16_to_cpupu(tmp_count); +offset += copied; for (i = 0; i count; i++) { char str[512]; -if (i) +if (i) { fprintf(llogfile, , ); +} snprintf(str, sizeof(str), [%d], i); pprint_qid(pdu, rx, offset, str); } -- 1.7.0.2.273.gc2413
[Qemu-devel] [PATCH -v2 20/22] virtio-9p: Remove BUG_ON and add proper error handling
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p.c | 106 1 files changed, 84 insertions(+), 22 deletions(-) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index 1237bac..3ce26ca 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -244,7 +244,6 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid) return NULL; f = qemu_mallocz(sizeof(V9fsFidState)); -BUG_ON(f == NULL); f-fid = fid; f-fd = -1; @@ -320,15 +319,18 @@ static void stat_to_qid(const struct stat *stbuf, V9fsQID *qidp) qidp-type |= P9_QID_TYPE_SYMLINK; } -static void fid_to_qid(V9fsState *s, V9fsFidState *fidp, V9fsQID *qidp) +static int fid_to_qid(V9fsState *s, V9fsFidState *fidp, V9fsQID *qidp) { struct stat stbuf; int err; err = posix_lstat(s, fidp-path, stbuf); -BUG_ON(err == -1); +if (err) { +return err; +} stat_to_qid(stbuf, qidp); +return 0; } static V9fsPDU *alloc_pdu(V9fsState *s) @@ -653,7 +655,7 @@ static uint32_t stat_to_v9mode(const struct stat *stbuf) return mode; } -static void stat_to_v9stat(V9fsState *s, V9fsString *name, +static int stat_to_v9stat(V9fsState *s, V9fsString *name, const struct stat *stbuf, V9fsStat *v9stat) { @@ -681,7 +683,10 @@ static void stat_to_v9stat(V9fsState *s, V9fsString *name, if (v9stat-mode P9_STAT_MODE_SYMLINK) { err = posix_readlink(s, name, v9stat-extension); -BUG_ON(err == -1); +if (err == -1) { +err = -errno; +return err; +} v9stat-extension.data[err] = 0; v9stat-extension.size = err; } else if (v9stat-mode P9_STAT_MODE_DEVICE) { @@ -708,6 +713,7 @@ static void stat_to_v9stat(V9fsState *s, V9fsString *name, v9fs_string_size(v9stat-gid) + v9fs_string_size(v9stat-muid) + v9fs_string_size(v9stat-extension); +return 0; } static void v9fs_version(V9fsState *s, V9fsPDU *pdu) @@ -745,7 +751,12 @@ static void v9fs_attach(V9fsState *s, V9fsPDU *pdu) fidp-uid = n_uname; v9fs_string_sprintf(fidp-path, %s, /); -fid_to_qid(s, fidp, qid); +err = fid_to_qid(s, fidp, qid); +if (err) { +err = -EINVAL; +free_fid(s, fid); +goto out; +} 
offset += pdu_marshal(pdu, offset, Q, qid); @@ -772,7 +783,10 @@ static void v9fs_stat_post_lstat(V9fsState *s, V9fsStatState *vs, int err) goto out; } -stat_to_v9stat(s, vs-fidp-path, vs-stbuf, vs-v9stat); +err = stat_to_v9stat(s, vs-fidp-path, vs-stbuf, vs-v9stat); +if (err) { +goto out; +} vs-offset += pdu_marshal(vs-pdu, vs-offset, wS, 0, vs-v9stat); err = vs-offset; @@ -925,10 +939,8 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu) if(vs-nwnames) { vs-wnames = qemu_mallocz(sizeof(vs-wnames[0]) * vs-nwnames); -BUG_ON(vs-wnames == NULL); vs-qids = qemu_mallocz(sizeof(vs-qids[0]) * vs-nwnames); -BUG_ON(vs-qids == NULL); for (i = 0; i vs-nwnames; i++) { vs-offset += pdu_unmarshal(vs-pdu, vs-offset, s, @@ -1070,7 +1082,10 @@ out: static void v9fs_open_post_lstat(V9fsState *s, V9fsOpenState *vs, int err) { -BUG_ON(err == -1); +if (err) { +err = -errno; +goto out; +} stat_to_qid(vs-stbuf, vs-qid); @@ -1082,7 +1097,10 @@ static void v9fs_open_post_lstat(V9fsState *s, V9fsOpenState *vs, int err) omode_to_uflags(vs-mode)); v9fs_open_post_open(s, vs, err); } - +return; +out: +complete_pdu(s, vs-pdu, err); +qemu_free(vs); } static void v9fs_open(V9fsState *s, V9fsPDU *pdu) @@ -1186,11 +1204,15 @@ static void v9fs_read_post_readdir(V9fsState *, V9fsReadState *, ssize_t ); static void v9fs_read_post_seekdir(V9fsState *s, V9fsReadState *vs, ssize_t err) { +if (err) { +goto out; +} v9fs_stat_free(vs-v9stat); v9fs_string_free(vs-name); vs-offset += pdu_marshal(vs-pdu, vs-offset, d, vs-count); vs-offset += vs-count; err = vs-offset; +out: complete_pdu(s, vs-pdu, err); qemu_free(vs); return; @@ -1199,8 +1221,14 @@ static void v9fs_read_post_seekdir(V9fsState *s, V9fsReadState *vs, ssize_t err) static void v9fs_read_post_dir_lstat(V9fsState *s, V9fsReadState *vs, ssize_t err) { -BUG_ON(err == -1); -stat_to_v9stat(s, vs-name, vs-stbuf, vs-v9stat); +if (err) { + err = -errno; + goto out; +} +err = stat_to_v9stat(s, vs-name, vs-stbuf, vs-v9stat); +if (err) { +goto out; +} vs-len 
= pdu_marshal(vs-pdu, vs-offset + 4 + vs-count, S, vs-v9stat); @@ -1217,6 +1245,11 @@ static void
[Qemu-devel] [PATCH -v2 13/22] virtio-9p: Implement P9_TFLUSH
From: Anthony Liguori aligu...@us.ibm.com Don't do anything special for flush Signed-off-by: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- hw/virtio-9p.c |5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/virtio-9p.c b/hw/virtio-9p.c index 4478e57..1dbb982 100644 --- a/hw/virtio-9p.c +++ b/hw/virtio-9p.c @@ -1717,10 +1717,11 @@ out: static void v9fs_flush(V9fsState *s, V9fsPDU *pdu) { -if (debug_9p_pdu) -pprint_pdu(pdu); +/* A nop call with no return */ +complete_pdu(s, pdu, 7); } + typedef struct V9fsRemoveState { V9fsPDU *pdu; size_t offset; -- 1.7.0.2.273.gc2413
[Qemu-devel] Re: KVM call agenda for Mar 16
Chris Wright chr...@redhat.com wrote: Please send in any agenda items you are interested in covering. Migration: - flexible migration: I hope to send an RFC patch in time for the call. idea is to use subsections. - callbacks. block migration introduced several callbacks: * cancel() * get_status() * release() in spice we now need another two callbacks: on_start() and on_end(). * on_start(): tells spice that migration has started (it will then manage certificates, passwords, ... itself) * on_end(): it is called when migration ends. spice uses it to transparently connect to the new host so the user doesn't have to reconnect - what to do on migration error: - target side: libvirt folks want the program to print a message if it fails. Current code spends 100% cpu time doing select on a closed fd. (patches already on the list to make it wait without using cpu). - source side: current behaviour if migration fails is to stop the vm. We have requests to make it continue (remember that this is live migration). what to do? adding a parameter like the block layer: migration_error=[stop|continue] any better ideas. - block migration: it added the set_params() callback, investigating why. I think that it can be done inside block_save_live(), but I haven't investigated it fully yet. Later, Juan. thanks, -chris
Re: [Qemu-devel] [PATCH] block: add logical_block_size property
ping? On Thu, Mar 04, 2010 at 02:20:17PM +0100, Christoph Hellwig wrote: Add a logical block size attribute as various guest side tools only increase the filesystem sector size based on it, not the advisory physical block size. For scsi we already have support for a different logical block size in place for CDROMs that we can build upon. Only my recent block device characteristics VPD page needs some fixups. Note that we leave the logical block size for CDROMs hardcoded as the 2k value is expected for it in general. For virtio-blk we already have a feature flag claiming to support a variable logical block size that was added for the s390 kuli hypervisor. Interestingly it does not actually change the units in which the protocol works, which is still fixed at 512 bytes, but only communicates a different minimum I/O granularity. So all we need to do in virtio is to add a trap for unaligned I/O and round down the device size to the next multiple of the logical block size. IDE does not support any other logical block size than 512 bytes. 
Signed-off-by: Christoph Hellwig h...@lst.de Index: qemu/block_int.h === --- qemu.orig/block_int.h 2010-03-03 19:16:13.408253228 +0100 +++ qemu/block_int.h 2010-03-03 19:16:43.030003751 +0100 @@ -209,6 +209,7 @@ struct DriveInfo; typedef struct BlockConf { struct DriveInfo *dinfo; uint16_t physical_block_size; +uint16_t logical_block_size; uint16_t min_io_size; uint32_t opt_io_size; } BlockConf; @@ -226,6 +227,8 @@ static inline unsigned int get_physical_ #define DEFINE_BLOCK_PROPERTIES(_state, _conf) \ DEFINE_PROP_DRIVE(drive, _state, _conf.dinfo),\ +DEFINE_PROP_UINT16(logical_block_size, _state,\ + _conf.logical_block_size, 512), \ DEFINE_PROP_UINT16(physical_block_size, _state, \ _conf.physical_block_size, 512), \ DEFINE_PROP_UINT16(min_io_size, _state, _conf.min_io_size, 512), \ Index: qemu/hw/scsi-disk.c === --- qemu.orig/hw/scsi-disk.c 2010-03-03 19:16:13.419254346 +0100 +++ qemu/hw/scsi-disk.c 2010-03-03 19:16:43.031004240 +0100 @@ -397,8 +397,10 @@ static int scsi_disk_emulate_inquiry(SCS } case 0xb0: /* block device characteristics */ { -unsigned int min_io_size = s-qdev.conf.min_io_size 9; -unsigned int opt_io_size = s-qdev.conf.opt_io_size 9; +unsigned int min_io_size = +s-qdev.conf.min_io_size / s-qdev.blocksize; +unsigned int opt_io_size = +s-qdev.conf.opt_io_size / s-qdev.blocksize; /* required VPD size with unmap support */ outbuf[3] = buflen = 0x3c; @@ -1028,11 +1030,12 @@ static int scsi_disk_initfn(SCSIDevice * s-bs = s-qdev.conf.dinfo-bdrv; if (bdrv_get_type_hint(s-bs) == BDRV_TYPE_CDROM) { -s-cluster_size = 4; +s-qdev.blocksize = 2048; } else { -s-cluster_size = 1; +s-qdev.blocksize = s-qdev.conf.logical_block_size; } -s-qdev.blocksize = 512 * s-cluster_size; +s-cluster_size = s-qdev.blocksize / 512; + s-qdev.type = TYPE_DISK; bdrv_get_geometry(s-bs, nb_sectors); nb_sectors /= s-cluster_size; Index: qemu/hw/virtio-blk.c === --- qemu.orig/hw/virtio-blk.c 2010-03-03 19:16:13.426273971 +0100 +++ qemu/hw/virtio-blk.c 2010-03-03 19:35:16.636028605 
+0100 @@ -27,6 +27,7 @@ typedef struct VirtIOBlock void *rq; QEMUBH *bh; BlockConf *conf; +unsigned short sector_mask; } VirtIOBlock; static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev) @@ -250,6 +251,11 @@ static void virtio_blk_handle_flush(Virt static void virtio_blk_handle_write(BlockRequest *blkreq, int *num_writes, VirtIOBlockReq *req, BlockDriverState **old_bs) { +if (req-out-sector req-dev-sector_mask) { +virtio_blk_rw_complete(req, -EIO); +return; +} + if (req-dev-bs != *old_bs || *num_writes == 32) { if (*old_bs != NULL) { do_multiwrite(*old_bs, blkreq, *num_writes); @@ -272,6 +278,11 @@ static void virtio_blk_handle_read(VirtI { BlockDriverAIOCB *acb; +if (req-out-sector req-dev-sector_mask) { +virtio_blk_rw_complete(req, -EIO); +return; +} + acb = bdrv_aio_readv(req-dev-bs, req-out-sector, req-qiov, req-qiov.size / 512, virtio_blk_rw_complete, req); if (!acb) { @@ -404,12 +415,13 @@ static void virtio_blk_update_config(Vir stl_raw(blkcfg.seg_max, 128 - 2); stw_raw(blkcfg.cylinders, cylinders);
[Qemu-devel] Re: KVM call agenda for Mar 16
On Tue, Mar 16, 2010 at 10:18:03AM +0100, Juan Quintela wrote: Chris Wright chr...@redhat.com wrote: Please send in any agenda items you are interested in covering. Migration: - flexible migration: I hope to sent an RFC patch on time for the call. idea is to use subsections. - callbacks. block migration introduced several callbacks: * cancel() * get_status() * release() in spice we need now another to callbacks: on_start() and on_end(). * on_start(): tells spice that migration has started (it will then manage certificates, passwords, ... itself) * on_end(): it is called when migration ends. spice use it to transparently connect to the new host and user don't have to reconnect - what to do on migration error: - target side: libvirt folks want the program to print a message if it fails. Current code spent 100% cpu time doing select on a closed fd. (patches already on the list to make it wait without using cpu). No, that is not correct. We want QEMU to exit when incoming migration fails. Printing to stderr is just something that will end up in the logs for admin to further diagnose the problem if required. There is nothing to be gained by leaving QEMU running, and everything to loose since the failed migration may have left it in a dangerous state from which you do not want to attempt incoming migration again. If we really want to leave it running when migration fails, then we're going to have to add yet another QMP event to inform libvirt when migration has finished/failed, and/or make 'query_migrate' work on the destination too. - source side: current behaviour if migration fails is to stop the vm. We have requests to make it continue (remember that this is live migration). what to do? adding a paramenter like the block layer: migration_error=[stop|continue] any better ideas. A parameter to the 'migrate' monitor command would be the logical place if we needed this configurable. Incidentally I have a feeling we might need to introduce a migration event in QMP. 
Currently libvirt polls on the 'query_migrate' command to get the ongoing migration status. This means there can be a delay in detecting completion as long as the polling interval - for this reason we just dropped libvirt's polling time from 1/2 sec to 50ms to ensure prompt detection. Daniel -- |: Red Hat, Engineering, London-o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org-o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
[Qemu-devel] Re: [PATCH 1/7] Add support for generic notifier lists (v2)
On 03/15/2010 10:34 PM, Anthony Liguori wrote: Notifiers are data-less callbacks and a notifier list is a list of registered notifiers that all are interested in a particular event. We'll use this in a few patches to implement mouse change notification. Looks nicer and lighter! -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: KVM call agenda for Mar 16
On 03/16/2010 11:29 AM, Daniel P. Berrange wrote: On Tue, Mar 16, 2010 at 10:18:03AM +0100, Juan Quintela wrote: Chris Wrightchr...@redhat.com wrote: Please send in any agenda items you are interested in covering. Migration: - flexible migration: I hope to sent an RFC patch on time for the call. idea is to use subsections. - callbacks. block migration introduced several callbacks: * cancel() * get_status() * release() in spice we need now another to callbacks: on_start() and on_end(). * on_start(): tells spice that migration has started (it will then manage certificates, passwords, ... itself) * on_end(): it is called when migration ends. spice use it to transparently connect to the new host and user don't have to reconnect - what to do on migration error: - target side: libvirt folks want the program to print a message if it fails. Current code spent 100% cpu time doing select on a closed fd. (patches already on the list to make it wait without using cpu). No, that is not correct. We want QEMU to exit when incoming migration fails. Printing to stderr is just something that will end up in the logs for admin to further diagnose the problem if required. There is nothing to be gained by leaving QEMU running, and everything to loose since the failed migration may have left it in a dangerous state from which you do not want to attempt incoming migration again. If we really want to leave it running when migration fails, then we're going to have to add yet another QMP event to inform libvirt when migration has finished/failed, and/or make 'query_migrate' work on the destination too. A qmp event seems the logical thing to do? Exiting can happen for many reasons, a qmp event is unambiguous. - source side: current behaviour if migration fails is to stop the vm. We have requests to make it continue (remember that this is live migration). what to do? adding a paramenter like the block layer: migration_error=[stop|continue] any better ideas. 
A parameter to the 'migrate' monitor command would be the logical place if we needed this configurable. Incidentally I have a feeling we might need to introduce a migration event in QMP. Currently libvirt polls on the 'query_migrate' command to get the ongoing migration status. This means there can be a delay in detecting completion as long as the polling interval - for this reason we just dropped libvirt's polling time from 1/2 sec to 50ms to ensure prompt detection. Whenever you implement a polling loop, can you send an event to qemu-de...@? Polling loops are an indication that something is wrong. -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: KVM call agenda for Mar 16
On Tue, Mar 16, 2010 at 09:29:44AM +, Daniel P. Berrange wrote: On Tue, Mar 16, 2010 at 10:18:03AM +0100, Juan Quintela wrote: Chris Wright chr...@redhat.com wrote: Please send in any agenda items you are interested in covering. Migration: - flexible migration: I hope to sent an RFC patch on time for the call. idea is to use subsections. - callbacks. block migration introduced several callbacks: * cancel() * get_status() * release() in spice we need now another to callbacks: on_start() and on_end(). * on_start(): tells spice that migration has started (it will then manage certificates, passwords, ... itself) * on_end(): it is called when migration ends. spice use it to transparently connect to the new host and user don't have to reconnect - what to do on migration error: - target side: libvirt folks want the program to print a message if it fails. Current code spent 100% cpu time doing select on a closed fd. (patches already on the list to make it wait without using cpu). No, that is not correct. We want QEMU to exit when incoming migration fails. Printing to stderr is just something that will end up in the logs for admin to further diagnose the problem if required. There is nothing to be gained by leaving QEMU running, and everything to loose since the failed migration may have left it in a dangerous state from which you do not want to attempt incoming migration again. Sorry, I forgot to include the original BZ report about this problem from Fedora. In essence, we just truncated the original save state image and then tried to restore from it to check handling in the event of corrupted save image. https://bugzilla.redhat.com/show_bug.cgi?id=518032 Regards, Daniel -- |: Red Hat, Engineering, London-o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org-o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
[Qemu-devel] Re: KVM call agenda for Mar 16
On Tue, Mar 16, 2010 at 12:38:02PM +0200, Avi Kivity wrote: On 03/16/2010 12:31 PM, Daniel P. Berrange wrote: Polling loops are an indication that something is wrong. Except when people suggest they are the right answer, qcow high watermark ;-P I liked Anthony's suggestion of an lvm2 block format driver. No polling. Doesn't that require giving QEMU privileges to perform LVM operations which implies QEMU having CAP_SYS_ADMIN ? Daniel -- |: Red Hat, Engineering, London-o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org-o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
[Qemu-devel] Re: KVM call agenda for Mar 16
On Tue, Mar 16, 2010 at 12:38:02PM +0200, Avi Kivity wrote: On 03/16/2010 12:31 PM, Daniel P. Berrange wrote: Polling loops are an indication that something is wrong. Except when people suggest they are the right answer, qcow high watermark ;-P I liked Anthony's suggestion of an lvm2 block format driver. No polling. I have done some work on linking the new lvm library to qemu to control snapshotting. But introducing a whole new block format seems like a lot of duplication to me.
[Qemu-devel] Re: KVM call agenda for Mar 16
On 03/16/2010 12:31 PM, Daniel P. Berrange wrote: Polling loops are an indication that something is wrong. Except when people suggest they are the right answer, qcow high watermark ;-P I liked Anthony's suggestion of an lvm2 block format driver. No polling. -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: KVM call agenda for Mar 16
On Tue, Mar 16, 2010 at 11:43:48AM +0200, Avi Kivity wrote: On 03/16/2010 11:29 AM, Daniel P. Berrange wrote: On Tue, Mar 16, 2010 at 10:18:03AM +0100, Juan Quintela wrote: Chris Wrightchr...@redhat.com wrote: Please send in any agenda items you are interested in covering. Migration: - flexible migration: I hope to sent an RFC patch on time for the call. idea is to use subsections. - callbacks. block migration introduced several callbacks: * cancel() * get_status() * release() in spice we need now another to callbacks: on_start() and on_end(). * on_start(): tells spice that migration has started (it will then manage certificates, passwords, ... itself) * on_end(): it is called when migration ends. spice use it to transparently connect to the new host and user don't have to reconnect - what to do on migration error: - target side: libvirt folks want the program to print a message if it fails. Current code spent 100% cpu time doing select on a closed fd. (patches already on the list to make it wait without using cpu). No, that is not correct. We want QEMU to exit when incoming migration fails. Printing to stderr is just something that will end up in the logs for admin to further diagnose the problem if required. There is nothing to be gained by leaving QEMU running, and everything to loose since the failed migration may have left it in a dangerous state from which you do not want to attempt incoming migration again. If we really want to leave it running when migration fails, then we're going to have to add yet another QMP event to inform libvirt when migration has finished/failed, and/or make 'query_migrate' work on the destination too. A qmp event seems the logical thing to do? Exiting can happen for many reasons, a qmp event is unambiguous. Yes, for the QEMU upstream adding an event is more flexible. I had originally suggested exiting in the context of the Fedora bug report which was for QEMU 0.10.x which has no events capability. 
Incidentally I have a feeling we might need to introduce a migration event in QMP. Currently libvirt polls on the 'query_migrate' command to get the ongoing migration status. This means there can be a delay in detecting completion as long as the polling interval - for this reason we just dropped libvirt's polling time from 1/2 sec to 50ms to ensure prompt detection. Whenever you implement a polling loop, can you send an event to qemu-de...@? Yep, sure thing. This is the only polling loop that isn't related to I/O stats collection. Polling loops are an indication that something is wrong. Except when people suggest they are the right answer, qcow high watermark ;-P Regards, Daniel -- |: Red Hat, Engineering, London-o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org-o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
[Qemu-devel] [PATCH 0/6] qemu-kvm: Introduce bit-based phys_ram_dirty, and bit-based dirty page checker.
The dirty and non-dirty pages are checked one by one in vl.c. When most of the memory is not dirty, checking the dirty and non-dirty pages in units of multiple pages should be much faster than checking them one by one. We introduced bit-based phys_ram_dirty for VGA, CODE and MIGRATION, and cpu_physical_memory_get_dirty_range() for this purpose. This patch is based on the following discussion. http://www.mail-archive.com/k...@vger.kernel.org/msg28733.html To verify this expectation, we have evaluated the effect of this patch. We compared the runtime of the original ram_save_remaining() with that of ram_save_remaining() using the functions of this patch. Test Environment: CPU: 4x Intel Xeon Quad Core 2.66GHz Mem size: 96GB kvm version: 2.6.33 qemu-kvm version: commit 2b644fd0e737407133c88054ba498e772ce01f27 Host OS: CentOS (kernel 2.6.33) Guest OS: Debian/GNU Linux lenny (kernel 2.6.26) Guest Mem size: 512MB Conditions of experiments are as follows: Cond1: Guest OS periodically makes the 256MB continuous dirty pages. Cond2: Guest OS periodically makes the 256MB dirty pages and non-dirty pages in turn. Cond3: Guest OS reads a 1GB file, which is bigger than memory. Cond4: Guest OS writes a 1GB file, which is bigger than memory. Experimental results: Cond1: 1.9 ~ 61 times speed up Cond2: 1.9 ~ 56 times speed up Cond3: 1.9 ~ 59 times speed up Cond4: 1.7 ~ 59 times speed up
[Qemu-devel] [PATCH 5/6] qemu-kvm: Use cpu_physical_memory_set_dirty_range() to update phys_ram_dirty.
Modifies kvm_get_dirty_pages_log_range to use cpu_physical_memory_set_dirty_range() to update the row of the bit-based phys_ram_dirty bitmap at once. Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- qemu-kvm.c | 19 ++- 1 files changed, 6 insertions(+), 13 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index e417f21..75fa9b0 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -2305,9 +2305,8 @@ static int kvm_get_dirty_pages_log_range(unsigned long start_addr, unsigned long offset, unsigned long mem_size) { -unsigned int i, j; -unsigned long page_number, addr, addr1, c; -ram_addr_t ram_addr; +unsigned int i; +unsigned long page_number, addr, addr1; unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) / HOST_LONG_BITS; @@ -2317,16 +2316,10 @@ static int kvm_get_dirty_pages_log_range(unsigned long start_addr, */ for (i = 0; i len; i++) { if (bitmap[i] != 0) { -c = leul_to_cpu(bitmap[i]); -do { -j = ffsl(c) - 1; -c = ~(1ul j); -page_number = i * HOST_LONG_BITS + j; -addr1 = page_number * TARGET_PAGE_SIZE; -addr = offset + addr1; -ram_addr = cpu_get_physical_page_desc(addr); -cpu_physical_memory_set_dirty(ram_addr); -} while (c != 0); +page_number = i * HOST_LONG_BITS; +addr1 = page_number * TARGET_PAGE_SIZE; +addr = offset + addr1; +cpu_physical_memory_set_dirty_range(addr, leul_to_cpu(bitmap[i])); } } return 0; -- 1.7.0.31.g1df487
[Qemu-devel] [PATCH 6/6] qemu-kvm: Use cpu_physical_memory_get_dirty_range() to check multiple dirty pages.
Modifies ram_save_block() and ram_save_remaining() to use cpu_physical_memory_get_dirty_range() to check multiple dirty and non-dirty pages at once. Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- vl.c | 55 +++ 1 files changed, 35 insertions(+), 20 deletions(-) diff --git a/vl.c b/vl.c index 6e35cc6..e9ad7c9 100644 --- a/vl.c +++ b/vl.c @@ -2779,7 +2779,8 @@ static int ram_save_block(QEMUFile *f) static ram_addr_t current_addr = 0; ram_addr_t saved_addr = current_addr; ram_addr_t addr = 0; -int found = 0; +ram_addr_t dirty_rams[HOST_LONG_BITS]; +int i, found = 0; while (addr last_ram_offset) { if (kvm_enabled() current_addr == 0) { @@ -2791,28 +2792,35 @@ static int ram_save_block(QEMUFile *f) return 0; } } -if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) { +if ((found = cpu_physical_memory_get_dirty_range( + current_addr, last_ram_offset, dirty_rams, HOST_LONG_BITS, + MIGRATION_DIRTY_FLAG))) { uint8_t *p; -cpu_physical_memory_reset_dirty(current_addr, -current_addr + TARGET_PAGE_SIZE, -MIGRATION_DIRTY_FLAG); +for (i = 0; i found; i++) { +ram_addr_t page_addr = dirty_rams[i]; +cpu_physical_memory_reset_dirty(page_addr, +page_addr + TARGET_PAGE_SIZE, +MIGRATION_DIRTY_FLAG); -p = qemu_get_ram_ptr(current_addr); +p = qemu_get_ram_ptr(page_addr); -if (is_dup_page(p, *p)) { -qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS); -qemu_put_byte(f, *p); -} else { -qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE); -qemu_put_buffer(f, p, TARGET_PAGE_SIZE); +if (is_dup_page(p, *p)) { +qemu_put_be64(f, (page_addr) | + RAM_SAVE_FLAG_COMPRESS); +qemu_put_byte(f, *p); +} else { +qemu_put_be64(f, (page_addr) | + RAM_SAVE_FLAG_PAGE); +qemu_put_buffer(f, p, TARGET_PAGE_SIZE); +} } - -found = 1; + break; +} else { +addr += dirty_rams[0]; +current_addr = (saved_addr + addr) % last_ram_offset; } -addr += TARGET_PAGE_SIZE; -current_addr = (saved_addr + addr) % last_ram_offset; } return found; @@ 
-2822,12 +2830,19 @@ static uint64_t bytes_transferred; static ram_addr_t ram_save_remaining(void) { -ram_addr_t addr; +ram_addr_t addr = 0; ram_addr_t count = 0; +ram_addr_t dirty_rams[HOST_LONG_BITS]; +int found = 0; -for (addr = 0; addr last_ram_offset; addr += TARGET_PAGE_SIZE) { -if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG)) -count++; +while (addr last_ram_offset) { +if ((found = cpu_physical_memory_get_dirty_range(addr, last_ram_offset, +dirty_rams, HOST_LONG_BITS, MIGRATION_DIRTY_FLAG))) { +count += found; +addr = dirty_rams[found - 1] + TARGET_PAGE_SIZE; +} else { +addr += dirty_rams[0]; +} } return count; -- 1.7.0.31.g1df487
[Qemu-devel] [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.
Modifies wrapper functions for byte-based phys_ram_dirty bitmap to bit-based phys_ram_dirty bitmap, and adds more wrapper functions to prevent direct access to the phys_ram_dirty bitmap. Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- cpu-all.h | 94 ++-- 1 files changed, 90 insertions(+), 4 deletions(-) diff --git a/cpu-all.h b/cpu-all.h index 9bc01b9..91ec3e5 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -843,7 +843,9 @@ int cpu_str_to_log_mask(const char *str); /* memory API */ extern int phys_ram_fd; -extern uint8_t *phys_ram_dirty; +extern unsigned long *phys_ram_vga_dirty; +extern unsigned long *phys_ram_code_dirty; +extern unsigned long *phys_ram_migration_dirty; extern ram_addr_t ram_size; extern ram_addr_t last_ram_offset; extern uint8_t *bios_mem; @@ -879,20 +881,104 @@ int cpu_memory_rw_debug(CPUState *env, target_ulong addr, /* read dirty bit (return 0 or 1) */ static inline int cpu_physical_memory_is_dirty(ram_addr_t addr) { -return phys_ram_dirty[addr TARGET_PAGE_BITS] == 0xff; +unsigned long mask; +int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; +int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + +mask = 1UL offset; +return (phys_ram_vga_dirty[index] +phys_ram_code_dirty[index] +phys_ram_migration_dirty[index] mask) == mask; +} + +static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) +{ +unsigned long mask; +int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; +int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); +int ret = 0; + +mask = 1UL offset; +if (phys_ram_vga_dirty[index] mask) +ret |= VGA_DIRTY_FLAG; +if (phys_ram_code_dirty[index] mask) +ret |= CODE_DIRTY_FLAG; +if (phys_ram_migration_dirty[index] mask) +ret |= MIGRATION_DIRTY_FLAG; + +return ret; } static inline int cpu_physical_memory_get_dirty(ram_addr_t addr, int dirty_flags) { -return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags; +return cpu_physical_memory_get_dirty_flags(addr) 
dirty_flags; } static inline void cpu_physical_memory_set_dirty(ram_addr_t addr) { -phys_ram_dirty[addr TARGET_PAGE_BITS] = 0xff; +unsigned long mask; +int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; +int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + +mask = 1UL offset; +phys_ram_vga_dirty[index] |= mask; +phys_ram_code_dirty[index] |= mask; +phys_ram_migration_dirty[index] |= mask; +} + +static inline void cpu_physical_memory_set_dirty_range(ram_addr_t addr, + unsigned long mask) +{ +int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; + +phys_ram_vga_dirty[index] |= mask; +phys_ram_code_dirty[index] |= mask; +phys_ram_migration_dirty[index] |= mask; } +static inline void cpu_physical_memory_set_dirty_flags(ram_addr_t addr, + int dirty_flags) +{ +unsigned long mask; +int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; +int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + +mask = 1UL offset; +if (dirty_flags VGA_DIRTY_FLAG) +phys_ram_vga_dirty[index] |= mask; +if (dirty_flags CODE_DIRTY_FLAG) +phys_ram_code_dirty[index] |= mask; +if (dirty_flags MIGRATION_DIRTY_FLAG) +phys_ram_migration_dirty[index] |= mask; +} + +static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start, +int length, +int dirty_flags) +{ +ram_addr_t addr = start; +unsigned long mask; +int index, offset, i; + +for (i = 0; i length; i += TARGET_PAGE_SIZE) { +index = ((addr + i) TARGET_PAGE_BITS) / HOST_LONG_BITS; +offset = ((addr + i) TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); +mask = ~(1UL offset); + +if (dirty_flags VGA_DIRTY_FLAG) +phys_ram_vga_dirty[index] = mask; +if (dirty_flags CODE_DIRTY_FLAG) +phys_ram_code_dirty[index] = mask; +if (dirty_flags MIGRATION_DIRTY_FLAG) +phys_ram_migration_dirty[index] = mask; + } +} + +int cpu_physical_memory_get_dirty_range(ram_addr_t start, ram_addr_t end, +ram_addr_t *dirty_rams, int length, +int dirty_flags); + void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, int dirty_flags); void 
cpu_tlb_update_dirty(CPUState *env); -- 1.7.0.31.g1df487
[Qemu-devel] [PATCH 3/6] qemu-kvm: Replace direct phys_ram_dirty access with wrapper functions.
Replaces direct phys_ram_dirty access with wrapper functions to prevent direct access to the phys_ram_dirty bitmap. Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- exec.c | 45 - 1 files changed, 20 insertions(+), 25 deletions(-) diff --git a/exec.c b/exec.c index ba334e7..b31c349 100644 --- a/exec.c +++ b/exec.c @@ -1946,7 +1946,7 @@ static void tlb_protect_code(ram_addr_t ram_addr) static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr, target_ulong vaddr) { -phys_ram_dirty[ram_addr TARGET_PAGE_BITS] |= CODE_DIRTY_FLAG; +cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG); } static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, @@ -1967,8 +1967,7 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, { CPUState *env; unsigned long length, start1; -int i, mask, len; -uint8_t *p; +int i; start = TARGET_PAGE_MASK; end = TARGET_PAGE_ALIGN(end); @@ -1976,11 +1975,7 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, length = end - start; if (length == 0) return; -len = length TARGET_PAGE_BITS; -mask = ~dirty_flags; -p = phys_ram_dirty + (start TARGET_PAGE_BITS); -for(i = 0; i len; i++) -p[i] = mask; +cpu_physical_memory_mask_dirty_range(start, length, dirty_flags); /* we modify the TLB cache so that the dirty bit will be set again when accessing the range */ @@ -2837,16 +2832,16 @@ static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr, uint32_t val) { int dirty_flags; -dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; +dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); if (!(dirty_flags CODE_DIRTY_FLAG)) { #if !defined(CONFIG_USER_ONLY) tb_invalidate_phys_page_fast(ram_addr, 1); -dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; +dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); #endif } stb_p(qemu_get_ram_ptr(ram_addr), val); dirty_flags |= (0xff ~CODE_DIRTY_FLAG); 
-phys_ram_dirty[ram_addr TARGET_PAGE_BITS] = dirty_flags; +cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags); /* we remove the notdirty callback only if the code has been flushed */ if (dirty_flags == 0xff) @@ -2857,16 +2852,16 @@ static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr, uint32_t val) { int dirty_flags; -dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; +dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); if (!(dirty_flags CODE_DIRTY_FLAG)) { #if !defined(CONFIG_USER_ONLY) tb_invalidate_phys_page_fast(ram_addr, 2); -dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; +dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); #endif } stw_p(qemu_get_ram_ptr(ram_addr), val); dirty_flags |= (0xff ~CODE_DIRTY_FLAG); -phys_ram_dirty[ram_addr TARGET_PAGE_BITS] = dirty_flags; +cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags); /* we remove the notdirty callback only if the code has been flushed */ if (dirty_flags == 0xff) @@ -2877,16 +2872,16 @@ static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr, uint32_t val) { int dirty_flags; -dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; +dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); if (!(dirty_flags CODE_DIRTY_FLAG)) { #if !defined(CONFIG_USER_ONLY) tb_invalidate_phys_page_fast(ram_addr, 4); -dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; +dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr); #endif } stl_p(qemu_get_ram_ptr(ram_addr), val); dirty_flags |= (0xff ~CODE_DIRTY_FLAG); -phys_ram_dirty[ram_addr TARGET_PAGE_BITS] = dirty_flags; +cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags); /* we remove the notdirty callback only if the code has been flushed */ if (dirty_flags == 0xff) @@ -3337,8 +3332,8 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, /* invalidate code */ tb_invalidate_phys_page_range(addr1, addr1 + l, 0); /* set dirty bit */ -phys_ram_dirty[addr1 
TARGET_PAGE_BITS] |= -(0xff ~CODE_DIRTY_FLAG); +cpu_physical_memory_set_dirty_flags( +addr1, (0xff ~CODE_DIRTY_FLAG)); } /* qemu doesn't execute guest code directly, but kvm does therefore flush instruction caches
[Qemu-devel] [PATCH 4/6] qemu-kvm: Introduce cpu_physical_memory_get_dirty_range().
Introduces cpu_physical_memory_get_dirty_range(). It checks the first row and puts dirty addr in the array. If the first row is empty, it skips to the first non-dirty row or the end addr, and put the length in the first entry of the array. Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- exec.c | 73 1 files changed, 73 insertions(+), 0 deletions(-) diff --git a/exec.c b/exec.c index b31c349..87056a6 100644 --- a/exec.c +++ b/exec.c @@ -1961,6 +1961,79 @@ static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, } } +/* It checks the first row and puts dirty addrs in the array. + If the first row is empty, it skips to the first non-dirty row + or the end addr, and put the length in the first entry of the array. */ +int cpu_physical_memory_get_dirty_range(ram_addr_t start, ram_addr_t end, +ram_addr_t *dirty_rams, int length, +int dirty_flag) +{ +unsigned long phys_ram_dirty, page_number, *p; +ram_addr_t addr; +int s_idx = (start TARGET_PAGE_BITS) / HOST_LONG_BITS; +int e_idx = (end TARGET_PAGE_BITS) / HOST_LONG_BITS; +int i, j, offset; + +switch (dirty_flag) { +case VGA_DIRTY_FLAG: +p = phys_ram_vga_dirty; +break; +case CODE_DIRTY_FLAG: +p = phys_ram_code_dirty; +break; +case MIGRATION_DIRTY_FLAG: +p = phys_ram_migration_dirty; +break; +default: +abort(); +} + +/* mask bits before the start addr */ +offset = (start TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); +phys_ram_dirty = p[s_idx] ~((1UL offset) - 1); + +if (s_idx == e_idx) { +/* mask bits after the end addr */ +offset = (end TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); +phys_ram_dirty = (1UL offset) - 1; +} + +if (phys_ram_dirty == 0) { +/* when the row is empty */ +ram_addr_t skip; +if (s_idx == e_idx) +skip = end; +else { +/* skip empty rows */ +while (s_idx e_idx p[++s_idx] == 0); +skip = (s_idx * HOST_LONG_BITS * TARGET_PAGE_SIZE); +} +dirty_rams[0] = skip - start; +i = 0; + +} else if (phys_ram_dirty == ~0UL) { +/* when the row is fully dirtied */ 
+addr = start; +for (i = 0; i length; i++) { +dirty_rams[i] = addr; +addr += TARGET_PAGE_SIZE; +} +} else { +/* when the row is partially dirtied */ +i = 0; +do { +j = ffsl(phys_ram_dirty) - 1; +phys_ram_dirty = ~(1UL j); +page_number = s_idx * HOST_LONG_BITS + j; +addr = page_number * TARGET_PAGE_SIZE; +dirty_rams[i] = addr; +i++; +} while (phys_ram_dirty != 0 i length); +} + +return i; +} + /* Note: start and end must be within the same ram block. */ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, int dirty_flags) -- 1.7.0.31.g1df487
[Qemu-devel] Re: KVM call agenda for Mar 16
On 03/16/2010 12:45 PM, Daniel P. Berrange wrote: On Tue, Mar 16, 2010 at 12:38:02PM +0200, Avi Kivity wrote: On 03/16/2010 12:31 PM, Daniel P. Berrange wrote: Polling loops are an indication that something is wrong. Except when people suggest they are the right answer, qcow high watermark ;-P I liked Anthony's suggestion of an lvm2 block format driver. No polling. Doesn't that require giving QEMU privileges to perform LVM operations which implies QEMU having CAP_SYS_ADMIN ? Ouch. I expect fd permissions on the volume are insufficient, and fd permissions on the group are excessive. -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: [PATCH 1/6] qemu-kvm: Introduce bit-based phys_ram_dirty for VGA, CODE and MIGRATION.
On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Replaces byte-based phys_ram_dirty bitmap with three bit-based phys_ram_dirty bitmap. On allocation, it sets all bits in the bitmap. Signed-off-by: Yoshiaki Tamuratamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Keiohmura@lab.ntt.co.jp --- exec.c | 22 +- 1 files changed, 17 insertions(+), 5 deletions(-) diff --git a/exec.c b/exec.c index 9bcb4de..ba334e7 100644 --- a/exec.c +++ b/exec.c @@ -119,7 +119,9 @@ uint8_t *code_gen_ptr; #if !defined(CONFIG_USER_ONLY) int phys_ram_fd; -uint8_t *phys_ram_dirty; +unsigned long *phys_ram_vga_dirty; +unsigned long *phys_ram_code_dirty; +unsigned long *phys_ram_migration_dirty; Would be nice to make this an array. -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.
On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Modifies wrapper functions for byte-based phys_ram_dirty bitmap to bit-based phys_ram_dirty bitmap, and adds more wrapper functions to prevent direct access to the phys_ram_dirty bitmap. + +static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) +{ +unsigned long mask; +int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; +int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); +int ret = 0; + +mask = 1UL offset; +if (phys_ram_vga_dirty[index] mask) +ret |= VGA_DIRTY_FLAG; +if (phys_ram_code_dirty[index] mask) +ret |= CODE_DIRTY_FLAG; +if (phys_ram_migration_dirty[index] mask) +ret |= MIGRATION_DIRTY_FLAG; + +return ret; } static inline int cpu_physical_memory_get_dirty(ram_addr_t addr, int dirty_flags) { -return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags; +return cpu_physical_memory_get_dirty_flags(addr) dirty_flags; } This turns one cacheline access into three. If the dirty bitmaps were in an array, you could do return dirty_bitmaps[dirty_index][addr (TARGET_PAGE_BITS + BITS_IN_LONG)] mask; with one cacheline access. static inline void cpu_physical_memory_set_dirty(ram_addr_t addr) { -phys_ram_dirty[addr TARGET_PAGE_BITS] = 0xff; +unsigned long mask; +int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; +int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + +mask = 1UL offset; +phys_ram_vga_dirty[index] |= mask; +phys_ram_code_dirty[index] |= mask; +phys_ram_migration_dirty[index] |= mask; +} This is also three cacheline accesses. I think we should have a master bitmap which is updated by set_dirty(), and which is or'ed into the other bitmaps when they are accessed. At least the vga and migration bitmaps are only read periodically, not randomly, so this would be very fast. In a way, this is similar to how the qemu bitmap is updated from the kvm bitmap today. I am not sure about the code bitmap though. -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: [PATCH 4/6] qemu-kvm: Introduce cpu_physical_memory_get_dirty_range().
On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Introduces cpu_physical_memory_get_dirty_range(). It checks the first row and puts dirty addr in the array. If the first row is empty, it skips to the first non-dirty row or the end addr, and put the length in the first entry of the array. +/* It checks the first row and puts dirty addrs in the array. + If the first row is empty, it skips to the first non-dirty row + or the end addr, and put the length in the first entry of the array. */ +int cpu_physical_memory_get_dirty_range(ram_addr_t start, ram_addr_t end, +ram_addr_t *dirty_rams, int length, +int dirty_flag) +{ +unsigned long phys_ram_dirty, page_number, *p; +ram_addr_t addr; +int s_idx = (start TARGET_PAGE_BITS) / HOST_LONG_BITS; +int e_idx = (end TARGET_PAGE_BITS) / HOST_LONG_BITS; +int i, j, offset; + +switch (dirty_flag) { +case VGA_DIRTY_FLAG: +p = phys_ram_vga_dirty; +break; +case CODE_DIRTY_FLAG: +p = phys_ram_code_dirty; +break; +case MIGRATION_DIRTY_FLAG: +p = phys_ram_migration_dirty; +break; +default: +abort(); +} This bit would be improved by switching to an array of bitmaps. -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: [PATCH 1/6] qemu-kvm: Introduce bit-based phys_ram_dirty for VGA, CODE and MIGRATION.
Avi Kivity wrote: On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Replaces byte-based phys_ram_dirty bitmap with three bit-based phys_ram_dirty bitmap. On allocation, it sets all bits in the bitmap. Signed-off-by: Yoshiaki Tamura tamura.yoshi...@lab.ntt.co.jp Signed-off-by: OHMURA Kei ohmura@lab.ntt.co.jp --- exec.c | 22 +- 1 files changed, 17 insertions(+), 5 deletions(-) diff --git a/exec.c b/exec.c index 9bcb4de..ba334e7 100644 --- a/exec.c +++ b/exec.c @@ -119,7 +119,9 @@ uint8_t *code_gen_ptr; #if !defined(CONFIG_USER_ONLY) int phys_ram_fd; -uint8_t *phys_ram_dirty; +unsigned long *phys_ram_vga_dirty; +unsigned long *phys_ram_code_dirty; +unsigned long *phys_ram_migration_dirty; Would be nice to make this an array. Thanks for pointing that out. I have a question regarding the index of the array. From the compatibility perspective, I would prefer using the existing macros. #define VGA_DIRTY_FLAG 0x01 #define CODE_DIRTY_FLAG 0x02 #define MIGRATION_DIRTY_FLAG 0x08 However, if I use them as is, I'll get a sparse array... Is it acceptable to change these values to something like 0, 1, 2?
[Qemu-devel] Re: [PATCH 1/6] qemu-kvm: Introduce bit-based phys_ram_dirty for VGA, CODE and MIGRATION.
On 03/16/2010 03:01 PM, Yoshiaki Tamura wrote: -uint8_t *phys_ram_dirty; +unsigned long *phys_ram_vga_dirty; +unsigned long *phys_ram_code_dirty; +unsigned long *phys_ram_migration_dirty; Would be nice to make this an array. Thanks for pointing out. I have a question regarding the index of the array. From the compatibility perspective, I would prefer using the existing macros. #define VGA_DIRTY_FLAG 0x01 #define CODE_DIRTY_FLAG 0x02 #define MIGRATION_DIRTY_FLAG 0x08 However, if I use them as is, I'll get a sparse array... Is it acceptable to change these values like 0, 1, 2? Sure. -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: [PATCH 0/6] qemu-kvm: Introduce bit-based phys_ram_dirty, and bit-based dirty page checker.
On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Experimental results: Cond1: 1.9 ~ 61 times speed up Cond2: 1.9 ~ 56 times speed up Cond3: 1.9 ~ 59 times speed up Cond4: 1.7 ~ 59 times speed up Impressive results. What's the typical speedup? Closer to 1.9 or 61? Note the issue with the cache accesses for set_dirty() is only applicable to tcg, since kvm always updates the dirty bitmap in a batch (well, I/O also updates the bitmap). -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.
Avi Kivity wrote: On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Modifies wrapper functions for byte-based phys_ram_dirty bitmap to bit-based phys_ram_dirty bitmap, and adds more wrapper functions to prevent direct access to the phys_ram_dirty bitmap. + +static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) +{ + unsigned long mask; + int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; + int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + int ret = 0; + + mask = 1UL offset; + if (phys_ram_vga_dirty[index] mask) + ret |= VGA_DIRTY_FLAG; + if (phys_ram_code_dirty[index] mask) + ret |= CODE_DIRTY_FLAG; + if (phys_ram_migration_dirty[index] mask) + ret |= MIGRATION_DIRTY_FLAG; + + return ret; } static inline int cpu_physical_memory_get_dirty(ram_addr_t addr, int dirty_flags) { - return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags; + return cpu_physical_memory_get_dirty_flags(addr) dirty_flags; } This turns one cacheline access into three. If the dirty bitmaps were in an array, you could do return dirty_bitmaps[dirty_index][addr (TARGET_PAGE_BITS + BITS_IN_LONG)] mask; with one cacheline access. If I'm understanding the existing code correctly, int dirty_flags can be combined, like VGA + MIGRATION. If we only have to worry about a single dirty flag, I agree with your idea. On the other hand, qemu seems to require getting combined dirty flags. If we introduce dirty bitmaps for each type, we need to access each bitmap to get combined flags. I wasn't sure how to make this more efficient... static inline void cpu_physical_memory_set_dirty(ram_addr_t addr) { - phys_ram_dirty[addr TARGET_PAGE_BITS] = 0xff; + unsigned long mask; + int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; + int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + + mask = 1UL offset; + phys_ram_vga_dirty[index] |= mask; + phys_ram_code_dirty[index] |= mask; + phys_ram_migration_dirty[index] |= mask; +} This is also three cacheline accesses. 
I think we should have a master bitmap which is updated by set_dirty(), and which is or'ed into the other bitmaps when they are accessed. At least the vga and migration bitmaps are only read periodically, not randomly, so this would be very fast. In a way, this is similar to how the qemu bitmap is updated from the kvm bitmap today. Sounds good to me. So we're going to introduce 4 (VGA, CODE, MIGRATION, master) bit-based bitmaps in total.
[Qemu-devel] Re: [PATCH 1/7] Add support for generic notifier lists (v2)
Anthony Liguori aligu...@us.ibm.com wrote: Notifiers are data-less callbacks and a notifier list is a list of registered notifiers that all are interested in a particular event. We'll use this in a few patches to implement mouse change notification. We could use that for migration also. Spice just needs to be notified when migration starts and ends. And block migration added a new callback that is basically "call this on start". Later, Juan.
[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.
On 03/16/2010 03:17 PM, Yoshiaki Tamura wrote: Avi Kivity wrote: On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Modifies wrapper functions for byte-based phys_ram_dirty bitmap to bit-based phys_ram_dirty bitmap, and adds more wrapper functions to prevent direct access to the phys_ram_dirty bitmap. + +static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) +{ + unsigned long mask; + int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; + int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + int ret = 0; + + mask = 1UL offset; + if (phys_ram_vga_dirty[index] mask) + ret |= VGA_DIRTY_FLAG; + if (phys_ram_code_dirty[index] mask) + ret |= CODE_DIRTY_FLAG; + if (phys_ram_migration_dirty[index] mask) + ret |= MIGRATION_DIRTY_FLAG; + + return ret; } static inline int cpu_physical_memory_get_dirty(ram_addr_t addr, int dirty_flags) { - return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags; + return cpu_physical_memory_get_dirty_flags(addr) dirty_flags; } This turns one cacheline access into three. If the dirty bitmaps were in an array, you could do return dirty_bitmaps[dirty_index][addr (TARGET_PAGE_BITS + BITS_IN_LONG)] mask; with one cacheline access. If I'm understanding the existing code correctly, int dirty_flags can be combined, like VGA + MIGRATION. If we only have to worry about a single dirty flag, I agree with your idea. 
From a quick grep it seems flags are not combined, except for something strange with CODE_DIRTY_FLAG: static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr, uint32_t val) { int dirty_flags; dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; if (!(dirty_flags CODE_DIRTY_FLAG)) { #if !defined(CONFIG_USER_ONLY) tb_invalidate_phys_page_fast(ram_addr, 4); dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; #endif } stl_p(qemu_get_ram_ptr(ram_addr), val); dirty_flags |= (0xff ~CODE_DIRTY_FLAG); phys_ram_dirty[ram_addr TARGET_PAGE_BITS] = dirty_flags; /* we remove the notdirty callback only if the code has been flushed */ if (dirty_flags == 0xff) tlb_set_dirty(cpu_single_env, cpu_single_env-mem_io_vaddr); } I can't say I understand what it does. On the other hand, qemu seems to require getting combined dirty flags. If we introduce dirty bitmaps for each type, we need to access each bitmap to get combined flags. I wasn't sure how to make this more efficient... static inline void cpu_physical_memory_set_dirty(ram_addr_t addr) { - phys_ram_dirty[addr TARGET_PAGE_BITS] = 0xff; + unsigned long mask; + int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; + int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + + mask = 1UL offset; + phys_ram_vga_dirty[index] |= mask; + phys_ram_code_dirty[index] |= mask; + phys_ram_migration_dirty[index] |= mask; +} This is also three cacheline accesses. I think we should have a master bitmap which is updated by set_dirty(), and which is or'ed into the other bitmaps when they are accessed. At least the vga and migration bitmaps are only read periodically, not randomly, so this would be very fast. In a way, this is similar to how the qemu bitmap is updated from the kvm bitmap today. Sounds good to me. So we're going to introduce 4 (VGA, CODE, MIGRATION, master) bit-based bitmaps in total. Yeah, except CODE doesn't behave like the others. 
Would be best to understand what its requirements are before making the change. Maybe CODE will need separate handling (so master will only feed VGA and MIGRATION). -- error compiling committee.c: too many arguments to function
[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.
On 03/16/2010 07:45 AM, Avi Kivity wrote: On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Modifies wrapper functions for byte-based phys_ram_dirty bitmap to bit-based phys_ram_dirty bitmap, and adds more wrapper functions to prevent direct access to the phys_ram_dirty bitmap. + +static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) +{ +unsigned long mask; +int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; +int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); +int ret = 0; + +mask = 1UL offset; +if (phys_ram_vga_dirty[index] mask) +ret |= VGA_DIRTY_FLAG; +if (phys_ram_code_dirty[index] mask) +ret |= CODE_DIRTY_FLAG; +if (phys_ram_migration_dirty[index] mask) +ret |= MIGRATION_DIRTY_FLAG; + +return ret; } static inline int cpu_physical_memory_get_dirty(ram_addr_t addr, int dirty_flags) { -return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags; +return cpu_physical_memory_get_dirty_flags(addr) dirty_flags; } This turns one cacheline access into three. If the dirty bitmaps were in an array, you could do return dirty_bitmaps[dirty_index][addr (TARGET_PAGE_BITS + BITS_IN_LONG)] mask; with one cacheline access. As far as I can tell, we only ever call with a single flag so your suggestion makes sense. I'd suggest introducing these functions before splitting the bitmap up. It makes review a bit easier. static inline void cpu_physical_memory_set_dirty(ram_addr_t addr) { -phys_ram_dirty[addr TARGET_PAGE_BITS] = 0xff; +unsigned long mask; +int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; +int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + +mask = 1UL offset; +phys_ram_vga_dirty[index] |= mask; +phys_ram_code_dirty[index] |= mask; +phys_ram_migration_dirty[index] |= mask; +} This is also three cacheline accesses. I think we should have a master bitmap which is updated by set_dirty(), and which is or'ed into the other bitmaps when they are accessed. 
At least the vga and migration bitmaps are only read periodically, not randomly, so this would be very fast. In a way, this is similar to how the qemu bitmap is updated from the kvm bitmap today. I am not sure about the code bitmap though. I think your suggestion makes sense and would also work for the code bitmap. Regards, Anthony Liguori
[Qemu-devel] Re: [PATCH 0/6] qemu-kvm: Introduce bit-based phys_ram_dirty, and bit-based dirty page checker.
Avi Kivity wrote: On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Experimental results: Cond1: 1.9 ~ 61 times speed up Cond2: 1.9 ~ 56 times speed up Cond3: 1.9 ~ 59 times speed up Cond4: 1.7 ~ 59 times speed up Impressive results. What's the typical speedup? Closer to 1.9 or 61? To be honest, I thought the result above was too vague... The speed up grows when the number of dirty pages decreases. Let me paste an excerpt of the actual data measured during live migration on Cond1. This result is measured with cpu_get_real_ticks(), so the values should be in raw ticks. 135200 dirty pages: orig.2488419, bitbased.1251171, ratio.1.99 ... 98346 dirty pages: orig.3580533, bitbased.1386918, ratio.2.58 ... 54865 dirty pages: orig.4220865, bitbased.984924, ratio.4.29 ... 27883 dirty pages: orig.4088970, bitbased.514602, ratio.7.95 ... 11541 dirty pages: orig.3854277, bitbased.220410, ratio.17.49 ... 8117 dirty pages: orig.4041765, bitbased.175446, ratio.23.04 3231 dirty pages: orig.3337083, bitbased.105921, ratio.31.51 2401 dirty pages: orig.4103469, bitbased.89406, ratio.45.90 1595 dirty pages: orig.4028949, bitbased.78570, ratio.51.28 756 dirty pages: orig.4036707, bitbased.67662, ratio.59.66 0 dirty pages: orig.3938085, bitbased.23634, ratio.166.63 0 dirty pages: orig.3968163, bitbased.23526, ratio.168.67 We didn't show the data for checking a completely empty bitmap because it was too fast and we didn't want to give the wrong impression. Note the issue with the cache accesses for set_dirty() is only applicable to tcg, since kvm always updates the dirty bitmap in a batch (well, I/O also updates the bitmap). I understand. I'm still concerned about the way the dirty bitmap is reset. I was thinking of resetting it in a batch, but that seems difficult because it has to stay consistent with the TLB.
[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.
Avi Kivity wrote: On 03/16/2010 03:17 PM, Yoshiaki Tamura wrote: Avi Kivity wrote: On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Modifies wrapper functions for byte-based phys_ram_dirty bitmap to bit-based phys_ram_dirty bitmap, and adds more wrapper functions to prevent direct access to the phys_ram_dirty bitmap. + +static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) +{ + unsigned long mask; + int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; + int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + int ret = 0; + + mask = 1UL offset; + if (phys_ram_vga_dirty[index] mask) + ret |= VGA_DIRTY_FLAG; + if (phys_ram_code_dirty[index] mask) + ret |= CODE_DIRTY_FLAG; + if (phys_ram_migration_dirty[index] mask) + ret |= MIGRATION_DIRTY_FLAG; + + return ret; } static inline int cpu_physical_memory_get_dirty(ram_addr_t addr, int dirty_flags) { - return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags; + return cpu_physical_memory_get_dirty_flags(addr) dirty_flags; } This turns one cacheline access into three. If the dirty bitmaps were in an array, you could do return dirty_bitmaps[dirty_index][addr (TARGET_PAGE_BITS + BITS_IN_LONG)] mask; with one cacheline access. If I'm understanding the existing code correctly, int dirty_flags can be combined, like VGA + MIGRATION. If we only have to worry about a single dirty flag, I agree with your idea. From a quick grep it seems flags are not combined, except for something strange with CODE_DIRTY_FLAG: Thanks for checking out. But the CODE_DIRTY_FLAG makes me really nervous... 
static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr, uint32_t val) { int dirty_flags; dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; if (!(dirty_flags CODE_DIRTY_FLAG)) { #if !defined(CONFIG_USER_ONLY) tb_invalidate_phys_page_fast(ram_addr, 4); dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; #endif } stl_p(qemu_get_ram_ptr(ram_addr), val); dirty_flags |= (0xff ~CODE_DIRTY_FLAG); phys_ram_dirty[ram_addr TARGET_PAGE_BITS] = dirty_flags; /* we remove the notdirty callback only if the code has been flushed */ if (dirty_flags == 0xff) tlb_set_dirty(cpu_single_env, cpu_single_env-mem_io_vaddr); } I can't say I understand what it does. Me neither. This the reason I had to take naive approach... On the other hand, qemu seems to require getting combined dirty flags. If we introduce dirty bitmaps for each type, we need to access each bitmap to get combined flags. I wasn't sure how to make this more efficient... static inline void cpu_physical_memory_set_dirty(ram_addr_t addr) { - phys_ram_dirty[addr TARGET_PAGE_BITS] = 0xff; + unsigned long mask; + int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; + int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + + mask = 1UL offset; + phys_ram_vga_dirty[index] |= mask; + phys_ram_code_dirty[index] |= mask; + phys_ram_migration_dirty[index] |= mask; +} This is also three cacheline accesses. I think we should have a master bitmap which is updated by set_dirty(), and which is or'ed into the other bitmaps when they are accessed. At least the vga and migration bitmaps are only read periodically, not randomly, so this would be very fast. In a way, this is similar to how the qemu bitmap is updated from the kvm bitmap today. Sounds good to me. So we're going to introduce 4 (VGA, CODE, MIGRATION, master) bit-based bitmaps in total. Yeah, except CODE doesn't behave like the others. Would be best to understand what it's requirements are before making the change. 
Maybe CODE will need separate handling (so master will only feed VGA and MIGRATION). After implementing this patch set, I thought separating the wrapper functions for each dirty flag type might be an option. Unifying everything makes it inefficient here. But anyway, do you know somebody who has a strong insight on this CODE_DIRTY_FLAG?
[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.
On 03/16/2010 08:29 AM, Avi Kivity wrote: On 03/16/2010 03:17 PM, Yoshiaki Tamura wrote: Avi Kivity wrote: On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Modifies wrapper functions for byte-based phys_ram_dirty bitmap to bit-based phys_ram_dirty bitmap, and adds more wrapper functions to prevent direct access to the phys_ram_dirty bitmap. + +static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) +{ + unsigned long mask; + int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; + int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + int ret = 0; + + mask = 1UL offset; + if (phys_ram_vga_dirty[index] mask) + ret |= VGA_DIRTY_FLAG; + if (phys_ram_code_dirty[index] mask) + ret |= CODE_DIRTY_FLAG; + if (phys_ram_migration_dirty[index] mask) + ret |= MIGRATION_DIRTY_FLAG; + + return ret; } static inline int cpu_physical_memory_get_dirty(ram_addr_t addr, int dirty_flags) { - return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags; + return cpu_physical_memory_get_dirty_flags(addr) dirty_flags; } This turns one cacheline access into three. If the dirty bitmaps were in an array, you could do return dirty_bitmaps[dirty_index][addr (TARGET_PAGE_BITS + BITS_IN_LONG)] mask; with one cacheline access. If I'm understanding the existing code correctly, int dirty_flags can be combined, like VGA + MIGRATION. If we only have to worry about a single dirty flag, I agree with your idea. 
From a quick grep it seems flags are not combined, except for something strange with CODE_DIRTY_FLAG: static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr, uint32_t val) { int dirty_flags; dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; if (!(dirty_flags CODE_DIRTY_FLAG)) { #if !defined(CONFIG_USER_ONLY) tb_invalidate_phys_page_fast(ram_addr, 4); dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; #endif } stl_p(qemu_get_ram_ptr(ram_addr), val); dirty_flags |= (0xff ~CODE_DIRTY_FLAG); phys_ram_dirty[ram_addr TARGET_PAGE_BITS] = dirty_flags; /* we remove the notdirty callback only if the code has been flushed */ if (dirty_flags == 0xff) tlb_set_dirty(cpu_single_env, cpu_single_env-mem_io_vaddr); } I can't say I understand what it does. The semantics of CODE_DIRTY_FLAG are a little counter intuitive. CODE_DIRTY_FLAG means that we know that something isn't code so writes do not need checking for self modifying code. notdirty_mem_write() is called for any ram that is in the virtual TLB that has not been updated yet and once a write has occurred, we can switch to faster access functions (provided we've invalidated any translation blocks). That's why the check is if (!(dirty_flags CODE_DIRTY_FLAG)), if it hasn't been set yet, we have to assume that it could be a TB so we need to invalidate it. tb_invalidate_phys_page_fast() will set the CODE_DIRTY_FLAG if no code is present in that memory area which is why we fetch dirty_flags again. We do the store, and then set the dirty bits to mark that the page is now dirty taking care to not change the CODE_DIRTY_FLAG bit. At the very end, we check to see if CODE_DIRTY_FLAG which indicates that we no longer need to trap writes. If so, we call tlb_set_dirty() which will ultimately remove the notdirty callback in favor of a faster access mechanism. 
With respect to the patch series, there should be no problem having a separate code bitmap that gets updated along with a main bitmap provided that the semantics of CODE_DIRTY_FLAG are preserved. Sounds good to me. So we're going to introduce 4 (VGA, CODE, MIGRATION, master) bit-based bitmaps in total. Yeah, except CODE doesn't behave like the others. Would be best to understand what its requirements are before making the change. Maybe CODE will need separate handling (so master will only feed VGA and MIGRATION). Generally speaking, cpu_physical_memory_set_dirty() is called by the device model. Any writes by the device model that result in self-modifying code are not going to have predictable semantics which is why it can set CODE_DIRTY_FLAG. CODE_DIRTY_FLAG doesn't need to get updated from a master bitmap. It should be treated as a separate bitmap that is strictly dealt with by the virtual TLB. Regards, Anthony Liguori
Re: [Qemu-devel] wake-on-lan IPMI implementation; real power-off and -no-shutdown
On Mon, 15 Mar 2010 15:55:26 + Daniel P. Berrange berra...@redhat.com wrote: On Mon, Mar 15, 2010 at 04:01:27PM +0100, François Revol wrote: Hello, while working on a demonstrator for a green-IT project, to show scheduled machine shutdown and powering depending on various conditions, I wondered if I could use QEMU with wake-on-lan transparently, but it seems it's not implemented at all. I thought I could try to add support for it, and with -S it theoretically should be doable at least for the first boot, but the network packets do not go much further until the NIC is actually initialized, as most network layers use qemu_can_send_packet() which returns 0 if the machine is stopped. Hacking this function to return 1 seems to push the packet upward, but I couldn't find a single point where I could check for WOL packets, different -net subsystems using different code paths. Also, it seems -no-shutdown doesn't actually stop the emulation as said in the manual, it actually keeps the vm running (and using cpu), despite the OS trying to shutdown via ACPI. At least I tested so with Haiku (and acpi=true in kernel config), which properly exits QEMU without -no-shutdown. Hmm, I think -no-shutdown should at least stop the CPUs executing. It is not really useful on its own though. The app managing QEMU would want to use the new JSON based monitor to listen for the SHUTDOWN event to be emitted, so it can detect the shutdown completing then take action it wants either reset the guest, or kill QEMU, etc If I'm not missing something, -no-shutdown calls vm_stop(), which calls pause_all_vcpus().
[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.
On 03/16/2010 03:51 PM, Anthony Liguori wrote: On 03/16/2010 08:29 AM, Avi Kivity wrote: On 03/16/2010 03:17 PM, Yoshiaki Tamura wrote: Avi Kivity wrote: On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Modifies wrapper functions for byte-based phys_ram_dirty bitmap to bit-based phys_ram_dirty bitmap, and adds more wrapper functions to prevent direct access to the phys_ram_dirty bitmap. + +static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) +{ + unsigned long mask; + int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; + int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + int ret = 0; + + mask = 1UL offset; + if (phys_ram_vga_dirty[index] mask) + ret |= VGA_DIRTY_FLAG; + if (phys_ram_code_dirty[index] mask) + ret |= CODE_DIRTY_FLAG; + if (phys_ram_migration_dirty[index] mask) + ret |= MIGRATION_DIRTY_FLAG; + + return ret; } static inline int cpu_physical_memory_get_dirty(ram_addr_t addr, int dirty_flags) { - return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags; + return cpu_physical_memory_get_dirty_flags(addr) dirty_flags; } This turns one cacheline access into three. If the dirty bitmaps were in an array, you could do return dirty_bitmaps[dirty_index][addr (TARGET_PAGE_BITS + BITS_IN_LONG)] mask; with one cacheline access. If I'm understanding the existing code correctly, int dirty_flags can be combined, like VGA + MIGRATION. If we only have to worry about a single dirty flag, I agree with your idea. 
From a quick grep it seems flags are not combined, except for something strange with CODE_DIRTY_FLAG: static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr, uint32_t val) { int dirty_flags; dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; if (!(dirty_flags CODE_DIRTY_FLAG)) { #if !defined(CONFIG_USER_ONLY) tb_invalidate_phys_page_fast(ram_addr, 4); dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; #endif } stl_p(qemu_get_ram_ptr(ram_addr), val); dirty_flags |= (0xff ~CODE_DIRTY_FLAG); phys_ram_dirty[ram_addr TARGET_PAGE_BITS] = dirty_flags; /* we remove the notdirty callback only if the code has been flushed */ if (dirty_flags == 0xff) tlb_set_dirty(cpu_single_env, cpu_single_env-mem_io_vaddr); } I can't say I understand what it does. The semantics of CODE_DIRTY_FLAG are a little counter intuitive. CODE_DIRTY_FLAG means that we know that something isn't code so writes do not need checking for self modifying code. So the hardware equivalent is, when the Instruction TLB loads a page address, clear CODE_DIRTY_FLAG? notdirty_mem_write() is called for any ram that is in the virtual TLB that has not been updated yet and once a write has occurred, we can switch to faster access functions (provided we've invalidated any translation blocks). That's why the check is if (!(dirty_flags CODE_DIRTY_FLAG)), if it hasn't been set yet, we have to assume that it could be a TB so we need to invalidate it. tb_invalidate_phys_page_fast() will set the CODE_DIRTY_FLAG if no code is present in that memory area which is why we fetch dirty_flags again. Ok. We do the store, and then set the dirty bits to mark that the page is now dirty taking care to not change the CODE_DIRTY_FLAG bit. At the very end, we check to see if CODE_DIRTY_FLAG which indicates that we no longer need to trap writes. If so, we call tlb_set_dirty() which will ultimately remove the notdirty callback in favor of a faster access mechanism. 
With respect patch series, there should be no problem having a separate code bitmap that gets updated along with a main bitmap provided that the semantics of CODE_DIRTY_FLAG are preserved. Sounds good to me. So we're going to introduce 4 (VGA, CODE, MIGRATION, master) bit-based bitmaps in total. Yeah, except CODE doesn't behave like the others. Would be best to understand what it's requirements are before making the change. Maybe CODE will need separate handling (so master will only feed VGA and MIGRATION). Generally speaking, cpu_physical_memory_set_dirty() is called by the device model. Any writes by the device model that results in self-modifying code are not going to have predictable semantics which is why it can set CODE_DIRTY_FLAG. CODE_DIRTY_FLAG doesn't need to get updated from a master bitmap. It should be treated as a separate bitmap that is strictly dealt with by the virtual TLB. Thanks. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] wake-on-lan IPMI implementation; real power-off and -no-shutdown
Also, it seems -no-shutdown doesn't actually stop the emulation as said in the manual, it actually keeps the vm running (and using cpu), despite the OS trying to shutdown via ACPI. At least I tested so with Haiku (and acpi=true in kernel config), which properly exits QEMU without -no-shutdown. Hmm, I think -no-shutdown should at least stop the CPUs executing. It is not really useful on its own though. The app managing QEMU would want to use the new JSON based monitor to listen for the SHUTDOWN event to be emitted, so it can detect the shutdown completing then take action it wants either reset the guest, or kill QEMU, etc If I'm not missing something, -no-shutdown calls vm_stop(), which calls pause_all_vcpus(). Oh indeed, info status shows the VM as paused. I was misled because the GUI window was still open... François.
[Qemu-devel] Re: [PATCH 2/6] qemu-kvm: Modify and introduce wrapper functions to access phys_ram_dirty.
On 03/16/2010 08:57 AM, Avi Kivity wrote: On 03/16/2010 03:51 PM, Anthony Liguori wrote: On 03/16/2010 08:29 AM, Avi Kivity wrote: On 03/16/2010 03:17 PM, Yoshiaki Tamura wrote: Avi Kivity wrote: On 03/16/2010 12:53 PM, Yoshiaki Tamura wrote: Modifies wrapper functions for byte-based phys_ram_dirty bitmap to bit-based phys_ram_dirty bitmap, and adds more wrapper functions to prevent direct access to the phys_ram_dirty bitmap. + +static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr) +{ + unsigned long mask; + int index = (addr TARGET_PAGE_BITS) / HOST_LONG_BITS; + int offset = (addr TARGET_PAGE_BITS) (HOST_LONG_BITS - 1); + int ret = 0; + + mask = 1UL offset; + if (phys_ram_vga_dirty[index] mask) + ret |= VGA_DIRTY_FLAG; + if (phys_ram_code_dirty[index] mask) + ret |= CODE_DIRTY_FLAG; + if (phys_ram_migration_dirty[index] mask) + ret |= MIGRATION_DIRTY_FLAG; + + return ret; } static inline int cpu_physical_memory_get_dirty(ram_addr_t addr, int dirty_flags) { - return phys_ram_dirty[addr TARGET_PAGE_BITS] dirty_flags; + return cpu_physical_memory_get_dirty_flags(addr) dirty_flags; } This turns one cacheline access into three. If the dirty bitmaps were in an array, you could do return dirty_bitmaps[dirty_index][addr (TARGET_PAGE_BITS + BITS_IN_LONG)] mask; with one cacheline access. If I'm understanding the existing code correctly, int dirty_flags can be combined, like VGA + MIGRATION. If we only have to worry about a single dirty flag, I agree with your idea. 
From a quick grep it seems flags are not combined, except for something strange with CODE_DIRTY_FLAG: static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr, uint32_t val) { int dirty_flags; dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; if (!(dirty_flags CODE_DIRTY_FLAG)) { #if !defined(CONFIG_USER_ONLY) tb_invalidate_phys_page_fast(ram_addr, 4); dirty_flags = phys_ram_dirty[ram_addr TARGET_PAGE_BITS]; #endif } stl_p(qemu_get_ram_ptr(ram_addr), val); dirty_flags |= (0xff ~CODE_DIRTY_FLAG); phys_ram_dirty[ram_addr TARGET_PAGE_BITS] = dirty_flags; /* we remove the notdirty callback only if the code has been flushed */ if (dirty_flags == 0xff) tlb_set_dirty(cpu_single_env, cpu_single_env-mem_io_vaddr); } I can't say I understand what it does. The semantics of CODE_DIRTY_FLAG are a little counter intuitive. CODE_DIRTY_FLAG means that we know that something isn't code so writes do not need checking for self modifying code. So the hardware equivalent is, when the Instruction TLB loads a page address, clear CODE_DIRTY_FLAG? Yes, and is what tlb_protect_code() does and it's called from tb_alloc_page() which is what's code when a TB is created. Regards, Anthony Liguori
[Qemu-devel] [PULL] e100/pci fixes
The following changes since commit cb66ffcf9e298dc1bfc11682172ff9472bcd4495: Kevin Wolf (1): qemu-img rebase: Document -f option are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/mst/qemu.git pci Michael S. Tsirkin (2): eepro100: address pci todo's, use pci_set_xx pcnet: make subsystem vendor id match hardware hw/eepro100.c | 94 +++- hw/pcnet.c|3 ++ 2 files changed, 35 insertions(+), 62 deletions(-)
[Qemu-devel] Re: KVM call agenda for Mar 16
On 03/16/2010 05:45 AM, Daniel P. Berrange wrote: On Tue, Mar 16, 2010 at 12:38:02PM +0200, Avi Kivity wrote: On 03/16/2010 12:31 PM, Daniel P. Berrange wrote: Polling loops are an indication that something is wrong. Except when people suggest they are the right answer, qcow high watermark ;-P I liked Anthony's suggestion of an lvm2 block format driver. No polling. Doesn't that require giving QEMU privileges to perform LVM operations which implies QEMU having CAP_SYS_ADMIN ? If QEMU is able to resize an LVM partition, it needs to carry privileges. I'm not sure how this can be done safely in a lesser privileged environment. Presumably, you're over committing storage and there's not much you can do if the guests exhaust their storage all at once. Regards, Anthony Liguori Daniel
[Qemu-devel] Re: [PATCHv4 09/12] vhost: vhost net support
On Mon, Mar 08, 2010 at 11:50:23AM +0530, Amit Shah wrote: On (Sat) Mar 06 2010 [21:06:35], Michael S. Tsirkin wrote: +r = vhost_virtqueue_set_addr(dev, vq, idx, dev-log_enabled); +if (r 0) { +r = -errno; +goto fail_alloc; +} +if (!vdev-binding-guest_notifier || !vdev-binding-host_notifier) { +fprintf(stderr, binding does not support irqfd/queuefd\n); +r = -ENOSYS; +goto fail_alloc; +} This could be checked much earlier on in the function; so that we avoid doing all that stuff above and the cleanup. Whatever order we put checks in, we'll have to undo stuff done beforehand on error. Not if you do this check before any ioctls or allocations. !vdev-binding-guest_notifier is not dependent on anything you do above it in this function, so just checking for this first thing in the function will not need any cleanup. Amit Yes, but I think it's clearer to do check function just before calling it. No?
[Qemu-devel] [PATCH 0/4] tcg-hppa git it working, v2
Changes since v2: * Fix cpu_signal_handler. At this point the port passes all of the integer gcc tests for i386-linux-uclibc. Many of the fp tests fail because target-i386 is horribly confused about how to represent the fpu when not being built on an i386 host. I briefly tried to force the use of the floatx80 format, but there's a whole tangle of follow-on errors in cpu.h and the helper files. I've been considering putting together the parts for an arm or mips userland to avoid this problem entirely, but haven't quite gotten that far. r~ Richard Henderson (4): tcg-hppa: Fix const errors in hppa-dis.c. tcg-hppa: Fix 64-bit argument ordering. tcg-hppa: Finish the port. tcg-hppa: Compute is_write in cpu_signal_handler. configure |5 +- cpu-exec.c| 38 +- hppa-dis.c|4 +- tcg/hppa/tcg-target.c | 1846 +++-- tcg/hppa/tcg-target.h | 142 +--- tcg/tcg.c | 12 +- 6 files changed, 1387 insertions(+), 660 deletions(-)
[Qemu-devel] [PATCH 1/4] tcg-hppa: Fix const errors in hppa-dis.c.
--- hppa-dis.c |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hppa-dis.c b/hppa-dis.c index 9d96d72..49f99c8 100644 --- a/hppa-dis.c +++ b/hppa-dis.c @@ -576,7 +576,7 @@ struct pa_opcode const char *name; unsigned long int match; /* Bits that must be set... */ unsigned long int mask;/* ... in these bits. */ -char *args; +const char *args; enum pa_arch arch; char flags; }; @@ -2753,7 +2753,7 @@ print_insn_hppa (bfd_vma memaddr, disassemble_info *info) int sf = GET_FIELD (insn, 19, 20); const char * const * source = float_format_names; const char * const * dest = float_format_names; - char *t = ; + const char *t = ; if (sub == 4) { -- 1.6.6.1
[Qemu-devel] [PATCH 4/4] tcg-hppa: Compute is_write in cpu_signal_handler.
--- cpu-exec.c | 38 +++--- 1 files changed, 31 insertions(+), 7 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index bcfcda2..14204f4 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -1193,15 +1193,39 @@ int cpu_signal_handler(int host_signum, void *pinfo, { struct siginfo *info = pinfo; struct ucontext *uc = puc; -unsigned long pc; -int is_write; +unsigned long pc = uc-uc_mcontext.sc_iaoq[0]; +uint32_t insn = *(uint32_t *)pc; +int is_write = 0; + +/* XXX: need kernel patch to get write flag faster. */ +switch (insn 26) { +case 0x1a: /* STW */ +case 0x19: /* STH */ +case 0x18: /* STB */ +case 0x1b: /* STWM */ +is_write = 1; +break; + +case 0x09: /* CSTWX, FSTWX, FSTWS */ +case 0x0b: /* CSTDX, FSTDX, FSTDS */ +/* Distinguish from coprocessor load ... */ +is_write = (insn 9) 1; +break; + +case 0x03: +switch ((insn 6) 15) { +case 0xa: /* STWS */ +case 0x9: /* STHS */ +case 0x8: /* STBS */ +case 0xe: /* STWAS */ +case 0xc: /* STBYS */ +is_write = 1; +} +break; +} -pc = uc-uc_mcontext.sc_iaoq[0]; -/* FIXME: compute is_write */ -is_write = 0; return handle_cpu_signal(pc, (unsigned long)info-si_addr, - is_write, - uc-uc_sigmask, puc); + is_write, uc-uc_sigmask, puc); } #else -- 1.6.6.1
[Qemu-devel] [PATCH 2/4] tcg-hppa: Fix 64-bit argument ordering.
--- tcg/tcg.c | 12 +++- 1 files changed, 11 insertions(+), 1 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 1818868..d753149 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -596,7 +596,17 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, real_args++; } #endif -#ifdef TCG_TARGET_WORDS_BIGENDIAN + /* If stack grows up, then we will be placing successive + arguments at lower addresses, which means we need to + reverse the order compared to how we would normally + treat either big or little-endian. For those arguments + that will wind up in registers, this still works for + HPPA (the only current STACK_GROWSUP target) since the + argument registers are *also* allocated in decreasing + order. If another such target is added, this logic may + have to get more complicated to differentiate between + stack arguments and register arguments. */ +#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) *gen_opparam_ptr++ = args[i] + 1; *gen_opparam_ptr++ = args[i]; #else -- 1.6.6.1
[Qemu-devel] Re: KVM call agenda for Mar 16
On 03/16/2010 10:23 AM, Daniel P. Berrange wrote: In the context of the RHEV management application, iSCSI/SCSI Fibre are providing the raw storage, with LVM VGs on top and then carving LVs for the guests. In the common case the admin/app would monitor VG usage and LV rate of increase to ensure extra space was available in the VG ahead of it being needed. eg if the VG comes close to exhaustion then further LUNS can be obtained and added as PVs to the LVM volume group. So you can't guarantee that a VM won't stop on ENOSPC, but it is very unlikely if the system is operating correctly. As an added complication, since cluster-LVM isn't used, all LVM operations have to be performed on a dedicated/exclusive storage host and then metadata refreshed/propagated to other hosts running VMs. This last issue implies that letting QEMU resize its LV would never be possible, even if it were not for the permissions problem. Sounds like a good argument for polling :-) Regards, Anthony Liguori Regards, Daniel
[Qemu-devel] [PATCH 0/5] target-alpha improvements
The major thing here is the addition of the CONST/PURE markers to the helper functions. In the process of studying the helper implementations to see how each ought to get marked, there were several follow-on improvements that caught my eye. r~ Richard Henderson (5): target-alpha: Add flags markups to helpers.h. target-alpha: Implement cpys{,n,e} inline. target-alpha: Implement rs/rc properly. target-alpha: Implement cvtql inline. target-alpha: Implement cvtlq inline. linux-user/main.c|5 ++ target-alpha/helper.h| 179 + target-alpha/op_helper.c | 73 +-- target-alpha/translate.c | 163 ++ 4 files changed, 239 insertions(+), 181 deletions(-)
[Qemu-devel] [PATCH 1/5] target-alpha: Add flags markups to helpers.h.
Almost all alpha helpers are at least TCG_CALL_CONST and a fair few are also TCG_CALL_PURE. Signed-off-by: Richard Henderson r...@twiddle.net --- target-alpha/helper.h | 184 1 files changed, 92 insertions(+), 92 deletions(-) diff --git a/target-alpha/helper.h b/target-alpha/helper.h index 79cf375..a508077 100644 --- a/target-alpha/helper.h +++ b/target-alpha/helper.h @@ -1,9 +1,9 @@ #include def-helper.h DEF_HELPER_2(excp, void, int, int) -DEF_HELPER_0(load_pcc, i64) -DEF_HELPER_0(rc, i64) -DEF_HELPER_0(rs, i64) +DEF_HELPER_FLAGS_0(load_pcc, TCG_CALL_CONST | TCG_CALL_PURE, i64) +DEF_HELPER_FLAGS_0(rc, TCG_CALL_CONST, i64) +DEF_HELPER_FLAGS_0(rs, TCG_CALL_CONST, i64) DEF_HELPER_2(addqv, i64, i64, i64) DEF_HELPER_2(addlv, i64, i64, i64) @@ -11,98 +11,98 @@ DEF_HELPER_2(subqv, i64, i64, i64) DEF_HELPER_2(sublv, i64, i64, i64) DEF_HELPER_2(mullv, i64, i64, i64) DEF_HELPER_2(mulqv, i64, i64, i64) -DEF_HELPER_2(umulh, i64, i64, i64) - -DEF_HELPER_1(ctpop, i64, i64) -DEF_HELPER_1(ctlz, i64, i64) -DEF_HELPER_1(cttz, i64, i64) - -DEF_HELPER_2(zap, i64, i64, i64) -DEF_HELPER_2(zapnot, i64, i64, i64) - -DEF_HELPER_2(cmpbge, i64, i64, i64) - -DEF_HELPER_2(minub8, i64, i64, i64) -DEF_HELPER_2(minsb8, i64, i64, i64) -DEF_HELPER_2(minuw4, i64, i64, i64) -DEF_HELPER_2(minsw4, i64, i64, i64) -DEF_HELPER_2(maxub8, i64, i64, i64) -DEF_HELPER_2(maxsb8, i64, i64, i64) -DEF_HELPER_2(maxuw4, i64, i64, i64) -DEF_HELPER_2(maxsw4, i64, i64, i64) -DEF_HELPER_2(perr, i64, i64, i64) -DEF_HELPER_1(pklb, i64, i64) -DEF_HELPER_1(pkwb, i64, i64) -DEF_HELPER_1(unpkbl, i64, i64) -DEF_HELPER_1(unpkbw, i64, i64) - -DEF_HELPER_0(load_fpcr, i64) -DEF_HELPER_1(store_fpcr, void, i64) - -DEF_HELPER_1(f_to_memory, i32, i64) -DEF_HELPER_1(memory_to_f, i64, i32) -DEF_HELPER_2(addf, i64, i64, i64) -DEF_HELPER_2(subf, i64, i64, i64) -DEF_HELPER_2(mulf, i64, i64, i64) -DEF_HELPER_2(divf, i64, i64, i64) -DEF_HELPER_1(sqrtf, i64, i64) - -DEF_HELPER_1(g_to_memory, i64, i64) -DEF_HELPER_1(memory_to_g, i64, i64) 
-DEF_HELPER_2(addg, i64, i64, i64) -DEF_HELPER_2(subg, i64, i64, i64) -DEF_HELPER_2(mulg, i64, i64, i64) -DEF_HELPER_2(divg, i64, i64, i64) -DEF_HELPER_1(sqrtg, i64, i64) - -DEF_HELPER_1(s_to_memory, i32, i64) -DEF_HELPER_1(memory_to_s, i64, i32) -DEF_HELPER_2(adds, i64, i64, i64) -DEF_HELPER_2(subs, i64, i64, i64) -DEF_HELPER_2(muls, i64, i64, i64) -DEF_HELPER_2(divs, i64, i64, i64) -DEF_HELPER_1(sqrts, i64, i64) - -DEF_HELPER_2(addt, i64, i64, i64) -DEF_HELPER_2(subt, i64, i64, i64) -DEF_HELPER_2(mult, i64, i64, i64) -DEF_HELPER_2(divt, i64, i64, i64) -DEF_HELPER_1(sqrtt, i64, i64) - -DEF_HELPER_2(cmptun, i64, i64, i64) -DEF_HELPER_2(cmpteq, i64, i64, i64) -DEF_HELPER_2(cmptle, i64, i64, i64) -DEF_HELPER_2(cmptlt, i64, i64, i64) -DEF_HELPER_2(cmpgeq, i64, i64, i64) -DEF_HELPER_2(cmpgle, i64, i64, i64) -DEF_HELPER_2(cmpglt, i64, i64, i64) - -DEF_HELPER_2(cpys, i64, i64, i64) -DEF_HELPER_2(cpysn, i64, i64, i64) -DEF_HELPER_2(cpyse, i64, i64, i64) - -DEF_HELPER_1(cvtts, i64, i64) -DEF_HELPER_1(cvtst, i64, i64) -DEF_HELPER_1(cvtqs, i64, i64) -DEF_HELPER_1(cvtqt, i64, i64) -DEF_HELPER_1(cvtqf, i64, i64) -DEF_HELPER_1(cvtgf, i64, i64) -DEF_HELPER_1(cvtgq, i64, i64) -DEF_HELPER_1(cvtqg, i64, i64) -DEF_HELPER_1(cvtlq, i64, i64) - -DEF_HELPER_1(cvttq, i64, i64) -DEF_HELPER_1(cvttq_c, i64, i64) -DEF_HELPER_1(cvttq_svic, i64, i64) - -DEF_HELPER_1(cvtql, i64, i64) +DEF_HELPER_FLAGS_2(umulh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_1(ctpop, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) +DEF_HELPER_FLAGS_1(ctlz, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) +DEF_HELPER_FLAGS_1(cttz, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) + +DEF_HELPER_FLAGS_2(zap, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(zapnot, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(cmpbge, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) + +DEF_HELPER_FLAGS_2(minub8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(minsb8, 
TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(minuw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(minsw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(maxub8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(maxsb8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(maxuw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(maxsw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_2(perr, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) +DEF_HELPER_FLAGS_1(pklb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) +DEF_HELPER_FLAGS_1(pkwb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) +DEF_HELPER_FLAGS_1(unpkbl, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) +DEF_HELPER_FLAGS_1(unpkbw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) + +DEF_HELPER_FLAGS_0(load_fpcr, TCG_CALL_CONST | TCG_CALL_PURE, i64) +DEF_HELPER_FLAGS_1(store_fpcr,
[Qemu-devel] [PATCH 2/5] target-alpha: Implement cpys{, n, e} inline.
Signed-off-by: Richard Henderson r...@twiddle.net --- target-alpha/helper.h|4 -- target-alpha/op_helper.c | 18 -- target-alpha/translate.c | 78 +++-- 3 files changed, 74 insertions(+), 26 deletions(-) diff --git a/target-alpha/helper.h b/target-alpha/helper.h index a508077..8e11304 100644 --- a/target-alpha/helper.h +++ b/target-alpha/helper.h @@ -77,10 +77,6 @@ DEF_HELPER_FLAGS_2(cmpgeq, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) DEF_HELPER_FLAGS_2(cmpgle, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) DEF_HELPER_FLAGS_2(cmpglt, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) -DEF_HELPER_FLAGS_2(cpys, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) -DEF_HELPER_FLAGS_2(cpysn, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) -DEF_HELPER_FLAGS_2(cpyse, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) - DEF_HELPER_FLAGS_1(cvtts, TCG_CALL_CONST, i64, i64) DEF_HELPER_FLAGS_1(cvtst, TCG_CALL_CONST, i64, i64) DEF_HELPER_FLAGS_1(cvtqs, TCG_CALL_CONST, i64, i64) diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c index 4d2c2ee..2419dc4 100644 --- a/target-alpha/op_helper.c +++ b/target-alpha/op_helper.c @@ -921,24 +921,6 @@ uint64_t helper_sqrtt (uint64_t a) return float64_to_t(fr); } - -/* Sign copy */ -uint64_t helper_cpys(uint64_t a, uint64_t b) -{ -return (a 0x8000ULL) | (b ~0x8000ULL); -} - -uint64_t helper_cpysn(uint64_t a, uint64_t b) -{ -return ((~a) 0x8000ULL) | (b ~0x8000ULL); -} - -uint64_t helper_cpyse(uint64_t a, uint64_t b) -{ -return (a 0xFFF0ULL) | (b ~0xFFF0ULL); -} - - /* Comparisons */ uint64_t helper_cmptun (uint64_t a, uint64_t b) { diff --git a/target-alpha/translate.c b/target-alpha/translate.c index 719b423..b677378 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -741,6 +741,80 @@ static inline void glue(gen_f, name)(DisasContext *ctx, \ IEEE_INTCVT(cvtqs) IEEE_INTCVT(cvtqt) +static void gen_cpys_internal(int ra, int rb, int rc, int inv_a, uint64_t mask) +{ +TCGv va, vb, vmask; +int za = 0, zb = 0; + +if 
(unlikely(rc == 31)) { +return; +} + +vmask = tcg_const_i64(mask); + +TCGV_UNUSED_I64(va); +if (ra == 31) { +if (inv_a) { +va = vmask; +} else { +za = 1; +} +} else { +va = tcg_temp_new_i64(); +tcg_gen_mov_i64(va, cpu_fir[ra]); +if (inv_a) { +tcg_gen_not_i64(va, va); +} +tcg_gen_and_i64(va, va, vmask); +} + +TCGV_UNUSED_I64(vb); +if (rb == 31) { +zb = 1; +} else { +vb = tcg_temp_new_i64(); +tcg_gen_andc_i64(vb, cpu_fir[rb], vmask); +} + +switch (za * 2 + zb) { +case 0: +tcg_gen_or_i64(cpu_fir[rc], va, vb); +break; +case 1: +tcg_gen_mov_i64(cpu_fir[rc], va); +break; +case 2: +tcg_gen_mov_i64(cpu_fir[rc], vb); +break; +case 3: +tcg_gen_movi_i64(cpu_fir[rc], 0); +break; +} + +tcg_temp_free(vmask); +if (ra != 31) { +tcg_temp_free(va); +} +if (rb != 31) { +tcg_temp_free(vb); +} +} + +static inline void gen_fcpys(int ra, int rb, int rc) +{ +gen_cpys_internal(ra, rb, rc, 0, 0x8000ULL); +} + +static inline void gen_fcpysn(int ra, int rb, int rc) +{ +gen_cpys_internal(ra, rb, rc, 1, 0x8000ULL); +} + +static inline void gen_fcpyse(int ra, int rb, int rc) +{ +gen_cpys_internal(ra, rb, rc, 0, 0xFFF0ULL); +} + #define FARITH3(name) \ static inline void glue(gen_f, name)(int ra, int rb, int rc)\ { \ @@ -769,10 +843,6 @@ static inline void glue(gen_f, name)(int ra, int rb, int rc)\ tcg_temp_free(vb); \ } \ } -/* ??? Ought to expand these inline; simple masking operations. */ -FARITH3(cpys) -FARITH3(cpysn) -FARITH3(cpyse) /* ??? VAX instruction qualifiers ignored. */ FARITH3(addf) -- 1.6.6.1
[Qemu-devel] [PATCH 4/5] target-alpha: Implement cvtql inline.
It's a simple mask and shift sequence. Also, fix a typo in the actual masks used. Signed-off-by: Richard Henderson r...@twiddle.net --- target-alpha/helper.h|4 target-alpha/op_helper.c | 20 target-alpha/translate.c | 45 +++-- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/target-alpha/helper.h b/target-alpha/helper.h index c378195..10c78d0 100644 --- a/target-alpha/helper.h +++ b/target-alpha/helper.h @@ -89,10 +89,6 @@ DEF_HELPER_FLAGS_1(cvttq, TCG_CALL_CONST, i64, i64) DEF_HELPER_FLAGS_1(cvttq_c, TCG_CALL_CONST, i64, i64) DEF_HELPER_FLAGS_1(cvttq_svic, TCG_CALL_CONST, i64, i64) -DEF_HELPER_FLAGS_1(cvtql, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) -DEF_HELPER_1(cvtql_v, i64, i64) -DEF_HELPER_1(cvtql_sv, i64, i64) - DEF_HELPER_FLAGS_1(setroundmode, TCG_CALL_CONST, void, i32) DEF_HELPER_FLAGS_1(setflushzero, TCG_CALL_CONST, void, i32) DEF_HELPER_FLAGS_0(fp_exc_clear, TCG_CALL_CONST, void) diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c index 84867b8..f9cd07a 100644 --- a/target-alpha/op_helper.c +++ b/target-alpha/op_helper.c @@ -1159,26 +1159,6 @@ uint64_t helper_cvtlq (uint64_t a) return (lo 0x3FFF) | (hi 0xc000); } -uint64_t helper_cvtql (uint64_t a) -{ -return ((a 0xC000) 32) | ((a 0x7FFF) 29); -} - -uint64_t helper_cvtql_v (uint64_t a) -{ -if ((int32_t)a != (int64_t)a) -helper_excp(EXCP_ARITH, EXC_M_IOV); -return helper_cvtql(a); -} - -uint64_t helper_cvtql_sv (uint64_t a) -{ -/* ??? I'm pretty sure there's nothing that /sv needs to do that /v - doesn't do. The only thing I can think is that /sv is a valid - instruction merely for completeness in the ISA. 
*/ -return helper_cvtql_v(a); -} - /* PALcode support special instructions */ #if !defined (CONFIG_USER_ONLY) void helper_hw_rei (void) diff --git a/target-alpha/translate.c b/target-alpha/translate.c index 188e76c..cfdf441 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -597,6 +597,41 @@ static inline void gen_fp_exc_raise(int rc, int fn11) gen_fp_exc_raise_ignore(rc, fn11, fn11 QUAL_I ? 0 : float_flag_inexact); } +static void gen_fcvtql(int rb, int rc) +{ +if (unlikely(rc == 31)) { +return; +} +if (unlikely(rb == 31)) { +tcg_gen_movi_i64(cpu_fir[rc], 0); +} else { +TCGv tmp = tcg_temp_new(); + +tcg_gen_andi_i64(tmp, cpu_fir[rb], 0xC000); +tcg_gen_andi_i64(cpu_fir[rc], cpu_fir[rb], 0x3FFF); +tcg_gen_shli_i64(tmp, tmp, 32); +tcg_gen_shli_i64(cpu_fir[rc], cpu_fir[rc], 29); +tcg_gen_or_i64(cpu_fir[rc], cpu_fir[rc], tmp); + +tcg_temp_free(tmp); +} +} + +static void gen_fcvtql_v(DisasContext *ctx, int rb, int rc) +{ +if (rb != 31) { +int lab = gen_new_label(); +TCGv tmp = tcg_temp_new(); + +tcg_gen_ext_i32_i64(tmp, cpu_fir[rb]); +tcg_gen_brcond_i64(TCG_COND_EQ, tmp, cpu_fir[rb], lab); +gen_excp(ctx, EXCP_ARITH, EXC_M_IOV); + +gen_set_label(lab); +} +gen_fcvtql(rb, rc); +} + #define FARITH2(name) \ static inline void glue(gen_f, name)(int rb, int rc)\ { \ @@ -612,9 +647,6 @@ static inline void glue(gen_f, name)(int rb, int rc)\ } \ } FARITH2(cvtlq) -FARITH2(cvtql) -FARITH2(cvtql_v) -FARITH2(cvtql_sv) /* ??? VAX instruction qualifiers ignored. */ FARITH2(sqrtf) @@ -2327,11 +2359,12 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) break; case 0x130: /* CVTQL/V */ -gen_fcvtql_v(rb, rc); -break; case 0x530: /* CVTQL/SV */ -gen_fcvtql_sv(rb, rc); +/* ??? I'm pretty sure there's nothing that /sv needs to do that + /v doesn't do. The only thing I can think is that /sv is a + valid instruction merely for completeness in the ISA. */ +gen_fcvtql_v(ctx, rb, rc); break; default: goto invalid_opc; -- 1.6.6.1
[Qemu-devel] [PATCH 3/5] target-alpha: Implement rs/rc properly.
This is a per-cpu flag; there's no need for a spinlock of any kind. We were also failing to manipulate the flag with $31 as a target reg and failing to clear the flag on execution of a return-from-interrupt instruction. Signed-off-by: Richard Henderson r...@twiddle.net --- linux-user/main.c|5 + target-alpha/helper.h|2 -- target-alpha/op_helper.c | 28 ++-- target-alpha/translate.c | 19 +++ 4 files changed, 22 insertions(+), 32 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index 4614e3c..d4a29cb 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -2356,6 +2356,11 @@ void cpu_loop (CPUState *env) while (1) { trapnr = cpu_alpha_exec (env); + /* All of the traps imply a transition through PALcode, which + implies an REI instruction has been executed. Which means + that the intr_flag should be cleared. */ + env-intr_flag = 0; + switch (trapnr) { case EXCP_RESET: fprintf(stderr, Reset requested. Exit\n); diff --git a/target-alpha/helper.h b/target-alpha/helper.h index 8e11304..c378195 100644 --- a/target-alpha/helper.h +++ b/target-alpha/helper.h @@ -2,8 +2,6 @@ DEF_HELPER_2(excp, void, int, int) DEF_HELPER_FLAGS_0(load_pcc, TCG_CALL_CONST | TCG_CALL_PURE, i64) -DEF_HELPER_FLAGS_0(rc, TCG_CALL_CONST, i64) -DEF_HELPER_FLAGS_0(rs, TCG_CALL_CONST, i64) DEF_HELPER_2(addqv, i64, i64, i64) DEF_HELPER_2(addlv, i64, i64, i64) diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c index 2419dc4..84867b8 100644 --- a/target-alpha/op_helper.c +++ b/target-alpha/op_helper.c @@ -47,32 +47,6 @@ void helper_store_fpcr (uint64_t val) cpu_alpha_store_fpcr (env, val); } -static spinlock_t intr_cpu_lock = SPIN_LOCK_UNLOCKED; - -uint64_t helper_rs(void) -{ -uint64_t tmp; - -spin_lock(intr_cpu_lock); -tmp = env-intr_flag; -env-intr_flag = 1; -spin_unlock(intr_cpu_lock); - -return tmp; -} - -uint64_t helper_rc(void) -{ -uint64_t tmp; - -spin_lock(intr_cpu_lock); -tmp = env-intr_flag; -env-intr_flag = 0; -spin_unlock(intr_cpu_lock); - -return tmp; -} - uint64_t 
helper_addqv (uint64_t op1, uint64_t op2) { uint64_t tmp = op1; @@ -1211,6 +1185,7 @@ void helper_hw_rei (void) { env-pc = env-ipr[IPR_EXC_ADDR] ~3; env-ipr[IPR_EXC_ADDR] = env-ipr[IPR_EXC_ADDR] 1; +env-intr_flag = 0; /* XXX: re-enable interrupts and memory mapping */ } @@ -1218,6 +1193,7 @@ void helper_hw_ret (uint64_t a) { env-pc = a ~3; env-ipr[IPR_EXC_ADDR] = a 1; +env-intr_flag = 0; /* XXX: re-enable interrupts and memory mapping */ } diff --git a/target-alpha/translate.c b/target-alpha/translate.c index b677378..188e76c 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -1266,6 +1266,19 @@ static inline void gen_cmp(TCGCond cond, int ra, int rb, int rc, int islit, gen_set_label(l2); } +static void gen_rx(int ra, int set) +{ +TCGv_i32 tmp; + +if (ra != 31) { +tcg_gen_ld8u_i64(cpu_ir[ra], cpu_env, offsetof(CPUState, intr_flag)); +} + +tmp = tcg_const_i32(set); +tcg_gen_st8_i32(tmp, cpu_env, offsetof(CPUState, intr_flag)); +tcg_temp_free_i32(tmp); +} + static inline int translate_one(DisasContext *ctx, uint32_t insn) { uint32_t palcode; @@ -2359,16 +2372,14 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn) break; case 0xE000: /* RC */ -if (ra != 31) -gen_helper_rc(cpu_ir[ra]); +gen_rx(ra, 0); break; case 0xE800: /* ECB */ break; case 0xF000: /* RS */ -if (ra != 31) -gen_helper_rs(cpu_ir[ra]); +gen_rx(ra, 1); break; case 0xF800: /* WH64 */ -- 1.6.6.1
[Qemu-devel] [PATCH 5/5] target-alpha: Implement cvtlq inline.
It's a simple shift and mask sequence. Signed-off-by: Richard Henderson r...@twiddle.net --- target-alpha/helper.h|1 - target-alpha/op_helper.c |7 --- target-alpha/translate.c | 21 - 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/target-alpha/helper.h b/target-alpha/helper.h index 10c78d0..ccf6a2a 100644 --- a/target-alpha/helper.h +++ b/target-alpha/helper.h @@ -83,7 +83,6 @@ DEF_HELPER_FLAGS_1(cvtqf, TCG_CALL_CONST, i64, i64) DEF_HELPER_FLAGS_1(cvtgf, TCG_CALL_CONST, i64, i64) DEF_HELPER_FLAGS_1(cvtgq, TCG_CALL_CONST, i64, i64) DEF_HELPER_FLAGS_1(cvtqg, TCG_CALL_CONST, i64, i64) -DEF_HELPER_FLAGS_1(cvtlq, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) DEF_HELPER_FLAGS_1(cvttq, TCG_CALL_CONST, i64, i64) DEF_HELPER_FLAGS_1(cvttq_c, TCG_CALL_CONST, i64, i64) diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c index f9cd07a..a209130 100644 --- a/target-alpha/op_helper.c +++ b/target-alpha/op_helper.c @@ -1152,13 +1152,6 @@ uint64_t helper_cvtqg (uint64_t a) return float64_to_g(fr); } -uint64_t helper_cvtlq (uint64_t a) -{ -int32_t lo = a 29; -int32_t hi = a 32; -return (lo 0x3FFF) | (hi 0xc000); -} - /* PALcode support special instructions */ #if !defined (CONFIG_USER_ONLY) void helper_hw_rei (void) diff --git a/target-alpha/translate.c b/target-alpha/translate.c index cfdf441..c52cac3 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -597,6 +597,26 @@ static inline void gen_fp_exc_raise(int rc, int fn11) gen_fp_exc_raise_ignore(rc, fn11, fn11 QUAL_I ? 
0 : float_flag_inexact); } +static void gen_fcvtlq(int rb, int rc) +{ +if (unlikely(rc == 31)) { +return; +} +if (unlikely(rb == 31)) { +tcg_gen_movi_i64(cpu_fir[rc], 0); +} else { +TCGv tmp = tcg_temp_new(); + +tcg_gen_shri_i64(tmp, cpu_fir[rb], 32); +tcg_gen_shri_i64(cpu_fir[rc], cpu_fir[rb], 29); +tcg_gen_andi_i64(tmp, tmp, 0xc000); +tcg_gen_andi_i64(cpu_fir[rc], cpu_fir[rc], 0x3FFF); +tcg_gen_or_i64(cpu_fir[rc], cpu_fir[rc], tmp); + +tcg_temp_free(tmp); +} +} + static void gen_fcvtql(int rb, int rc) { if (unlikely(rc == 31)) { @@ -646,7 +666,6 @@ static inline void glue(gen_f, name)(int rb, int rc)\ tcg_temp_free(tmp); \ } \ } -FARITH2(cvtlq) /* ??? VAX instruction qualifiers ignored. */ FARITH2(sqrtf) -- 1.6.6.1
[Qemu-devel] [PATCH 0/4] tcg-hppa get it working, v2.1
Gah. Left out --thread and -s options to format-patch. Sorry about that. r~ Richard Henderson (4): tcg-hppa: Fix const errors in hppa-dis.c. tcg-hppa: Fix 64-bit argument ordering. tcg-hppa: Finish the port. tcg-hppa: Compute is_write in cpu_signal_handler. configure |5 +- cpu-exec.c| 38 +- hppa-dis.c|4 +- tcg/hppa/tcg-target.c | 1846 +++-- tcg/hppa/tcg-target.h | 142 +--- tcg/tcg.c | 12 +- 6 files changed, 1387 insertions(+), 660 deletions(-)
[Qemu-devel] [PATCH 1/4] tcg-hppa: Fix const errors in hppa-dis.c.
Signed-off-by: Richard Henderson r...@twiddle.net --- hppa-dis.c |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hppa-dis.c b/hppa-dis.c index 9d96d72..49f99c8 100644 --- a/hppa-dis.c +++ b/hppa-dis.c @@ -576,7 +576,7 @@ struct pa_opcode const char *name; unsigned long int match; /* Bits that must be set... */ unsigned long int mask;/* ... in these bits. */ -char *args; +const char *args; enum pa_arch arch; char flags; }; @@ -2753,7 +2753,7 @@ print_insn_hppa (bfd_vma memaddr, disassemble_info *info) int sf = GET_FIELD (insn, 19, 20); const char * const * source = float_format_names; const char * const * dest = float_format_names; - char *t = ; + const char *t = ; if (sub == 4) { -- 1.6.6.1
[Qemu-devel] [PATCH 2/4] tcg-hppa: Fix 64-bit argument ordering.
Signed-off-by: Richard Henderson r...@twiddle.net --- tcg/tcg.c | 12 +++- 1 files changed, 11 insertions(+), 1 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 1818868..d753149 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -596,7 +596,17 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags, real_args++; } #endif -#ifdef TCG_TARGET_WORDS_BIGENDIAN + /* If stack grows up, then we will be placing successive + arguments at lower addresses, which means we need to + reverse the order compared to how we would normally + treat either big or little-endian. For those arguments + that will wind up in registers, this still works for + HPPA (the only current STACK_GROWSUP target) since the + argument registers are *also* allocated in decreasing + order. If another such target is added, this logic may + have to get more complicated to differentiate between + stack arguments and register arguments. */ +#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP) *gen_opparam_ptr++ = args[i] + 1; *gen_opparam_ptr++ = args[i]; #else -- 1.6.6.1
[Qemu-devel] [PATCH 4/4] tcg-hppa: Compute is_write in cpu_signal_handler.
Signed-off-by: Richard Henderson r...@twiddle.net --- cpu-exec.c | 38 +++--- 1 files changed, 31 insertions(+), 7 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index bcfcda2..14204f4 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -1193,15 +1193,39 @@ int cpu_signal_handler(int host_signum, void *pinfo, { struct siginfo *info = pinfo; struct ucontext *uc = puc; -unsigned long pc; -int is_write; +unsigned long pc = uc-uc_mcontext.sc_iaoq[0]; +uint32_t insn = *(uint32_t *)pc; +int is_write = 0; + +/* XXX: need kernel patch to get write flag faster. */ +switch (insn 26) { +case 0x1a: /* STW */ +case 0x19: /* STH */ +case 0x18: /* STB */ +case 0x1b: /* STWM */ +is_write = 1; +break; + +case 0x09: /* CSTWX, FSTWX, FSTWS */ +case 0x0b: /* CSTDX, FSTDX, FSTDS */ +/* Distinguish from coprocessor load ... */ +is_write = (insn 9) 1; +break; + +case 0x03: +switch ((insn 6) 15) { +case 0xa: /* STWS */ +case 0x9: /* STHS */ +case 0x8: /* STBS */ +case 0xe: /* STWAS */ +case 0xc: /* STBYS */ +is_write = 1; +} +break; +} -pc = uc-uc_mcontext.sc_iaoq[0]; -/* FIXME: compute is_write */ -is_write = 0; return handle_cpu_signal(pc, (unsigned long)info-si_addr, - is_write, - uc-uc_sigmask, puc); + is_write, uc-uc_sigmask, puc); } #else -- 1.6.6.1
[Qemu-devel] [PATCH QEMU] Transparent Hugepage Support #2
From: Andrea Arcangeli aarca...@redhat.com This will allow proper alignment so NPT/EPT can take advantage of the Linux host backing the guest memory with hugepages. It also ensures that when KVM isn't used the first 2M of guest physical memory is backed by a large TLB entry. To complete it, it will also notify the kernel with madvise that it is important for this memory to be backed by hugepages (needed for both KVM and QEMU), so that hugepages can also be used in embedded systems without any memory waste, and in the future it will allow khugepaged to prioritize the collapse of hugepages in the madvise regions. Ideally the max hugepage size provided by the transparent hugepage support in the kernel should be exported by some sysfs file, but there is no reason to expect an x86_64 host to have hugepages larger than 2M, or to expect those to be supported by the kernel transparent hugepage support in the short and medium term, so we can defer the invention of a fixed kernel API until this happens. By that time we'll surely have a better clue of what's the best way to provide that information to userland, and it'll be a change of a few lines to adapt qemu to use it, so there's no hurry to do it right now. Plus the code below will remain optimal, and there is no risk of memory waste as virtual memory is practically zero cost on 64bit archs. NOTE: if the callers of qemu_ram_alloc change significantly we may later be required to pass a second parameter to qemu_ram_alloc that will tell it what is the first guest physical address that corresponds to the sized memory block being allocated. I'd defer this change for later too as it may never be needed. I verified this is more than enough to get the max benefit from the kernel side feature. 
cat /sys/kernel/debug/kvm/largepages 301 Signed-off-by: Andrea Arcangeli aarca...@redhat.com --- diff --git a/exec.c b/exec.c index b0b6056..9552366 100644 --- a/exec.c +++ b/exec.c @@ -2733,11 +2733,30 @@ ram_addr_t qemu_ram_alloc(ram_addr_t size) PROT_EXEC|PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); #else -new_block-host = qemu_vmalloc(size); +#if defined(__linux__) defined(__x86_64__) +#define MAX_TRANSPARENT_HUGEPAGE_SIZE (2*1024*1024) + if (size = MAX_TRANSPARENT_HUGEPAGE_SIZE) + /* +* Align on the max transparent hugepage size so that +* (gfn ^ pfn) (HPAGE_SIZE-1) == 0 to allow KVM to +* take advantage of hugepages with NPT/EPP or to +* ensure the first 2M of the guest physical ram will +* be mapped by the same hugetlb for QEMU (it is worth +* it even without NPT/EPT). +*/ + new_block-host = qemu_memalign(MAX_TRANSPARENT_HUGEPAGE_SIZE, + size); + else +#undef MAX_TRANSPARENT_HUGEPAGE_SIZE +#endif + new_block-host = qemu_vmalloc(size); #endif #ifdef MADV_MERGEABLE madvise(new_block-host, size, MADV_MERGEABLE); #endif +#ifdef MADV_HUGEPAGE +madvise(new_block-host, size, MADV_HUGEPAGE); +#endif } new_block-offset = last_ram_offset; new_block-length = size;
[Qemu-devel] Re: [PATCH] pcnet: make subsystem vendor id match hardware
Michael S. Tsirkin wrote: Real pcnet device (AT2450) apparently has subsystem device and vendor id set to 0, this is out of spec (which requires that vendor id is obtained from PCI SIG) but windows xp driver seems to need this in order to associate. qemu sets pci subsystem id to qumranet/qemu since d350d97d196a632b6c7493acf07a061017fc6f7d, debian does not yet have this patch. https://bugzilla.redhat.com/show_bug.cgi?id=521247 Signed-off-by: Michael S. Tsirkin m...@redhat.com Cc: Gerd Hoffmann kra...@redhat.com Cc: Anthony Liguori aligu...@us.ibm.com --- hw/pcnet.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/hw/pcnet.c b/hw/pcnet.c index 44b5b31..12260be 100644 --- a/hw/pcnet.c +++ b/hw/pcnet.c @@ -1997,6 +1997,9 @@ static int pci_pcnet_init(PCIDevice *pci_dev) pci_set_long(pci_conf + PCI_BASE_ADDRESS_0 + 4, PCI_BASE_ADDRESS_SPACE_MEMORY); +pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, 0x0); +pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, 0x0); + /* TODO: value must be 0 at RST# */ pci_conf[PCI_INTERRUPT_PIN] = 1; // interrupt pin 0 pci_conf[PCI_MIN_GNT] = 0x06; No concerns from my side, still works here. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux
Re: [Qemu-devel] [PATCH QEMU] Transparent Hugepage Support #2
Andrea Arcangeli wrote:
> + * take advantage of hugepages with NPT/EPP or to

Spelling: NPT/EPT?

-- Jamie
[Qemu-devel] [PATCHv5 00/11] vhost-net: upstream integration
Here's a patchset with vhost support for upstream qemu, rebased to latest bits, and with all comments I'm aware of addressed. Please consider for merging. Anthony, if you are still deliberating some issues, maybe the series can be merged partially? This will at least reduce the amount of noise from reposting the large patchset. Changes from v4: address amit's style comments: mostly renaming for clarity Changes from v3: vhost: vhost net support: use typedef instead of struct name virtio: add set_status callback: fix up non-PCI bindings Changes from v2: Addressed style comments Detect mapping changes and abort Unmap ring on cleanup Changes from v1: Addressed style comments Migration fixes. Gracefully fail with non-tap backends. Michael S. Tsirkin (11): tap: add interface to get device fd kvm: add API to set ioeventfd notifier: event notifier implementation virtio: notifier support + APIs for queue fields virtio: add set_status callback virtio: move typedef to qemu-common virtio-pci: fill in notifier support vhost: vhost net support tap: add vhost/vhostfd options tap: add API to retrieve vhost net header virtio-net: vhost net support Makefile.target |3 + configure| 36 +++ hw/event_notifier.c | 62 + hw/event_notifier.h | 16 ++ hw/s390-virtio-bus.c |2 +- hw/syborg_virtio.c |2 +- hw/vhost.c | 706 ++ hw/vhost.h | 48 hw/vhost_net.c | 198 ++ hw/vhost_net.h | 19 ++ hw/virtio-net.c | 71 +- hw/virtio-pci.c | 68 +- hw/virtio.c | 80 ++- hw/virtio.h | 28 ++- kvm-all.c| 22 ++ kvm.h| 16 ++ net.c|8 + net/tap.c| 43 +++ net/tap.h|5 + qemu-common.h|2 + qemu-options.hx |4 +- 21 files changed, 1429 insertions(+), 10 deletions(-) create mode 100644 hw/event_notifier.c create mode 100644 hw/event_notifier.h create mode 100644 hw/vhost.c create mode 100644 hw/vhost.h create mode 100644 hw/vhost_net.c create mode 100644 hw/vhost_net.h
[Qemu-devel] [PATCHv5 01/11] tap: add interface to get device fd
Will be used by vhost to attach/detach to backend. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- net/tap.c |7 +++ net/tap.h |2 ++ 2 files changed, 9 insertions(+), 0 deletions(-) diff --git a/net/tap.c b/net/tap.c index 7a7320c..fc59fd4 100644 --- a/net/tap.c +++ b/net/tap.c @@ -269,6 +269,13 @@ static void tap_poll(VLANClientState *nc, bool enable) tap_write_poll(s, enable); } +int tap_get_fd(VLANClientState *nc) +{ +TAPState *s = DO_UPCAST(TAPState, nc, nc); +assert(nc-info-type == NET_CLIENT_TYPE_TAP); +return s-fd; +} + /* fd support */ static NetClientInfo net_tap_info = { diff --git a/net/tap.h b/net/tap.h index 538a562..a244b28 100644 --- a/net/tap.h +++ b/net/tap.h @@ -48,4 +48,6 @@ int tap_probe_vnet_hdr(int fd); int tap_probe_has_ufo(int fd); void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo); +int tap_get_fd(VLANClientState *vc); + #endif /* QEMU_NET_TAP_H */ -- 1.7.0.18.g0d53a5
[Qemu-devel] [PATCHv5 03/11] notifier: event notifier implementation
event notifiers are slightly generalized eventfd descriptors. Current implementation depends on eventfd because vhost is the only user, and vhost depends on eventfd anyway, but a stub is provided for non-eventfd case. We'll be able to further generalize this when another user comes along and we see how to best do this. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- Makefile.target |1 + hw/event_notifier.c | 62 +++ hw/event_notifier.h | 16 + qemu-common.h |1 + 4 files changed, 80 insertions(+), 0 deletions(-) create mode 100644 hw/event_notifier.c create mode 100644 hw/event_notifier.h diff --git a/Makefile.target b/Makefile.target index ab3c438..004a703 100644 --- a/Makefile.target +++ b/Makefile.target @@ -175,6 +175,7 @@ obj-y = vl.o async.o monitor.o pci.o pci_host.o pcie_host.o machine.o gdbstub.o # virtio has to be here due to weird dependency between PCI and virtio-net. # need to fix this properly obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-pci.o virtio-serial-bus.o +obj-y += event_notifier.o obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_ISA_MMIO) += isa_mmio.o diff --git a/hw/event_notifier.c b/hw/event_notifier.c new file mode 100644 index 000..13f3656 --- /dev/null +++ b/hw/event_notifier.c @@ -0,0 +1,62 @@ +/* + * event notifier support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin m...@redhat.com + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include hw.h +#include event_notifier.h +#ifdef CONFIG_EVENTFD +#include sys/eventfd.h +#endif + +int event_notifier_init(EventNotifier *e, int active) +{ +#ifdef CONFIG_EVENTFD +int fd = eventfd(!!active, EFD_NONBLOCK | EFD_CLOEXEC); +if (fd 0) +return -errno; +e-fd = fd; +return 0; +#else +return -ENOSYS; +#endif +} + +void event_notifier_cleanup(EventNotifier *e) +{ +close(e-fd); +} + +int event_notifier_get_fd(EventNotifier *e) +{ +return e-fd; +} + +int event_notifier_test_and_clear(EventNotifier *e) +{ +uint64_t value; +int r = read(e-fd, value, sizeof(value)); +return r == sizeof(value); +} + +int event_notifier_test(EventNotifier *e) +{ +uint64_t value; +int r = read(e-fd, value, sizeof(value)); +if (r == sizeof(value)) { +/* restore previous value. */ +int s = write(e-fd, value, sizeof(value)); +/* never blocks because we use EFD_SEMAPHORE. + * If we didn't we'd get EAGAIN on overflow + * and we'd have to write code to ignore it. */ +assert(s == sizeof(value)); +} +return r == sizeof(value); +} diff --git a/hw/event_notifier.h b/hw/event_notifier.h new file mode 100644 index 000..24117ea --- /dev/null +++ b/hw/event_notifier.h @@ -0,0 +1,16 @@ +#ifndef QEMU_EVENT_NOTIFIER_H +#define QEMU_EVENT_NOTIFIER_H + +#include qemu-common.h + +struct EventNotifier { + int fd; +}; + +int event_notifier_init(EventNotifier *, int active); +void event_notifier_cleanup(EventNotifier *); +int event_notifier_get_fd(EventNotifier *); +int event_notifier_test_and_clear(EventNotifier *); +int event_notifier_test(EventNotifier *); + +#endif diff --git a/qemu-common.h b/qemu-common.h index 805be1a..f12a8f5 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -227,6 +227,7 @@ typedef struct uWireSlave uWireSlave; typedef struct I2SCodec I2SCodec; typedef struct DeviceState DeviceState; typedef struct SSIBus SSIBus; +typedef struct EventNotifier EventNotifier; typedef uint64_t pcibus_t; -- 1.7.0.18.g0d53a5
[Qemu-devel] [PATCHv5 04/11] virtio: notifier support + APIs for queue fields
vhost needs physical addresses for ring and other queue fields, so add APIs for these. In particular, add binding API to set host/guest notifiers. Will be used by vhost. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- hw/virtio.c | 80 ++- hw/virtio.h | 18 - 2 files changed, 96 insertions(+), 2 deletions(-) diff --git a/hw/virtio.c b/hw/virtio.c index 7c020a3..f54129f 100644 --- a/hw/virtio.c +++ b/hw/virtio.c @@ -73,6 +73,9 @@ struct VirtQueue int inuse; uint16_t vector; void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); +VirtIODevice *vdev; +EventNotifier guest_notifier; +EventNotifier host_notifier; }; /* virt queue functions */ @@ -592,6 +595,12 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, return vdev-vq[i]; } +void virtio_irq(VirtQueue *vq) +{ +vq-vdev-isr |= 0x01; +virtio_notify_vector(vq-vdev, vq-vector); +} + void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) { /* Always notify when queue is empty (when feature acknowledge) */ @@ -714,8 +723,10 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id, vdev-queue_sel = 0; vdev-config_vector = VIRTIO_NO_VECTOR; vdev-vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); -for(i = 0; i VIRTIO_PCI_QUEUE_MAX; i++) +for(i = 0; i VIRTIO_PCI_QUEUE_MAX; i++) { vdev-vq[i].vector = VIRTIO_NO_VECTOR; +vdev-vq[i].vdev = vdev; +} vdev-name = name; vdev-config_len = config_size; @@ -733,3 +744,70 @@ void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding, vdev-binding = binding; vdev-binding_opaque = opaque; } + +target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) +{ +return vdev-vq[n].vring.desc; +} + +target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) +{ +return vdev-vq[n].vring.avail; +} + +target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n) +{ +return vdev-vq[n].vring.used; +} + +target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n) +{ +return 
vdev-vq[n].vring.desc; +} + +target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n) +{ +return sizeof(VRingDesc) * vdev-vq[n].vring.num; +} + +target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n) +{ +return offsetof(VRingAvail, ring) + +sizeof(u_int64_t) * vdev-vq[n].vring.num; +} + +target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n) +{ +return offsetof(VRingUsed, ring) + +sizeof(VRingUsedElem) * vdev-vq[n].vring.num; +} + +target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n) +{ +return vdev-vq[n].vring.used - vdev-vq[n].vring.desc + + virtio_queue_get_used_size(vdev, n); +} + +uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) +{ +return vdev-vq[n].last_avail_idx; +} + +void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) +{ +vdev-vq[n].last_avail_idx = idx; +} + +VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n) +{ +return vdev-vq + n; +} + +EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) +{ +return vq-guest_notifier; +} +EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) +{ +return vq-host_notifier; +} diff --git a/hw/virtio.h b/hw/virtio.h index 3baa2a3..a074a65 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -19,6 +19,7 @@ #include qdev.h #include sysemu.h #include block_int.h +#include event_notifier.h /* from Linux's linux/virtio_config.h */ @@ -89,6 +90,8 @@ typedef struct { int (*load_config)(void * opaque, QEMUFile *f); int (*load_queue)(void * opaque, int n, QEMUFile *f); unsigned (*get_features)(void * opaque); +int (*guest_notifier)(void * opaque, int n, bool assigned); +int (*host_notifier)(void * opaque, int n, bool assigned); } VirtIOBindings; #define VIRTIO_PCI_QUEUE_MAX 64 @@ -181,5 +184,18 @@ void virtio_net_exit(VirtIODevice *vdev); DEFINE_PROP_BIT(indirect_desc, _state, _field, \ VIRTIO_RING_F_INDIRECT_DESC, true) - +target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); 
+target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); +target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n); +target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n); +target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n); +target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n); +target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n); +target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n); +uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n); +void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t
[Qemu-devel] [PATCHv5 05/11] virtio: add set_status callback
vhost net backend needs to be notified when frontend status changes. Add a callback, similar to set_features. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- hw/s390-virtio-bus.c |2 +- hw/syborg_virtio.c |2 +- hw/virtio-pci.c |5 +++-- hw/virtio.h |9 + 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c index 9fc01e9..3efbaab 100644 --- a/hw/s390-virtio-bus.c +++ b/hw/s390-virtio-bus.c @@ -242,7 +242,7 @@ void s390_virtio_device_update_status(VirtIOS390Device *dev) VirtIODevice *vdev = dev-vdev; uint32_t features; -vdev-status = ldub_phys(dev-dev_offs + VIRTIO_DEV_OFFS_STATUS); +virtio_set_status(vdev, ldub_phys(dev-dev_offs + VIRTIO_DEV_OFFS_STATUS)); /* Update guest supported feature bitmap */ diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c index 65239a0..abf0370 100644 --- a/hw/syborg_virtio.c +++ b/hw/syborg_virtio.c @@ -149,7 +149,7 @@ static void syborg_virtio_writel(void *opaque, target_phys_addr_t offset, virtio_queue_notify(vdev, value); break; case SYBORG_VIRTIO_STATUS: -vdev-status = value 0xFF; +virtio_set_status(vdev, value 0xFF); if (vdev-status == 0) virtio_reset(vdev); break; diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 799f664..ee67a8a 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -206,7 +206,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) virtio_queue_notify(vdev, val); break; case VIRTIO_PCI_STATUS: -vdev-status = val 0xFF; +virtio_set_status(vdev, val 0xFF); if (vdev-status == 0) { virtio_reset(proxy-vdev); msix_unuse_all_vectors(proxy-pci_dev); @@ -377,7 +377,8 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, if (PCI_COMMAND == address) { if (!(val PCI_COMMAND_MASTER)) { -proxy-vdev-status = ~VIRTIO_CONFIG_S_DRIVER_OK; +virtio_set_status(proxy-vdev, + proxy-vdev-status ~VIRTIO_CONFIG_S_DRIVER_OK); } } diff --git a/hw/virtio.h b/hw/virtio.h index a074a65..5b07176 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ 
-115,12 +115,21 @@ struct VirtIODevice void (*get_config)(VirtIODevice *vdev, uint8_t *config); void (*set_config)(VirtIODevice *vdev, const uint8_t *config); void (*reset)(VirtIODevice *vdev); +void (*set_status)(VirtIODevice *vdev, uint8_t val); VirtQueue *vq; const VirtIOBindings *binding; void *binding_opaque; uint16_t device_id; }; +static inline void virtio_set_status(VirtIODevice *vdev, uint8_t val) +{ +if (vdev-set_status) { +vdev-set_status(vdev, val); +} +vdev-status = val; +} + VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, void (*handle_output)(VirtIODevice *, VirtQueue *)); -- 1.7.0.18.g0d53a5
[Qemu-devel] [PATCHv5 06/11] virtio: move typedef to qemu-common
make it possible to use type without header include, simplifying header dependencies. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- hw/virtio.h |1 - qemu-common.h |1 + 2 files changed, 1 insertions(+), 1 deletions(-) diff --git a/hw/virtio.h b/hw/virtio.h index 5b07176..4a4131d 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -69,7 +69,6 @@ static inline target_phys_addr_t vring_align(target_phys_addr_t addr, } typedef struct VirtQueue VirtQueue; -typedef struct VirtIODevice VirtIODevice; #define VIRTQUEUE_MAX_SIZE 1024 diff --git a/qemu-common.h b/qemu-common.h index f12a8f5..90ca3b8 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -228,6 +228,7 @@ typedef struct I2SCodec I2SCodec; typedef struct DeviceState DeviceState; typedef struct SSIBus SSIBus; typedef struct EventNotifier EventNotifier; +typedef struct VirtIODevice VirtIODevice; typedef uint64_t pcibus_t; -- 1.7.0.18.g0d53a5
[Qemu-devel] [PATCHv5 07/11] virtio-pci: fill in notifier support
Support host/guest notifiers in virtio-pci. The last one only with kvm, that's okay because vhost relies on kvm anyway. Note on kvm usage: kvm ioeventfd API is implemented on non-kvm systems as well, this is the reason we don't need if (kvm_enabled()) around it. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- hw/virtio-pci.c | 63 +++ 1 files changed, 63 insertions(+), 0 deletions(-) diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index ee67a8a..a7e1bcb 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -24,6 +24,7 @@ #include net.h #include block_int.h #include loader.h +#include kvm.h /* from Linux's linux/virtio_pci.h */ @@ -392,6 +393,66 @@ static unsigned virtio_pci_get_features(void *opaque) return proxy-host_features; } +static void virtio_pci_guest_notifier_read(void *opaque) +{ +VirtQueue *vq = opaque; +EventNotifier *n = virtio_queue_get_guest_notifier(vq); +if (event_notifier_test_and_clear(n)) { +virtio_irq(vq); +} +} + +static int virtio_pci_guest_notifier(void *opaque, int n, bool assign) +{ +VirtIOPCIProxy *proxy = opaque; +VirtQueue *vq = virtio_get_queue(proxy-vdev, n); +EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); + +if (assign) { +int r = event_notifier_init(notifier, 0); +if (r 0) { +return r; +} +qemu_set_fd_handler(event_notifier_get_fd(notifier), +virtio_pci_guest_notifier_read, NULL, vq); +} else { +qemu_set_fd_handler(event_notifier_get_fd(notifier), +NULL, NULL, NULL); +event_notifier_cleanup(notifier); +} + +return 0; +} + +static int virtio_pci_host_notifier(void *opaque, int n, bool assign) +{ +VirtIOPCIProxy *proxy = opaque; +VirtQueue *vq = virtio_get_queue(proxy-vdev, n); +EventNotifier *notifier = virtio_queue_get_host_notifier(vq); +int r; +if (assign) { +r = event_notifier_init(notifier, 1); +if (r 0) { +return r; +} +r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier), + proxy-addr + VIRTIO_PCI_QUEUE_NOTIFY, + n, assign); +if (r 0) { +event_notifier_cleanup(notifier); +} +} else { +r = 
kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier), + proxy-addr + VIRTIO_PCI_QUEUE_NOTIFY, + n, assign); +if (r 0) { +return r; +} +event_notifier_cleanup(notifier); +} +return r; +} + static const VirtIOBindings virtio_pci_bindings = { .notify = virtio_pci_notify, .save_config = virtio_pci_save_config, @@ -399,6 +460,8 @@ static const VirtIOBindings virtio_pci_bindings = { .save_queue = virtio_pci_save_queue, .load_queue = virtio_pci_load_queue, .get_features = virtio_pci_get_features, +.host_notifier = virtio_pci_host_notifier, +.guest_notifier = virtio_pci_guest_notifier, }; static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev, -- 1.7.0.18.g0d53a5
[Qemu-devel] [PATCHv5 09/11] tap: add vhost/vhostfd options
This adds vhost binary option to tap, to enable vhost net accelerator. Default is off for now, we'll be able to make default on long term when we know it's stable. vhostfd option can be used by management, to pass in the fd. Assigning vhostfd implies vhost=on. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- net.c |8 net/tap.c | 29 + qemu-options.hx |4 +++- 3 files changed, 40 insertions(+), 1 deletions(-) diff --git a/net.c b/net.c index e47f727..48d9fb0 100644 --- a/net.c +++ b/net.c @@ -976,6 +976,14 @@ static const struct { .name = vnet_hdr, .type = QEMU_OPT_BOOL, .help = enable the IFF_VNET_HDR flag on the tap interface +}, { +.name = vhost, +.type = QEMU_OPT_BOOL, +.help = enable vhost-net network accelerator, +}, { +.name = vhostfd, +.type = QEMU_OPT_STRING, +.help = file descriptor of an already opened vhost net device, }, #endif /* _WIN32 */ { /* end of list */ } diff --git a/net/tap.c b/net/tap.c index fc59fd4..19c4fa2 100644 --- a/net/tap.c +++ b/net/tap.c @@ -41,6 +41,8 @@ #include net/tap-linux.h +#include hw/vhost_net.h + /* Maximum GSO packet size (64k) plus plenty of room for * the ethernet and virtio_net headers */ @@ -57,6 +59,7 @@ typedef struct TAPState { unsigned int has_vnet_hdr : 1; unsigned int using_vnet_hdr : 1; unsigned int has_ufo: 1; +VHostNetState *vhost_net; } TAPState; static int launch_script(const char *setup_script, const char *ifname, int fd); @@ -252,6 +255,10 @@ static void tap_cleanup(VLANClientState *nc) { TAPState *s = DO_UPCAST(TAPState, nc, nc); +if (s-vhost_net) { +vhost_net_cleanup(s-vhost_net); +} + qemu_purge_queued_packets(nc); if (s-down_script[0]) @@ -307,6 +314,7 @@ static TAPState *net_tap_fd_init(VLANState *vlan, s-has_ufo = tap_probe_has_ufo(s-fd); tap_set_offload(s-nc, 0, 0, 0, 0, 0); tap_read_poll(s, 1); +s-vhost_net = NULL; return s; } @@ -456,5 +464,26 @@ int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan } } +if (qemu_opt_get_bool(opts, vhost, !!qemu_opt_get(opts, 
vhostfd))) { +int vhostfd, r; +if (qemu_opt_get(opts, vhostfd)) { +r = net_handle_fd_param(mon, qemu_opt_get(opts, vhostfd)); +if (r == -1) { +return -1; +} +vhostfd = r; +} else { +vhostfd = -1; +} +s-vhost_net = vhost_net_init(s-nc, vhostfd); +if (!s-vhost_net) { +qemu_error(vhost-net requested but could not be initialized\n); +return -1; +} +} else if (qemu_opt_get(opts, vhostfd)) { +qemu_error(vhostfd= is not valid without vhost\n); +return -1; +} + return 0; } diff --git a/qemu-options.hx b/qemu-options.hx index fd50add..4d9f4da 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -895,7 +895,7 @@ DEF(net, HAS_ARG, QEMU_OPTION_net, -net tap[,vlan=n][,name=str],ifname=name\n connect the host TAP network interface to VLAN 'n'\n #else --net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off]\n +-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h]\n connect the host TAP network interface to VLAN 'n' and use the\n network scripts 'file' (default= DEFAULT_NETWORK_SCRIPT )\n and 'dfile' (default= DEFAULT_NETWORK_DOWN_SCRIPT )\n @@ -905,6 +905,8 @@ DEF(net, HAS_ARG, QEMU_OPTION_net, default of 'sndbuf=1048576' can be disabled using 'sndbuf=0')\n use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap flag\n use vnet_hdr=on to make the lack of IFF_VNET_HDR support an error condition\n +use vhost=on to enable experimental in kernel accelerator\n +use 'vhostfd=h' to connect to an already opened vhost net device\n #endif -net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n connect the vlan 'n' to another VLAN using a socket connection\n -- 1.7.0.18.g0d53a5
[Qemu-devel] [PATCHv5 10/11] tap: add API to retrieve vhost net header
will be used by virtio-net for vhost net support Signed-off-by: Michael S. Tsirkin m...@redhat.com --- net/tap.c |7 +++ net/tap.h |3 +++ 2 files changed, 10 insertions(+), 0 deletions(-) diff --git a/net/tap.c b/net/tap.c index 19c4fa2..35c05d7 100644 --- a/net/tap.c +++ b/net/tap.c @@ -487,3 +487,10 @@ int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan return 0; } + +VHostNetState *tap_get_vhost_net(VLANClientState *nc) +{ +TAPState *s = DO_UPCAST(TAPState, nc, nc); +assert(nc-info-type == NET_CLIENT_TYPE_TAP); +return s-vhost_net; +} diff --git a/net/tap.h b/net/tap.h index a244b28..b8cec83 100644 --- a/net/tap.h +++ b/net/tap.h @@ -50,4 +50,7 @@ void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo); int tap_get_fd(VLANClientState *vc); +struct vhost_net; +struct vhost_net *tap_get_vhost_net(VLANClientState *vc); + #endif /* QEMU_NET_TAP_H */ -- 1.7.0.18.g0d53a5
[Qemu-devel] [PATCHv5 11/11] virtio-net: vhost net support
This connects virtio-net to vhost net backend. The code is structured in a way analogous to what we have with vnet header capability in tap. We start/stop backend on driver start/stop as well as on save and vm start (for migration). Signed-off-by: Michael S. Tsirkin m...@redhat.com --- hw/virtio-net.c | 71 +- 1 files changed, 69 insertions(+), 2 deletions(-) diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 5c0093e..9ddd58c 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -17,6 +17,7 @@ #include net/tap.h #include qemu-timer.h #include virtio-net.h +#include vhost_net.h #define VIRTIO_NET_VM_VERSION11 @@ -47,6 +48,8 @@ typedef struct VirtIONet uint8_t nomulti; uint8_t nouni; uint8_t nobcast; +uint8_t vhost_started; +VMChangeStateEntry *vmstate; struct { int in_use; int first_multi; @@ -114,6 +117,10 @@ static void virtio_net_reset(VirtIODevice *vdev) n-nomulti = 0; n-nouni = 0; n-nobcast = 0; +if (n-vhost_started) { +vhost_net_stop(tap_get_vhost_net(n-nic-nc.peer), vdev); +n-vhost_started = 0; +} /* Flush any MAC and VLAN filter table state */ n-mac_table.in_use = 0; @@ -172,7 +179,14 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features) features = ~(0x1 VIRTIO_NET_F_HOST_UFO); } -return features; +if (!n-nic-nc.peer || +n-nic-nc.peer-info-type != NET_CLIENT_TYPE_TAP) { +return features; +} +if (!tap_get_vhost_net(n-nic-nc.peer)) { +return features; +} +return vhost_net_get_features(tap_get_vhost_net(n-nic-nc.peer), features); } static uint32_t virtio_net_bad_features(VirtIODevice *vdev) @@ -698,6 +712,12 @@ static void virtio_net_save(QEMUFile *f, void *opaque) { VirtIONet *n = opaque; +if (n-vhost_started) { +/* TODO: should we really stop the backend? + * If we don't, it might keep writing to memory. 
*/ +vhost_net_stop(tap_get_vhost_net(n-nic-nc.peer), n-vdev); +n-vhost_started = 0; +} virtio_save(n-vdev, f); qemu_put_buffer(f, n-mac, ETH_ALEN); @@ -810,7 +830,6 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) qemu_mod_timer(n-tx_timer, qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL); } - return 0; } @@ -830,6 +849,47 @@ static NetClientInfo net_virtio_info = { .link_status_changed = virtio_net_set_link_status, }; +static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) +{ +VirtIONet *n = to_virtio_net(vdev); +if (!n-nic-nc.peer) { +return; +} +if (n-nic-nc.peer-info-type != NET_CLIENT_TYPE_TAP) { +return; +} + +if (!tap_get_vhost_net(n-nic-nc.peer)) { +return; +} +if (!!n-vhost_started == !!(status VIRTIO_CONFIG_S_DRIVER_OK)) { +return; +} +if (status VIRTIO_CONFIG_S_DRIVER_OK) { +int r = vhost_net_start(tap_get_vhost_net(n-nic-nc.peer), vdev); +if (r 0) { +fprintf(stderr, unable to start vhost net: %d: +falling back on userspace virtio\n, -r); +} else { +n-vhost_started = 1; +} +} else { +vhost_net_stop(tap_get_vhost_net(n-nic-nc.peer), vdev); +n-vhost_started = 0; +} +} + +static void virtio_net_vmstate_change(void *opaque, int running, int reason) +{ +VirtIONet *n = opaque; +if (!running) { +return; +} +/* This is called when vm is started, it will start vhost backend if + * appropriate e.g. after migration. 
*/ +virtio_net_set_status(n-vdev, n-vdev.status); +} + VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf) { VirtIONet *n; @@ -845,6 +905,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf) n-vdev.set_features = virtio_net_set_features; n-vdev.bad_features = virtio_net_bad_features; n-vdev.reset = virtio_net_reset; +n-vdev.set_status = virtio_net_set_status; n-rx_vq = virtio_add_queue(n-vdev, 256, virtio_net_handle_rx); n-tx_vq = virtio_add_queue(n-vdev, 256, virtio_net_handle_tx); n-ctrl_vq = virtio_add_queue(n-vdev, 64, virtio_net_handle_ctrl); @@ -867,6 +928,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf) register_savevm(virtio-net, virtio_net_id++, VIRTIO_NET_VM_VERSION, virtio_net_save, virtio_net_load, n); +n-vmstate = qemu_add_vm_change_state_handler(virtio_net_vmstate_change, n); return n-vdev; } @@ -874,6 +936,11 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf) void virtio_net_exit(VirtIODevice *vdev) { VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev); +qemu_del_vm_change_state_handler(n-vmstate); + +if (n-vhost_started) { +vhost_net_stop(tap_get_vhost_net(n-nic-nc.peer), vdev); +}
[Qemu-devel] [PATCHv5 08/11] vhost: vhost net support
This adds vhost net device support in qemu. Will be tied to tap device and virtio by following patches. Raw backend is currently missing, will be worked on/submitted separately. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- Makefile.target |2 + configure | 36 +++ hw/vhost.c | 706 +++ hw/vhost.h | 48 hw/vhost_net.c | 198 hw/vhost_net.h | 19 ++ 6 files changed, 1009 insertions(+), 0 deletions(-) create mode 100644 hw/vhost.c create mode 100644 hw/vhost.h create mode 100644 hw/vhost_net.c create mode 100644 hw/vhost_net.h diff --git a/Makefile.target b/Makefile.target index 004a703..ea5207c 100644 --- a/Makefile.target +++ b/Makefile.target @@ -176,6 +176,8 @@ obj-y = vl.o async.o monitor.o pci.o pci_host.o pcie_host.o machine.o gdbstub.o # need to fix this properly obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-pci.o virtio-serial-bus.o obj-y += event_notifier.o +obj-y += vhost_net.o +obj-$(CONFIG_VHOST_NET) += vhost.o obj-y += rwhandler.o obj-$(CONFIG_KVM) += kvm.o kvm-all.o obj-$(CONFIG_ISA_MMIO) += isa_mmio.o diff --git a/configure b/configure index d728799..ebfc774 100755 --- a/configure +++ b/configure @@ -87,6 +87,7 @@ libs_softmmu= libs_tools= audio_pt_int= audio_win_int= +audio_win_int= # parse CC options first for opt do @@ -263,6 +264,7 @@ vnc_tls= vnc_sasl= xen= linux_aio= +vhost_net= gprof=no debug_tcg=no @@ -651,6 +653,10 @@ for opt do ;; --enable-docs) docs=yes ;; + --disable-vhost-net) vhost_net=no + ;; + --enable-vhost-net) vhost_net=yes + ;; *) echo ERROR: unknown option $opt; show_help=yes ;; esac @@ -1498,6 +1504,32 @@ EOF fi ## +# test for vhost net + +if test $vhost_net != no; then +if test $kvm != no; then +cat $TMPC EOF +#include linux/vhost.h +int main(void) { return 0; } +EOF +if compile_prog $kvm_cflags ; then +vhost_net=yes +else +if $vhost_net == yes ; then +feature_not_found vhost-net +fi +vhost_net=no +fi +else +if $vhost_net == yes ; then +echo -e NOTE: vhost-net feature requires KVM (--enable-kvm). 
+feature_not_found vhost-net +fi +vhost_net=no +fi +fi + +## # pthread probe PTHREADLIBS_LIST=-lpthread -lpthreadGC2 @@ -1968,6 +2000,7 @@ echo fdt support $fdt echo preadv support$preadv echo fdatasync $fdatasync echo uuid support $uuid +echo vhost-net support $vhost_net if test $sdl_too_old = yes; then echo - Your SDL version is too old - please upgrade to have SDL support @@ -2492,6 +2525,9 @@ case $target_arch2 in if test $kvm_para = yes; then echo CONFIG_KVM_PARA=y $config_target_mak fi + if test $vhost_net = yes ; then +echo CONFIG_VHOST_NET=y $config_target_mak + fi fi esac if test $target_bigendian = yes ; then diff --git a/hw/vhost.c b/hw/vhost.c new file mode 100644 index 000..2e93f33 --- /dev/null +++ b/hw/vhost.c @@ -0,0 +1,706 @@ +/* + * vhost support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin m...@redhat.com + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include linux/vhost.h +#include sys/ioctl.h +#include sys/eventfd.h +#include vhost.h +#include hw/hw.h +/* For range_get_last */ +#include pci.h + +static void vhost_dev_sync_region(struct vhost_dev *dev, + uint64_t mfirst, uint64_t mlast, + uint64_t rfirst, uint64_t rlast) +{ +uint64_t start = MAX(mfirst, rfirst); +uint64_t end = MIN(mlast, rlast); +vhost_log_chunk_t *from = dev-log + start / VHOST_LOG_CHUNK; +vhost_log_chunk_t *to = dev-log + end / VHOST_LOG_CHUNK + 1; +uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK; + +assert(end / VHOST_LOG_CHUNK dev-log_size); +assert(start / VHOST_LOG_CHUNK dev-log_size); +if (end start) { +return; +} +for (;from to; ++from) { +vhost_log_chunk_t log; +int bit; +/* We first check with non-atomic: much cheaper, + * and we expect non-dirty to be the common case. */ +if (!*from) { +continue; +} +/* Data must be read atomically. 
We don't really + * need the barrier semantics of __sync + * builtins, but it's easier to use them than + * roll our own. */ +log = __sync_fetch_and_and(from, 0); +while ((bit = sizeof(log) sizeof(int) ? +ffsll(log) : ffs(log))) { +bit -= 1; +cpu_physical_memory_set_dirty(addr + bit *
[Qemu-devel] [PATCHv5 02/11] kvm: add API to set ioeventfd
Comment on kvm usage: rather than require users to do if (kvm_enabled()) and/or ifdefs, this patch adds an API that, internally, is defined to stub function on non-kvm build, and checks kvm_enabled for non-kvm run. While rest of qemu code still uses if (kvm_enabled()), I think this approach is cleaner, and we should convert rest of code to it long term. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- kvm-all.c | 22 ++ kvm.h | 16 2 files changed, 38 insertions(+), 0 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 534ead0..f427f73 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1153,3 +1153,25 @@ int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset) return r; } + +#ifdef KVM_IOEVENTFD +int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign) +{ +struct kvm_ioeventfd kick = { +.datamatch = val, +.addr = addr, +.len = 2, +.flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO, +.fd = fd, +}; +int r; +if (!kvm_enabled()) +return -ENOSYS; +if (!assign) +kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN; +r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, kick); +if (r 0) +return r; +return 0; +} +#endif diff --git a/kvm.h b/kvm.h index fd8d0c1..2dfcb15 100644 --- a/kvm.h +++ b/kvm.h @@ -14,10 +14,16 @@ #ifndef QEMU_KVM_H #define QEMU_KVM_H +#include stdbool.h +#include errno.h #include config.h #include qemu-queue.h #ifdef CONFIG_KVM +#include linux/kvm.h +#endif + +#ifdef CONFIG_KVM extern int kvm_allowed; #define kvm_enabled() (kvm_allowed) @@ -161,4 +167,14 @@ static inline void cpu_synchronize_post_init(CPUState *env) } } +#if defined(KVM_IOEVENTFD) defined(CONFIG_KVM) +int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign); +#else +static inline +int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign) +{ +return -ENOSYS; +} +#endif + #endif -- 1.7.0.18.g0d53a5
Re: [Qemu-devel] wake-on-lan IPMI implementation; real power-off and -no-shutdown
The semantics of -no-shutdown are awful. I'd personally prefer to see the option deprecated and a new set of options introduced with clearer semantics. Currently, -no-shutdown does too many things. It affects reboot behaviour, shutdown behaviour, the behavior of the SDL close button. Each of these things should be individual tunables. I'm not sure about -no-shutdown, but I've had some problems with -no-reboot, which I use for semi-automated OS installations. I use -no-reboot so that when the guest does a reboot during installation, as they invariably do one or more times, QEMU exits, my script does things like eject the CD/floppy, or change it for the next in sequence, and modify the guest's installed files to add virtio drivers, install extra files, edit boot scripts and whatever else is useful, and then restart QEMU. The guest thinks it's just rebooted, but it has the virtualisation goodies in place to run better. Unfortunately with an MS-DOS 5.00 guest, -no-reboot does not work. It fails to exit QEMU; instead it just reboots. I guess that means a QJSON event would not be sent either. For my use case, it would be even better if guest reboot paused the guest and sent a QJSON event instead of having to use -no-reboot. Then I wouldn't have to close and restart the VNC client repeatedly during installs. Now that we have ways to choose what kind of events and actions are triggered by the QEMU watchdog device, it would be nice to fit guest reboot (perhaps even the different types of reboot) / host-forced reboot / guest powerdown / host-forced powerdown (like holding down the power button for 5 seconds on a real PC) into the same/similar framework as the watchdog, with same/similar event types and action choices. -- Jamie
Re: [Qemu-devel] wake-on-lan IPMI implementation; real power-off and -no-shutdown
On Tue, Mar 16, 2010 at 05:28:51PM +, Jamie Lokier wrote: The semantics of -no-shutdown are awful. I'd personally prefer to see the option deprecated and a new set of options introduced with clearer semantics. Currently, -no-shutdown does too many things. It affects reboot behaviour, shutdown behaviour, the behavior of the SDL close button. Each of these things should be individual tunables. I'm not sure about -no-shutdown, but I've had some problems with -no-reboot, which I use for semi-automated OS installations. I use -no-reboot so that when the guest does a reboot during installation, as they invariably do one or more times, QEMU exits, my script does things like eject the CD/floppy, or change it for the next in sequence, and modify the guest's installed files to add virtio drivers, install extra files, edit boot scripts and whatever else is useful, and then restart QEMU. The guest thinks it's just rebooted, but it has the virtualisation goodies in place to run better. That's the way libvirt / virt-manager does provisioning too. Unfortunately with an MS-DOS 5.00 guest, -no-reboot does not work. It fails to exit QEMU; instead it just reboots. I guess that means a QJSON event would not be sent either. For my use case, it would be even better if guest reboot paused the guest and sent a QJSON event instead of having to use -no-reboot. Then I wouldn't have to close and restart the VNC client repeatedly during installs. That would be nice! Now that we have ways to choose what kind of events and actions are triggered by the QEMU watchdog device, it would be nice to fit guest reboot (perhaps even the different types of reboot) / host-forced reboot / guest powerdown / host-forced powerdown (like holding down the power button for 5 seconds on a real PC) into the same/similar framework as the watchdog, with same/similar event types and action choices. Agreed, it'd be good to have a more generalized method of controlling the lifecycle actions in QEMU. 
Regards, Daniel -- |: Red Hat, Engineering, London-o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org-o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
Re: [Qemu-devel] Re: [PATCH, RFC] Replace assert(0) with abort() or cpu_abort()
Paolo Bonzini wrote: On 03/15/2010 07:36 PM, Markus Armbruster wrote: Please don't tell me that user emulators make abort() return. abort() is declared __noreturn__, and the optimizer may well rely on that. If the user programs make a signal (SIGABRT, SIG_IGN) call, I suppose abort() will return. On Linux, man abort says: If the SIGABRT signal is ignored, or caught by a handler that returns, the abort() function will still terminate the process. It does this by restoring the default disposition for SIGABRT and then raising the signal for a second time. However, I have a suspicion that I've seen abort() return on some other OS in the distant past, maybe SunOS 4. I wouldn't rely on abort() always terminating the process on all OSes. -- Jamie
Re: [Qemu-devel] Re: [PATCH, RFC] Replace assert(0) with abort() or cpu_abort()
Markus Armbruster wrote: Paolo Bonzini pbonz...@redhat.com writes: On 03/15/2010 07:36 PM, Markus Armbruster wrote: Please don't tell me that user emulators make abort() return. abort() is declared __noreturn__, and the optimizer may well rely on that. If the user programs make a signal (SIGABRT, SIG_IGN) call, I suppose abort() will return. A program doing that gets what it asks for, and richly deserves. A guest program is also allowed to trap SIGABRT with a signal handler, and that does have some uses. E.g. cleaning up temporary files and shmem segments following a crash when calling 3rd party code. Whatever the guest does with SIGABRT, it should not result in _QEMU_ crashing - whether due to abort() returning, or QEMU's control flow jumping to the guest's signal handler from an unexpected location. -- Jamie
[Qemu-devel] [PATCH] qemu-io: fix aio help texts
Fix a few typos in the helptexts for the various aio commands. Signed-off-by: Christoph Hellwig h...@lst.de Index: qemu/qemu-io.c === --- qemu.orig/qemu-io.c 2010-03-16 19:07:43.089009269 +0100 +++ qemu/qemu-io.c 2010-03-16 19:08:36.597005148 +0100 @@ -904,8 +904,8 @@ aio_read_help(void) \n Reads a segment of the currently open file, optionally dumping it to the\n standard output stream (with -v option) for subsequent inspection.\n - The read is performed asynchronously and should the aio_flush command \n - should be used to ensure all outstanding aio requests have been completed\n + The read is performed asynchronously and the aio_flush command must be\n + to ensure all outstanding aio requests have been completed\n -C, -- report statistics in a machine parsable format\n -P, -- use a pattern to verify read data\n -v, -- dump buffer to standard output\n @@ -1003,8 +1003,8 @@ aio_write_help(void) \n Writes into a segment of the currently open file, using a buffer\n filled with a set pattern (0xcdcdcdcd).\n - The write is performed asynchronously and should the aio_flush command \n - should be used to ensure all outstanding aio requests have been completed\n + The write is performed asynchronously and the aio_flush command must be\n + used to ensure all outstanding aio requests have been completed\n -P, -- use different pattern to fill file\n -C, -- report statistics in a machine parsable format\n -q, -- quite mode, do not show I/O statistics\n @@ -1095,7 +1095,7 @@ aio_flush_f(int argc, char **argv) static const cmdinfo_t aio_flush_cmd = { .name = aio_flush, .cfunc = aio_flush_f, - .oneline= completes all outstanding aio requets + .oneline= completes all outstanding aio requests }; static int
[Qemu-devel] [PATCH] [Also for STABLE-0.12] Don't check for bus master for old guests
Older Linux guests don't activate the bus master enable bit. So for those we can just try to be clever and track if they set the DEVICE_OK bit even though bus mastering is still disabled. Under that condition we can disable the windows safety check. With that logic in place both guests should work just fine. Without PCI hotplug breaks virtio-net in Linux 2.6.34 guests. Signed-off-by: Alexander Graf ag...@suse.de CC: Michael S. Tsirkin m...@redhat.com --- hw/virtio-pci.c | 25 - 1 files changed, 24 insertions(+), 1 deletions(-) diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 3594152..4fc4b3c 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -76,6 +76,10 @@ * 12 is historical, and due to x86 page size. */ #define VIRTIO_PCI_QUEUE_ADDR_SHIFT12 +/* We can catch some guest bugs inside here so we continue supporting older + guests. */ +#define VIRTIO_PCI_BUG_BUS_MASTER (1 0) + /* QEMU doesn't strictly need write barriers since everything runs in * lock-step. We'll leave the calls to wmb() in though to make it obvious for * KVM or if kqemu gets SMP support. @@ -87,6 +91,7 @@ typedef struct { PCIDevice pci_dev; VirtIODevice *vdev; +uint32_t bugs; uint32_t addr; uint32_t class_code; uint32_t nvectors; @@ -138,6 +143,13 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f) if (proxy-vdev-config_vector != VIRTIO_NO_VECTOR) { return msix_vector_use(proxy-pci_dev, proxy-vdev-config_vector); } + +/* Try to find out if the guest has bus master disabled, but is + in ready state. Then we have a buggy guest OS. 
*/ +if (!(proxy-vdev-status VIRTIO_CONFIG_S_DRIVER_OK) +!(proxy-pci_dev.config[PCI_COMMAND] PCI_COMMAND_MASTER)) { +proxy-bugs |= VIRTIO_PCI_BUG_BUS_MASTER; +} return 0; } @@ -162,6 +174,7 @@ static void virtio_pci_reset(DeviceState *d) VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev); virtio_reset(proxy-vdev); msix_reset(proxy-pci_dev); +proxy-bugs = 0; } static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) @@ -205,6 +218,14 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) virtio_reset(proxy-vdev); msix_unuse_all_vectors(proxy-pci_dev); } + +/* Linux before 2.6.34 sets the device as OK without enabling + the PCI device bus master bit. In this case we need to disable + some safety checks. */ +if ((val VIRTIO_CONFIG_S_DRIVER_OK) +!(proxy-pci_dev.config[PCI_COMMAND] PCI_COMMAND_MASTER)) { +proxy-bugs |= VIRTIO_PCI_BUG_BUS_MASTER; +} break; case VIRTIO_MSI_CONFIG_VECTOR: msix_vector_unuse(proxy-pci_dev, vdev-config_vector); @@ -372,7 +393,9 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, if (PCI_COMMAND == address) { if (!(val PCI_COMMAND_MASTER)) { -proxy-vdev-status = ~VIRTIO_CONFIG_S_DRIVER_OK; +if (!(proxy-bugs VIRTIO_PCI_BUG_BUS_MASTER)) { +proxy-vdev-status = ~VIRTIO_CONFIG_S_DRIVER_OK; +} } } -- 1.6.0.2
Re: [Qemu-devel] wake-on-lan IPMI implementation; real power-off and -no-shutdown
I use -no-reboot so that when the guest does a reboot during installation, as they invariably do one or more times, QEMU exits, my script does things like eject the CD/floppy, or change it for the next in sequence, and modify the guest's installed files to add virtio drivers, install extra files, edit boot scripts and whatever else is useful, and then restart QEMU. The guest thinks it's just rebooted, but it has the virtualisation goodies in place to run better. Unfortunately with an MS-DOS 5.00 guest, -no-reboot does not work. It fails to exit QEMU; instead it just reboots. I guess that means a QJSON event would not be sent either. I suppose it uses a weird way to reboot, there are many (PS/2 controller, calling the BIOS entry point...). François.
[Qemu-devel] [PULL v2] Convert device_add to QObject / QError
Anthony ran into conflicts and asked me to rebase and send out a pull request. Complete list of conflicts: * qdev: Improve diagnostics for bad property values commit 6bf38816df80a3b50529119c5458b151b3e2c728 Adds two new errors to qdev_prop_parse(), which need conversion to QError. Resolution straightforward, just needs new QERR_PROPERTY_VALUE_IN_USE, QERR_PROPERTY_VALUE_NOT_FOUND. * scsi: Make device scsi-disk reject /dev/sg* commit 32bb404a6a4d726dfd691f75704f08257ce65ffe Adds a qemu_error() use, which needs to be changed to error_report(). * slirp: check system() success commit 24ac07dec7f23c58dc48aa7754f872781b386d46 Context changed. Resolution trivial. The following changes since commit 0aef4261ac0ec9089ade0e3a92f986cb4ba7317e: Aurelien Jarno (1): target-ppc: fix evsrwu and evsrws (second try) are available in the git repository at: git://repo.or.cz/qemu/armbru.git qerror Markus Armbruster (52): usb: Remove disabled monitor_printf() in usb_read_file() savevm: Fix -loadvm to report errors to stderr, not the monitor pc: Fix error reporting for -boot once pc: Factor common code out of pc_boot_set() and cmos_init() tools: Remove unused cur_mon from qemu-tool.c monitor: Separate default monitor and current monitor cleanly block: Simplify usb_msd_initfn() test for can read bdrv key monitor: Factor monitor_set_error() out of qemu_error_internal() error: Move qemu_error() friends from monitor.c to own file error: Simplify error sink setup error: Move qemu_error friends into their own header error: New error_printf() and error_vprintf() error: Don't abuse qemu_error() for non-error in qdev_device_help() error: Don't abuse qemu_error() for non-error in qbus_find() error: Don't abuse qemu_error() for non-error in scsi_hot_add() error: Replace qemu_error() by error_report() error: Rename qemu_error_new() to qerror_report() error: Infrastructure to track locations for error reporting error: Include the program name in error messages to stderr error: Track locations in 
configuration files QemuOpts: Fix qemu_config_parse() to catch file read errors error: Track locations on command line qdev: Fix -device and device_add to handle unsuitable bus gracefully qdev: Factor qdev_create_from_info() out of qdev_create() qdev: Hide no_user devices from users qdev: Hide ptr properties from users monitor: New monitor_cur_is_qmp() error: Let converted handlers print in human monitor error: Polish human-readable error descriptions error: New QERR_PROPERTY_NOT_FOUND error: New QERR_PROPERTY_VALUE_BAD error: New QERR_PROPERTY_VALUE_IN_USE error: New QERR_PROPERTY_VALUE_NOT_FOUND qdev: convert setting device properties to QError qdev: Relax parsing of bus option error: New QERR_BUS_NOT_FOUND error: New QERR_DEVICE_MULTIPLE_BUSSES error: New QERR_DEVICE_NO_BUS qdev: Convert qbus_find() to QError error: New error_printf_unless_qmp() error: New QERR_BAD_BUS_FOR_DEVICE error: New QERR_BUS_NO_HOTPLUG error: New QERR_DEVICE_INIT_FAILED error: New QERR_NO_BUS_FOR_DEVICE Revert qdev: Use QError for 'device not found' error error: Convert do_device_add() to QError qemu-option: Functions to convert to/from QDict qemu-option: Move the implied first name into QemuOptsList qemu-option: Rename find_list() to qemu_find_opts() external linkage monitor: New argument type 'O' monitor: Use argument type 'O' for device_add monitor: convert do_device_add() to QObject Makefile.target|1 + audio/audio.c |4 +- hw/pc.c| 35 ++ hw/pci-hotplug.c | 14 +- hw/pci.c | 14 +- hw/qdev-properties.c | 36 ++--- hw/qdev.c | 236 -- hw/qdev.h |2 +- hw/scsi-bus.c |4 +- hw/scsi-disk.c |7 +- hw/scsi-generic.c |9 +- hw/usb-bus.c |4 +- hw/usb-msd.c |4 +- hw/usb-net.c |2 +- hw/usb-serial.c|9 +- hw/virtio-net.c|5 +- hw/virtio-pci.c|4 +- hw/virtio-serial-bus.c |2 +- monitor.c | 337 +--- monitor.h |7 + net.c | 32 +++--- net/dump.c |5 +- net/slirp.c| 28 ++-- net/socket.c | 12 +- net/tap-bsd.c |7 +- net/tap-linux.c|9 +- net/tap-solaris.c |4 +- net/tap-win32.c|2 +- net/tap.c |3 +- qemu-config.c | 56 
+--- qemu-config.h |3 +- qemu-error.c | 227 qemu-error.h | 47 +++