Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On 05/25/2011 09:18 PM, Marcelo Tosatti wrote: Commit fa4491a6b667304 moved the permission check for io instructions to the ->check_perm callback. It failed to copy the port value from RDX register for string and in,out ax,dx instructions. Fix it by reading RDX register at decode stage when appropriate. Fixes FC8.32 installation. +#define Sse (1<<18) /* SSE Vector instruction */ 19/20 are still available, no need to go 64-bit just yet. /* Misc flags */ -#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ + case SrcDX: + c->src.type = OP_REG; + c->src.bytes = c->op_bytes; Needs to be 2. Otherwise we'll see extra bits from edx, or lose bits from dx if it's a 1-byte instruction. + c->src.addr.reg = &c->regs[VCPU_REGS_RDX]; + fetch_register_operand(&c->src); + break; } if (rc != X86EMUL_CONTINUE) @@ -3649,6 +3657,12 @@ done_prefixes: c->dst.addr.mem.seg = VCPU_SREG_ES; c->dst.val = 0; break; + case DstDX: + c->dst.type = OP_REG; + c->dst.bytes = c->op_bytes; 2 again. + c->dst.addr.reg = &c->regs[VCPU_REGS_RDX]; + fetch_register_operand(&c->dst); + break; case ImplicitOps: /* Special instructions do their own operand decoding. */ default: We also need to unify Src/Dst decode eventually. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks
Allow specifying an optional parameter when registering an ioport range. The callback functions provided by the registering module will be called with the same parameter. This may be used to keep context during callbacks on IO operations. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |3 ++ tools/kvm/ioport.c | 54 +-- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 8253938..2a8d74d 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -25,11 +25,14 @@ struct kvm; struct ioport_operations { bool (*io_in)(struct kvm *kvm, u16 port, void *data, int size, u32 count); bool (*io_out)(struct kvm *kvm, u16 port, void *data, int size, u32 count); + bool (*io_in_param)(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param); + bool (*io_out_param)(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param); }; void ioport__setup_legacy(void); void ioport__register(u16 port, struct ioport_operations *ops, int count); +void ioport__register_param(u16 port, struct ioport_operations *ops, int count, void *param); static inline u8 ioport__read8(u8 *data) { diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c index 1f13960..159d089 100644 --- a/tools/kvm/ioport.c +++ b/tools/kvm/ioport.c @@ -18,6 +18,7 @@ struct ioport_entry { struct rb_int_node node; struct ioport_operations*ops; + void*param; }; static struct rb_root ioport_tree = RB_ROOT; @@ -89,6 +90,29 @@ void ioport__register(u16 port, struct ioport_operations *ops, int count) ioport_insert(ioport_tree, entry); } +void ioport__register_param(u16 port, struct ioport_operations *ops, int count, void *param) +{ + struct ioport_entry *entry; + + entry = ioport_search(ioport_tree, port); + if (entry) { + pr_warning(ioport re-registered: %x, port); + rb_int_erase(ioport_tree, entry-node); + } + + entry = malloc(sizeof(*entry)); + if (entry == NULL) 
+ die(Failed allocating new ioport entry); + + *entry = (struct ioport_entry) { + .node = RB_INT_INIT(port, port + count), + .ops= ops, + .param = param, + }; + + ioport_insert(ioport_tree, entry); +} + static const char *to_direction(int direction) { if (direction == KVM_EXIT_IO_IN) @@ -105,30 +129,32 @@ static void ioport_error(u16 port, void *data, int direction, int size, u32 coun bool kvm__emulate_io(struct kvm *kvm, u16 port, void *data, int direction, int size, u32 count) { struct ioport_operations *ops; - bool ret; + bool ret = false; struct ioport_entry *entry; + void *param; entry = ioport_search(ioport_tree, port); if (!entry) goto error; - ops = entry-ops; + ops = entry-ops; + param = entry-param; if (direction == KVM_EXIT_IO_IN) { - if (!ops-io_in) - goto error; - - ret = ops-io_in(kvm, port, data, size, count); - if (!ret) - goto error; + if (!param ops-io_in) + ret = ops-io_in(kvm, port, data, size, count); + if (param ops-io_in_param) + ret = ops-io_in_param(kvm, port, data, size, count, param); } else { - if (!ops-io_out) - goto error; - - ret = ops-io_out(kvm, port, data, size, count); - if (!ret) - goto error; + if (!param ops-io_out) + ret = ops-io_out(kvm, port, data, size, count); + if (param ops-io_out_param) + ret = ops-io_out_param(kvm, port, data, size, count, param); } + + if (!ret) + goto error; + return true; error: if (ioport_debug) -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 2/8] kvm tools: Add basic ioport dynamic allocation
Add a very simple allocation of ioports. This prevents the need to coordinate ioports between different modules. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h | 11 +-- tools/kvm/ioport.c | 36 ++-- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 2a8d74d..c500f1e 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -7,6 +7,9 @@ /* some ports we reserve for own use */ #define IOPORT_DBG 0xe0 +#define IOPORT_START 0x6200 +#define IOPORT_SIZE0x400 + #define IOPORT_VESA0xa200 #define IOPORT_VESA_SIZE 256 #define IOPORT_VIRTIO_P9 0xb200 /* Virtio 9P device */ @@ -20,6 +23,8 @@ #define IOPORT_VIRTIO_RNG 0xf200 /* Virtio network device */ #define IOPORT_VIRTIO_RNG_SIZE 256 +#define IOPORT_EMPTY USHRT_MAX + struct kvm; struct ioport_operations { @@ -31,8 +36,10 @@ struct ioport_operations { void ioport__setup_legacy(void); -void ioport__register(u16 port, struct ioport_operations *ops, int count); -void ioport__register_param(u16 port, struct ioport_operations *ops, int count, void *param); +u16 ioport__register(u16 port, struct ioport_operations *ops, int count); +u16 ioport__register_param(u16 port, struct ioport_operations *ops, int count, void *param); + +u16 ioport__find_free_range(void); static inline u8 ioport__read8(u8 *data) { diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c index 159d089..b2a3272 100644 --- a/tools/kvm/ioport.c +++ b/tools/kvm/ioport.c @@ -3,6 +3,7 @@ #include kvm/kvm.h #include kvm/util.h #include kvm/rbtree-interval.h +#include kvm/mutex.h #include linux/kvm.h /* for KVM_EXIT_* */ #include linux/types.h @@ -21,9 +22,23 @@ struct ioport_entry { void*param; }; +static u16 free_io_port_idx; +DEFINE_MUTEX(free_io_port_idx_lock); static struct rb_root ioport_tree = RB_ROOT; bool ioport_debug; +static u16 ioport__find_free_port(void) +{ + u16 free_port; + + mutex_lock(free_io_port_idx_lock); + 
free_port = IOPORT_START + free_io_port_idx * IOPORT_SIZE; + free_io_port_idx++; + mutex_unlock(free_io_port_idx_lock); + + return free_port; +} + static struct ioport_entry *ioport_search(struct rb_root *root, u64 addr) { struct rb_int_node *node; @@ -68,10 +83,13 @@ static struct ioport_operations dummy_write_only_ioport_ops = { .io_out = dummy_io_out, }; -void ioport__register(u16 port, struct ioport_operations *ops, int count) +u16 ioport__register(u16 port, struct ioport_operations *ops, int count) { struct ioport_entry *entry; + if (port == IOPORT_EMPTY) + port = ioport__find_free_port(); + entry = ioport_search(ioport_tree, port); if (entry) { pr_warning(ioport re-registered: %x, port); @@ -88,12 +106,17 @@ void ioport__register(u16 port, struct ioport_operations *ops, int count) }; ioport_insert(ioport_tree, entry); + + return port; } -void ioport__register_param(u16 port, struct ioport_operations *ops, int count, void *param) +u16 ioport__register_param(u16 port, struct ioport_operations *ops, int count, void *param) { struct ioport_entry *entry; + if (port == IOPORT_EMPTY) + port = ioport__find_free_port(); + entry = ioport_search(ioport_tree, port); if (entry) { pr_warning(ioport re-registered: %x, port); @@ -111,6 +134,8 @@ void ioport__register_param(u16 port, struct ioport_operations *ops, int count, }; ioport_insert(ioport_tree, entry); + + return port; } static const char *to_direction(int direction) @@ -126,6 +151,13 @@ static void ioport_error(u16 port, void *data, int direction, int size, u32 coun fprintf(stderr, IO error: %s port=%x, size=%d, count=%u\n, to_direction(direction), port, size, count); } +u16 ioport__find_free_range(void) +{ + static u16 cur_loc; + + return IOPORT_START + (cur_loc++ * IOPORT_SIZE); +} + bool kvm__emulate_io(struct kvm *kvm, u16 port, void *data, int direction, int size, u32 count) { struct ioport_operations *ops; -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to 
majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 3/8] kvm tools: Use ioport context to control blk devices
Since ioports now has the ability to pass context to its callbacks, we can implement multiple blk devices more efficiently. We can get a ptr to the 'current' blk dev on each ioport call, which means that we don't need to keep track of the blk device allocation and ioport distribution within the module. The advantages are easier management of multiple blk devices and removal of any hardcoded limits to the amount of possible blk devices. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |2 - tools/kvm/virtio/blk.c | 75 ++-- 2 files changed, 26 insertions(+), 51 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index c500f1e..47f9fb5 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -14,8 +14,6 @@ #define IOPORT_VESA_SIZE 256 #define IOPORT_VIRTIO_P9 0xb200 /* Virtio 9P device */ #define IOPORT_VIRTIO_P9_SIZE 256 -#define IOPORT_VIRTIO_BLK 0xc200 /* Virtio block device */ -#define IOPORT_VIRTIO_BLK_SIZE 0x200 #define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */ #define IOPORT_VIRTIO_CONSOLE_SIZE 256 #define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */ diff --git a/tools/kvm/virtio/blk.c b/tools/kvm/virtio/blk.c index 25ce61f..cb103fc 100644 --- a/tools/kvm/virtio/blk.c +++ b/tools/kvm/virtio/blk.c @@ -14,6 +14,7 @@ #include linux/virtio_ring.h #include linux/virtio_blk.h +#include linux/list.h #include linux/types.h #include pthread.h @@ -34,15 +35,16 @@ struct blk_dev_job { struct blk_dev { pthread_mutex_t mutex; + struct list_headlist; struct virtio_blk_configblk_config; struct disk_image *disk; + u64 base_addr; u32 host_features; u32 guest_features; u16 config_vector; u8 status; u8 isr; - u8 idx; /* virtio queue */ u16 queue_selector; @@ -52,7 +54,7 @@ struct blk_dev { struct pci_device_headerpci_hdr; }; -static struct blk_dev *bdevs[VIRTIO_BLK_MAX_DEV]; +static LIST_HEAD(bdevs); static bool virtio_blk_dev_in(struct blk_dev *bdev, void 
*data, unsigned long offset, int size, u32 count) { @@ -66,22 +68,14 @@ static bool virtio_blk_dev_in(struct blk_dev *bdev, void *data, unsigned long of return true; } -/* Translate port into device id + offset in that device addr space */ -static void virtio_blk_port2dev(u16 port, u16 base, u16 size, u16 *dev_idx, u16 *offset) -{ - *dev_idx= (port - base) / size; - *offset = port - (base + *dev_idx * size); -} - -static bool virtio_blk_pci_io_in(struct kvm *kvm, u16 port, void *data, int size, u32 count) +static bool virtio_blk_pci_io_in(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param) { struct blk_dev *bdev; - u16 offset, dev_idx; + u16 offset; bool ret = true; - virtio_blk_port2dev(port, IOPORT_VIRTIO_BLK, IOPORT_VIRTIO_BLK_SIZE, dev_idx, offset); - - bdev = bdevs[dev_idx]; + bdev= param; + offset = port - bdev-base_addr; mutex_lock(bdev-mutex); @@ -178,15 +172,14 @@ static void virtio_blk_do_io(struct kvm *kvm, void *param) virt_queue__trigger_irq(vq, bdev-pci_hdr.irq_line, bdev-isr, kvm); } -static bool virtio_blk_pci_io_out(struct kvm *kvm, u16 port, void *data, int size, u32 count) +static bool virtio_blk_pci_io_out(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param) { struct blk_dev *bdev; - u16 offset, dev_idx; + u16 offset; bool ret = true; - virtio_blk_port2dev(port, IOPORT_VIRTIO_BLK, IOPORT_VIRTIO_BLK_SIZE, dev_idx, offset); - - bdev = bdevs[dev_idx]; + bdev= param; + offset = port - bdev-base_addr; mutex_lock(bdev-mutex); @@ -246,48 +239,29 @@ static bool virtio_blk_pci_io_out(struct kvm *kvm, u16 port, void *data, int siz } static struct ioport_operations virtio_blk_io_ops = { - .io_in = virtio_blk_pci_io_in, - .io_out = virtio_blk_pci_io_out, + .io_in_param= virtio_blk_pci_io_in, + .io_out_param = virtio_blk_pci_io_out, }; -static int virtio_blk_find_empty_dev(void) -{ - int i; - - for (i = 0; i VIRTIO_BLK_MAX_DEV; i++) { - if (bdevs[i] == NULL) - return i; - } - - return -1; -} - void 
virtio_blk__init(struct kvm *kvm, struct disk_image *disk) { u16 blk_dev_base_addr; u8 dev, pin, line;
[PATCH v2 5/8] kvm tools: Use dynamic IO port allocation in vesa driver
Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/hw/vesa.c|7 +++ tools/kvm/include/kvm/ioport.h |2 -- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/kvm/hw/vesa.c b/tools/kvm/hw/vesa.c index 6ab07ee..9315510 100644 --- a/tools/kvm/hw/vesa.c +++ b/tools/kvm/hw/vesa.c @@ -49,7 +49,6 @@ static struct pci_device_header vesa_pci_device = { .class = 0x03, .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, .subsys_id = PCI_SUBSYSTEM_ID_VESA, - .bar[0] = IOPORT_VESA | PCI_BASE_ADDRESS_SPACE_IO, .bar[1] = VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY, }; @@ -66,17 +65,17 @@ void vesa__init(struct kvm *kvm) { u8 dev, line, pin; pthread_t thread; + u16 vesa_base_addr; if (irq__register_device(PCI_DEVICE_ID_VESA, dev, pin, line) 0) return; vesa_pci_device.irq_pin = pin; vesa_pci_device.irq_line= line; - + vesa_base_addr = ioport__register(IOPORT_EMPTY, vesa_io_ops, IOPORT_SIZE); + vesa_pci_device.bar[0] = vesa_base_addr | PCI_BASE_ADDRESS_SPACE_IO; pci__register(vesa_pci_device, dev); - ioport__register(IOPORT_VESA, vesa_io_ops, IOPORT_VESA_SIZE); - kvm__register_mmio(VESA_MEM_ADDR, VESA_MEM_SIZE, vesa_mmio_callback); pthread_create(thread, NULL, vesa__dovnc, kvm); diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index ffa6893..5dee9d2 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -10,8 +10,6 @@ #define IOPORT_START 0x6200 #define IOPORT_SIZE0x400 -#define IOPORT_VESA0xa200 -#define IOPORT_VESA_SIZE 256 #define IOPORT_VIRTIO_P9 0xb200 /* Virtio 9P device */ #define IOPORT_VIRTIO_P9_SIZE 256 #define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */ -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 6/8] kvm tools: Use dynamic IO port allocation in 9p driver
Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |2 -- tools/kvm/virtio/9p.c | 12 +++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 5dee9d2..a6bcc6a 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -10,8 +10,6 @@ #define IOPORT_START 0x6200 #define IOPORT_SIZE0x400 -#define IOPORT_VIRTIO_P9 0xb200 /* Virtio 9P device */ -#define IOPORT_VIRTIO_P9_SIZE 256 #define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */ #define IOPORT_VIRTIO_CONSOLE_SIZE 256 #define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */ diff --git a/tools/kvm/virtio/9p.c b/tools/kvm/virtio/9p.c index e307592..af21463 100644 --- a/tools/kvm/virtio/9p.c +++ b/tools/kvm/virtio/9p.c @@ -50,7 +50,6 @@ static struct pci_device_header virtio_p9_pci_device = { .class = 0x01, .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, .subsys_id = VIRTIO_ID_9P, - .bar[0] = IOPORT_VIRTIO_P9 | PCI_BASE_ADDRESS_SPACE_IO, }; struct p9_dev { @@ -59,6 +58,7 @@ struct p9_dev { u16 config_vector; u32 features; struct virtio_9p_config *config; + u16 base_addr; /* virtio queue */ u16 queue_selector; @@ -96,7 +96,7 @@ static bool virtio_p9_pci_io_in(struct kvm *kvm, u16 port, void *data, int size, unsigned long offset; bool ret = true; - offset = port - IOPORT_VIRTIO_P9; + offset = port - p9dev.base_addr; switch (offset) { case VIRTIO_PCI_HOST_FEATURES: @@ -584,7 +584,7 @@ static bool virtio_p9_pci_io_out(struct kvm *kvm, u16 port, void *data, int size unsigned long offset; bool ret = true; - offset = port - IOPORT_VIRTIO_P9; + offset = port - p9dev.base_addr; switch (offset) { case VIRTIO_MSI_QUEUE_VECTOR: @@ -636,6 +636,7 @@ void virtio_9p__init(struct kvm *kvm, const char *root) { u8 pin, line, dev; u32 i, root_len; + u16 p9_base_addr; p9dev.config = calloc(1, sizeof(*p9dev.config) + sizeof(VIRTIO_P9_TAG)); if (p9dev.config == NULL) @@ -662,7 
+663,8 @@ void virtio_9p__init(struct kvm *kvm, const char *root) virtio_p9_pci_device.irq_pin= pin; virtio_p9_pci_device.irq_line = line; + p9_base_addr= ioport__register(IOPORT_EMPTY, virtio_p9_io_ops, IOPORT_SIZE); + virtio_p9_pci_device.bar[0] = p9_base_addr | PCI_BASE_ADDRESS_SPACE_IO; + p9dev.base_addr = p9_base_addr; pci__register(virtio_p9_pci_device, dev); - - ioport__register(IOPORT_VIRTIO_P9, virtio_p9_io_ops, IOPORT_VIRTIO_P9_SIZE); } -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 7/8] kvm tools: Use dynamic IO port allocation in virtio-console
Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |2 -- tools/kvm/virtio/console.c | 11 +++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index a6bcc6a..0c68e8c 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -10,8 +10,6 @@ #define IOPORT_START 0x6200 #define IOPORT_SIZE0x400 -#define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */ -#define IOPORT_VIRTIO_CONSOLE_SIZE 256 #define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */ #define IOPORT_VIRTIO_NET_SIZE 256 diff --git a/tools/kvm/virtio/console.c b/tools/kvm/virtio/console.c index a0b..a954f22 100644 --- a/tools/kvm/virtio/console.c +++ b/tools/kvm/virtio/console.c @@ -36,7 +36,6 @@ static struct pci_device_header virtio_console_pci_device = { .class = 0x078000, .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, .subsys_id = VIRTIO_ID_CONSOLE, - .bar[0] = IOPORT_VIRTIO_CONSOLE | PCI_BASE_ADDRESS_SPACE_IO, }; struct con_dev { @@ -50,6 +49,7 @@ struct con_dev { u8 status; u8 isr; u16 queue_selector; + u16 base_addr; void*jobs[VIRTIO_CONSOLE_NUM_QUEUES]; }; @@ -113,7 +113,7 @@ static bool virtio_console_pci_io_device_specific_in(void *data, unsigned long o static bool virtio_console_pci_io_in(struct kvm *kvm, u16 port, void *data, int size, u32 count) { - unsigned long offset = port - IOPORT_VIRTIO_CONSOLE; + unsigned long offset = port - cdev.base_addr; bool ret = true; mutex_lock(cdev.mutex); @@ -181,7 +181,7 @@ static void virtio_console_handle_callback(struct kvm *kvm, void *param) static bool virtio_console_pci_io_out(struct kvm *kvm, u16 port, void *data, int size, u32 count) { - unsigned long offset = port - IOPORT_VIRTIO_CONSOLE; + unsigned long offset = port - cdev.base_addr; bool ret = true; mutex_lock(cdev.mutex); @@ -243,12 +243,15 @@ static struct ioport_operations virtio_console_io_ops = { void virtio_console__init(struct kvm 
*kvm) { u8 dev, line, pin; + u16 console_base_addr; if (irq__register_device(VIRTIO_ID_CONSOLE, dev, pin, line) 0) return; virtio_console_pci_device.irq_pin = pin; virtio_console_pci_device.irq_line = line; + console_base_addr = ioport__register(IOPORT_EMPTY, virtio_console_io_ops, IOPORT_SIZE); + virtio_console_pci_device.bar[0]= console_base_addr | PCI_BASE_ADDRESS_SPACE_IO; + cdev.base_addr = console_base_addr; pci__register(virtio_console_pci_device, dev); - ioport__register(IOPORT_VIRTIO_CONSOLE, virtio_console_io_ops, IOPORT_VIRTIO_CONSOLE_SIZE); } -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 8/8] kvm tools: Use dynamic IO port allocation in virtio-net
Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |3 --- tools/kvm/virtio/net.c | 12 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 0c68e8c..396928b 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -10,9 +10,6 @@ #define IOPORT_START 0x6200 #define IOPORT_SIZE0x400 -#define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */ -#define IOPORT_VIRTIO_NET_SIZE 256 - #define IOPORT_EMPTY USHRT_MAX struct kvm; diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c index 649bc0f..7e4400a 100644 --- a/tools/kvm/virtio/net.c +++ b/tools/kvm/virtio/net.c @@ -37,7 +37,6 @@ static struct pci_device_header pci_header = { .class = 0x02, .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, .subsys_id = VIRTIO_ID_NET, - .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, }; struct net_device { @@ -51,6 +50,7 @@ struct net_device { u8 status; u8 isr; u16 queue_selector; + u16 base_addr; pthread_t io_rx_thread; pthread_mutex_t io_rx_lock; @@ -166,7 +166,7 @@ static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offse static bool virtio_net_pci_io_in(struct kvm *kvm, u16 port, void *data, int size, u32 count) { - unsigned long offset = port - IOPORT_VIRTIO_NET; + unsigned long offset = port - ndev.base_addr; boolret = true; mutex_lock(ndev.mutex); @@ -230,7 +230,7 @@ static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index) static bool virtio_net_pci_io_out(struct kvm *kvm, u16 port, void *data, int size, u32 count) { - unsigned long offset = port - IOPORT_VIRTIO_NET; + unsigned long offset = port - ndev.base_addr; boolret = true; mutex_lock(ndev.mutex); @@ -387,14 +387,18 @@ void virtio_net__init(const struct virtio_net_parameters *params) { if (virtio_net__tap_init(params)) { u8 dev, line, pin; + u16 net_base_addr; if (irq__register_device(VIRTIO_ID_NET, dev, pin, line) 0) 
return; pci_header.irq_pin = pin; pci_header.irq_line = line; + net_base_addr = ioport__register(IOPORT_EMPTY, virtio_net_io_ops, IOPORT_SIZE); + pci_header.bar[0] = net_base_addr | PCI_BASE_ADDRESS_SPACE_IO; + ndev.base_addr = net_base_addr; + pci__register(pci_header, dev); - ioport__register(IOPORT_VIRTIO_NET, virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE); virtio_net__io_thread_init(params-kvm); } -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 4/8] kvm tools: Add support for multiple virtio-rng devices
Since multiple hardware rng devices of the same type are currently unsupported by the kernel, this serves more as an example of a basic virtio driver under kvm tools and can be used to debug the PCI layer. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h|2 - tools/kvm/include/kvm/parse-options.h |9 +++ tools/kvm/include/kvm/virtio-rng.h|1 + tools/kvm/kvm-run.c |8 ++- tools/kvm/virtio/rng.c| 126 ++--- 5 files changed, 100 insertions(+), 46 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 47f9fb5..ffa6893 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -18,8 +18,6 @@ #define IOPORT_VIRTIO_CONSOLE_SIZE 256 #define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */ #define IOPORT_VIRTIO_NET_SIZE 256 -#define IOPORT_VIRTIO_RNG 0xf200 /* Virtio network device */ -#define IOPORT_VIRTIO_RNG_SIZE 256 #define IOPORT_EMPTY USHRT_MAX diff --git a/tools/kvm/include/kvm/parse-options.h b/tools/kvm/include/kvm/parse-options.h index 2d5c99e..6bf9a1d 100644 --- a/tools/kvm/include/kvm/parse-options.h +++ b/tools/kvm/include/kvm/parse-options.h @@ -132,6 +132,15 @@ intptr_t defval; .help = (h) \ } +#define OPT_INCR(s, l, v, h)\ +{ \ + .type = OPTION_INCR,\ + .short_name = (s), \ + .long_name = (l), \ + .value = check_vtype(v, int *), \ + .help = (h) \ +} + #define OPT_GROUP(h)\ { \ .type = OPTION_GROUP, \ diff --git a/tools/kvm/include/kvm/virtio-rng.h b/tools/kvm/include/kvm/virtio-rng.h index 7015c1f..c0a413b 100644 --- a/tools/kvm/include/kvm/virtio-rng.h +++ b/tools/kvm/include/kvm/virtio-rng.h @@ -4,5 +4,6 @@ struct kvm; void virtio_rng__init(struct kvm *kvm); +void virtio_rng__delete_all(struct kvm *kvm); #endif /* KVM__RNG_VIRTIO_H */ diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c index adbb25b..76b5782 100644 --- a/tools/kvm/kvm-run.c +++ b/tools/kvm/kvm-run.c @@ -52,6 +52,7 @@ static __thread struct kvm_cpu *current_kvm_cpu; static u64 ram_size; 
static u8 image_count; +static int virtio_rng; static const char *kernel_cmdline; static const char *kernel_filename; static const char *vmlinux_filename; @@ -66,7 +67,6 @@ static const char *script; static const char *virtio_9p_dir; static bool single_step; static bool readonly_image[MAX_DISK_IMAGES]; -static bool virtio_rng; static bool vnc; extern bool ioport_debug; extern int active_console; @@ -107,7 +107,7 @@ static const struct option options[] = { OPT_CALLBACK('d', disk, NULL, image, Disk image, img_name_parser), OPT_STRING('\0', console, console, serial or virtio, Console to use), - OPT_BOOLEAN('\0', rng, virtio_rng, + OPT_INCR('\0', rng, virtio_rng, Enable virtio Random Number Generator), OPT_STRING('\0', kvm-dev, kvm_dev, kvm-dev, KVM device file), OPT_STRING('\0', virtio-9p, virtio_9p_dir, root dir, @@ -570,7 +570,8 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) virtio_console__init(kvm); if (virtio_rng) - virtio_rng__init(kvm); + while (virtio_rng--) + virtio_rng__init(kvm); if (!network) network = DEFAULT_NETWORK; @@ -631,6 +632,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) } virtio_blk__delete_all(kvm); + virtio_rng__delete_all(kvm); disk_image__close_all(kvm-disks, image_count); kvm__delete(kvm); diff --git a/tools/kvm/virtio/rng.c b/tools/kvm/virtio/rng.c index 9bd0098..f71a59b 100644 --- a/tools/kvm/virtio/rng.c +++ b/tools/kvm/virtio/rng.c @@ -5,7 +5,6 @@ #include kvm/disk-image.h #include kvm/virtio.h #include kvm/ioport.h -#include kvm/mutex.h #include kvm/util.h #include kvm/kvm.h #include kvm/pci.h @@ -15,6 +14,7 @@ #include linux/virtio_ring.h #include linux/virtio_rng.h +#include linux/list.h #include fcntl.h #include sys/types.h #include sys/stat.h @@ -23,18 +23,17 @@ #define NUM_VIRT_QUEUES1 #define VIRTIO_RNG_QUEUE_SIZE 128 -static struct pci_device_header virtio_rng_pci_device = { - .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, - .device_id = PCI_DEVICE_ID_VIRTIO_RNG, - .header_type= 
PCI_HEADER_TYPE_NORMAL, - .revision_id= 0, - .class = 0x01, - .subsys_vendor_id =
Re: Restoring saved guest causes guest to reboot
On 05/25/2011 09:49 AM, Markus Schade wrote: Git bisect tells me that this is the first bad commit: -%- aff48baa34c033318ad322ecbf2e4bcd891b29ca is the first bad commit commit aff48baa34c033318ad322ecbf2e4bcd891b29ca Author: Avi Kivity a...@redhat.com Date: Sun Dec 5 18:56:11 2010 +0200 KVM: Fetch guest cr3 from hardware on demand Instead of syncing the guest cr3 every exit, which is expensive on vmx with ept enabled, sync it only on demand. [sheng: fix incorrect cr3 seen by Windows XP] Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Avi Kivity a...@redhat.com Does your machine have ept? (cat /sys/module/kvm_intel/parameters/ept) -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On Thu, May 26, 2011 at 09:31:50AM +0300, Avi Kivity wrote: On 05/25/2011 09:18 PM, Marcelo Tosatti wrote: Commit fa4491a6b667304 moved the permission check for io instructions to the ->check_perm callback. It failed to copy the port value from RDX register for string and in,out ax,dx instructions. Fix it by reading RDX register at decode stage when appropriate. Fixes FC8.32 installation. +#define Sse (1<<18) /* SSE Vector instruction */ 19/20 are still available, no need to go 64-bit just yet. /* Misc flags */ -#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ +case SrcDX: +c->src.type = OP_REG; +c->src.bytes = c->op_bytes; Needs to be 2. Otherwise we'll see extra bits from edx, or lose bits from dx if it's a 1-byte instruction. But those extra bits will be dropped by check_perm_in() anyway. Can c->op_bytes ever be 1? +c->src.addr.reg = &c->regs[VCPU_REGS_RDX]; +fetch_register_operand(&c->src); +break; } if (rc != X86EMUL_CONTINUE) @@ -3649,6 +3657,12 @@ done_prefixes: c->dst.addr.mem.seg = VCPU_SREG_ES; c->dst.val = 0; break; +case DstDX: +c->dst.type = OP_REG; +c->dst.bytes = c->op_bytes; 2 again. +c->dst.addr.reg = &c->regs[VCPU_REGS_RDX]; +fetch_register_operand(&c->dst); +break; case ImplicitOps: /* Special instructions do their own operand decoding. */ default: We also need to unify Src/Dst decode eventually. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On 05/26/2011 09:55 AM, Gleb Natapov wrote: + case SrcDX: + c->src.type = OP_REG; + c->src.bytes = c->op_bytes; Needs to be 2. Otherwise we'll see extra bits from edx, or lose bits from dx if it's a 1-byte instruction. But those extra bits will be dropped by check_perm_in() anyway. It isn't nice to depend on it. btw, Marcelo, the patch should also make use of the decode during execution: case 0xef: /* out dx,(e/r)ax */ c->dst.val = c->regs[VCPU_REGS_RDX]; ^^ can drop do_io_out: ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, c->src.val, 1); c->dst.type = OP_NONE; /* Disable writeback. */ break; Can c->op_bytes ever be 1? in %dx, %al -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On 05/26/2011 10:02 AM, Avi Kivity wrote: Can c->op_bytes ever be 1? in %dx, %al er, that doesn't change op_bytes. Still, op_bytes is irrelevant for SrcDX, the 16-bit version is always used. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On Thu, May 26, 2011 at 10:04:24AM +0300, Avi Kivity wrote: On 05/26/2011 10:02 AM, Avi Kivity wrote: Can c->op_bytes ever be 1? in %dx, %al er, that doesn't change op_bytes. Yep. Still, op_bytes is irrelevant for SrcDX, the 16-bit version is always used. If SrcDX/DstDX will be used only for decoding in/out instructions then yes. Otherwise it is nice to have a more general decoder. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On 05/26/2011 09:07 AM, Gleb Natapov wrote: Still, op_bytes is irrelevant for SrcDX, the 16-bit version is always used. If SrcDX/DstDX will be used only for decoding in/out instruction then yes. Otherwise it is nice to have more general decoder. Not counting instructions that read/write many registers (rdmsr/wrmsr, mul/imul/div/idiv, rdtsc, etc.), I think the only other instruction with an implicit DstDX is cwd/cdq/cqo. Since cwd/cdq/cqo needs c->dst.bytes = c->src.bytes (not op_bytes), I think DstDX is not really reusable beyond port instructions. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 4/4] rbd: Add bdrv_truncate implementation
Am 25.05.2011 22:34, schrieb Josh Durgin: Signed-off-by: Josh Durgin josh.dur...@dreamhost.com --- block/rbd.c | 15 +++ 1 files changed, 15 insertions(+), 0 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index a44d160..b95b1eb 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -688,6 +688,20 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs) return info.size; } +static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset) +{ +BDRVRBDState *s = bs->opaque; +int r; + +r = rbd_resize(s->image, offset); +if (r < 0) { +error_report("failed to resize rbd image"); +return -EIO; +} Don't print an error message here. The caller will do it, too, so we end up with two error messages saying the same thing. What kind of error code does rbd_resize return? If it is a valid errno value, you should return r instead of turning it into EIO. Kevin -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 2/4] rbd: allow configuration of rados from the rbd filename
Am 25.05.2011 22:34, schrieb Josh Durgin: The new format is rbd:pool/image[@snapshot][:option1=value1[:option2=value2...]] Each option is used to configure rados, and may be any Ceph option, or conf. The conf option specifies a Ceph configuration file to read. This allows rbd volumes from more than one Ceph cluster to be used by specifying different monitor addresses, as well as having different logging levels or locations for different volumes. Signed-off-by: Josh Durgin josh.dur...@dreamhost.com --- block/rbd.c | 119 ++ 1 files changed, 102 insertions(+), 17 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index 2cee70d..d346a21 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -23,13 +23,17 @@ /* * When specifying the image filename use: * - * rbd:poolname/devicename + * rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]] * * poolname must be the name of an existing rados pool * * devicename is the basename for all objects used to * emulate the raw device. * + * Each option given is used to configure rados, and may be + * any Ceph option, or conf. The conf option specifies + * a Ceph configuration file to read. + * * Metadata information (image size, ...) is stored in an * object with the name devicename.rbd. 
* @@ -123,7 +127,8 @@ static int qemu_rbd_next_tok(char *dst, int dst_len, static int qemu_rbd_parsename(const char *filename, char *pool, int pool_len, char *snap, int snap_len, - char *name, int name_len) + char *name, int name_len, + char *conf, int conf_len) { const char *start; char *p, *buf; @@ -135,28 +140,84 @@ static int qemu_rbd_parsename(const char *filename, buf = qemu_strdup(start); p = buf; +*snap = '\0'; +*conf = '\0'; ret = qemu_rbd_next_tok(pool, pool_len, p, '/', pool name, p); if (ret 0 || !p) { ret = -EINVAL; goto done; } -ret = qemu_rbd_next_tok(name, name_len, p, '@', object name, p); -if (ret 0) { -goto done; + +if (strchr(p, '@')) { +ret = qemu_rbd_next_tok(name, name_len, p, '@', object name, p); +if (ret 0) { +goto done; +} +ret = qemu_rbd_next_tok(snap, snap_len, p, ':', snap name, p); +} else { +ret = qemu_rbd_next_tok(name, name_len, p, ':', object name, p); } -if (!p) { -*snap = '\0'; +if (ret 0 || !p) { goto done; } -ret = qemu_rbd_next_tok(snap, snap_len, p, '\0', snap name, p); +ret = qemu_rbd_next_tok(conf, conf_len, p, '\0', configuration, p); done: qemu_free(buf); return ret; } +static int qemu_rbd_set_conf(rados_t cluster, const char *conf) +{ +char *p, *buf; +char name[RBD_MAX_CONF_NAME_SIZE]; +char value[RBD_MAX_CONF_VAL_SIZE]; +int ret = 0; + +buf = qemu_strdup(conf); +p = buf; + +while (p) { +ret = qemu_rbd_next_tok(name, sizeof(name), p, +'=', conf option name, p); +if (ret 0) { +break; +} + +if (!p) { +error_report(conf option %s has no value, name); +ret = -EINVAL; +break; +} + +ret = qemu_rbd_next_tok(value, sizeof(value), p, +':', conf option value, p); +if (ret 0) { +break; +} + +if (strncmp(name, conf, strlen(conf))) { Do you really only want to check if name _starts_ with conf? Kevin -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] KVM: x86 emulator: Avoid clearing the whole decode_cache
On 05/25/2011 05:09 AM, Takuya Yoshikawa wrote: From: Takuya Yoshikawayoshikawa.tak...@oss.ntt.co.jp During tracing the emulator, we noticed that init_emulate_ctxt() sometimes took a bit longer time than we expected. This patch is for mitigating the problem by some degree. By looking into the function, we soon notice that it clears the whole decode_cache whose size is about 2.5K bytes now. Furthermore, most of the bytes are taken for the two read_cache arrays, which are used only by a few instructions. Considering the fact that we are not assuming the cache arrays have been cleared when we store actual data, we do not need to clear the arrays: 2K bytes elimination. In addition, we can avoid clearing the fetch_cache and regs arrays. This patch changes the initialization not to clear the arrays. On our 64-bit host, init_emulate_ctxt() becomes 0.3 to 0.5us faster with this patch applied. Thanks, applied. It strikes me that initializing the emulator in x86.c is the wrong thing. We should move the entire thing to x86_decode_insn(). We'll need a few more callbacks for that, though (register, eflags); eventually we can read just registers that are used and write only registers that were updated. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] KVM: x86 emulator: Avoid clearing the whole decode_cache
On Thu, May 26, 2011 at 11:19:03AM +0300, Avi Kivity wrote: On 05/25/2011 05:09 AM, Takuya Yoshikawa wrote: From: Takuya Yoshikawayoshikawa.tak...@oss.ntt.co.jp During tracing the emulator, we noticed that init_emulate_ctxt() sometimes took a bit longer time than we expected. This patch is for mitigating the problem by some degree. By looking into the function, we soon notice that it clears the whole decode_cache whose size is about 2.5K bytes now. Furthermore, most of the bytes are taken for the two read_cache arrays, which are used only by a few instructions. Considering the fact that we are not assuming the cache arrays have been cleared when we store actual data, we do not need to clear the arrays: 2K bytes elimination. In addition, we can avoid clearing the fetch_cache and regs arrays. This patch changes the initialization not to clear the arrays. On our 64-bit host, init_emulate_ctxt() becomes 0.3 to 0.5us faster with this patch applied. Thanks, applied. It strikes me that initializing the emulator in x86.c is the wrong thing. We should move the entire thing to x86_decode_insn(). We initialize it in task switch and interrupt injection code too. We'll need a few more callbacks for that, though (register, eflags); eventually we can read just registers that are used and write only registers that were updated. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On Thu, May 26, 2011 at 09:49:21AM +0200, Paolo Bonzini wrote: On 05/26/2011 09:07 AM, Gleb Natapov wrote: Still, op_bytes is irrelevant for SrcDX, the 16-bit version is always used. If SrcDX/DstDX will be used only for decoding in/out instruction then yes. Otherwise it is nice to have more general decoder. Not counting instructions that read/write many registers (rdmsr/wrmsr, mul/imul/div/idiv, rdtsc, etc.), I think the only other instruction with an implicit DstDX is cwd/cdq/cqo. Since cwd/cdq/cqo needs c-dst.bytes = c-src.bytes (not op_bytes) I think DstDX is not really reusable beyond port instructions. Why would c-dst.bytes != c-src.bytes for cwd/cdq/cqo if we'll set c-dst.bytes to op_bytes during decode? -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/10] qemu-kvm: Cleanup and switch to upstream - Part III
On 05/20/2011 08:12 PM, Jan Kiszka wrote: This is a rather short round as the next and final one cannot be split up very well. We start with three code cleanup patches, then work towards using upstream kvm_cpu_exec, and finally rework the core's PIO access management used for device assignment. Please review/merge. Thanks, applied. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V5 2/6 net-next] netdevice.h: Add zero-copy flag in netdevice
On Wed, May 25, 2011 at 03:49:40PM -0700, Shirley Ma wrote: On Fri, 2011-05-20 at 02:41 +0300, Michael S. Tsirkin wrote: So the requirements are - data must be released in a timely fashion (e.g. unlike virtio-net tun or bridge) The current patch doesn't enable tun zero-copy. tun will copy data It's not an issue now. We can disallow macvtap attach to bridge when zero-copy is enabled. Attach macvtap to a tun device though. Or e.g. veth device ... So there should be so generic way to disable zerocopy. It can either be a whitelist or a blacklist. - SG support - HIGHDMA support (on arches where this makes sense) This can be checked by device flags. OK, but pls note that SG can get turned off dynamically. - no filtering based on data (data is mapped in guest) - on fast path no calls to skb_copy, skb_clone, pskb_copy, pskb_expand_head as these are slow Any calls to skb_copy, skb_clone, pskb_copy, pskb_expand_head will do a copy. The performance should be the same as none zero-copy case before. I'm guessing a copy is cheaper than get_user_pages+copy+put_page. But maybe not by much. Care checking that? I have done/tested the patch V6, will send it out for review tomorrow. I am looking at where there are some cases, skb remains the same for filtering. To reliably filter on data I think we'll need to copy it first, otherwise guest can change it. Most filters only look at the header though. First 2 requirements are a must, all other requirements are just dependencies to make sure zero copy will be faster than non zero copy. Using a new feature bit is probably the simplest approach to this. macvtap on top of most physical NICs most likely works correctly so it seems a bit more work than it needs to be, but it's also the safest one I think ... For macvtap/vhost zero-copy we can use SG HIGHDMA to enable it, it looks safe to me once patching skb_copy, skb_clone, pskb_copy, pskb_expand_head. To extend zero-copy in other usages, we can have a new feature bit later. 
Is that reasonable? Thanks Shirley Is the problem the extra work needed to extend feature bits? -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks
On Thu, 2011-05-26 at 09:42 +0300, Sasha Levin wrote: Allow specifying an optional parameter when registering an ioport range. The callback functions provided by the registering module will be called with the same parameter. This may be used to keep context during callbacks on IO operations. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |3 ++ tools/kvm/ioport.c | 54 +-- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 8253938..2a8d74d 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -25,11 +25,14 @@ struct kvm; struct ioport_operations { bool (*io_in)(struct kvm *kvm, u16 port, void *data, int size, u32 count); bool (*io_out)(struct kvm *kvm, u16 port, void *data, int size, u32 count); + bool (*io_in_param)(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param); + bool (*io_out_param)(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param); So why not make that 'param' unconditional for io_in and io_out and just pass NULL if it's not needed? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks
On Thu, 2011-05-26 at 11:53 +0300, Pekka Enberg wrote: On Thu, 2011-05-26 at 09:42 +0300, Sasha Levin wrote: Allow specifying an optional parameter when registering an ioport range. The callback functions provided by the registering module will be called with the same parameter. This may be used to keep context during callbacks on IO operations. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |3 ++ tools/kvm/ioport.c | 54 +-- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 8253938..2a8d74d 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -25,11 +25,14 @@ struct kvm; struct ioport_operations { bool (*io_in)(struct kvm *kvm, u16 port, void *data, int size, u32 count); bool (*io_out)(struct kvm *kvm, u16 port, void *data, int size, u32 count); + bool (*io_in_param)(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param); + bool (*io_out_param)(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param); So why not make that 'param' unconditional for io_in and io_out and just pass NULL if it's not needed? I've wanted to keep the original interface clean, Most of the IO port users don't (and probably won't) require a parameter. -- Sasha. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks
On Thu, May 26, 2011 at 12:02 PM, Sasha Levin levinsasha...@gmail.com wrote: On Thu, 2011-05-26 at 11:53 +0300, Pekka Enberg wrote: On Thu, 2011-05-26 at 09:42 +0300, Sasha Levin wrote: Allow specifying an optional parameter when registering an ioport range. The callback functions provided by the registering module will be called with the same parameter. This may be used to keep context during callbacks on IO operations. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h | 3 ++ tools/kvm/ioport.c | 54 +-- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 8253938..2a8d74d 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -25,11 +25,14 @@ struct kvm; struct ioport_operations { bool (*io_in)(struct kvm *kvm, u16 port, void *data, int size, u32 count); bool (*io_out)(struct kvm *kvm, u16 port, void *data, int size, u32 count); + bool (*io_in_param)(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param); + bool (*io_out_param)(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param); So why not make that 'param' unconditional for io_in and io_out and just pass NULL if it's not needed? I've wanted to keep the original interface clean, Most of the IO port users don't (and probably won't) require a parameter. Well now struct ioport_operations isn't very clean is it - or the code that needs to determine which function pointer to call?-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On 05/26/2011 10:26 AM, Gleb Natapov wrote: Why would c->dst.bytes != c->src.bytes for cwd/cdq/cqo if we'll set c->dst.bytes to op_bytes during decode? Duh, you're right, cwd/cdq/cqo uses SrcAcc which has c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; so in practice c->src.bytes = c->op_bytes. I still find it confusing that DstDX would use c->op_bytes without honoring ByteOp unlike pretty much everything else; but yes, there is a possible use of DstDX outside in/out. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks
On Thu, 2011-05-26 at 12:04 +0300, Pekka Enberg wrote: On Thu, May 26, 2011 at 12:02 PM, Sasha Levin levinsasha...@gmail.com wrote: On Thu, 2011-05-26 at 11:53 +0300, Pekka Enberg wrote: On Thu, 2011-05-26 at 09:42 +0300, Sasha Levin wrote: Allow specifying an optional parameter when registering an ioport range. The callback functions provided by the registering module will be called with the same parameter. This may be used to keep context during callbacks on IO operations. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |3 ++ tools/kvm/ioport.c | 54 +-- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 8253938..2a8d74d 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -25,11 +25,14 @@ struct kvm; struct ioport_operations { bool (*io_in)(struct kvm *kvm, u16 port, void *data, int size, u32 count); bool (*io_out)(struct kvm *kvm, u16 port, void *data, int size, u32 count); + bool (*io_in_param)(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param); + bool (*io_out_param)(struct kvm *kvm, u16 port, void *data, int size, u32 count, void *param); So why not make that 'param' unconditional for io_in and io_out and just pass NULL if it's not needed? I've wanted to keep the original interface clean, Most of the IO port users don't (and probably won't) require a parameter. Well now struct ioport_operations isn't very clean is it - or the code that needs to determine which function pointer to call?-) struct ioport_operations is a bit more messy, but it's one spot instead of adding a 'parameter' to each module that doesn't really need it. My assumption is that most ioport users now and in the future won't need it, it just solves several special cases more easily (multiple devices which share same handling functions). -- Sasha. 
-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks
On Thu, May 26, 2011 at 12:14 PM, Sasha Levin levinsasha...@gmail.com wrote: I've wanted to keep the original interface clean, Most of the IO port users don't (and probably won't) require a parameter. Well now struct ioport_operations isn't very clean is it - or the code that needs to determine which function pointer to call?-) struct ioport_operations is a bit more messy, but it's one spot instead of adding a 'parameter' to each module that doesn't really need it. My assumption is that most ioport users now and in the future won't need it, it just solves several special cases more easily (multiple devices which share same handling functions). Hey, that's not an excuse to make struct ioport_operations 'bit messy'! Look at any kernel code that uses ops like we do here and you will see we don't do APIs like this. One option here is to rename 'struct ioport_entry' to 'struct ioport' and pass a pointer to that as the first argument to all of the ops. That's what most APIs in the kernel do anyway. Pekka -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On 05/26/2011 11:02 AM, Gleb Natapov wrote: We can make it honor ByteOp. There will be no instruction that will specify DstDX | ByteOp though. "in %dx, %al" and "out %al, %dx" will, via D2bv. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On Thu, May 26, 2011 at 11:23:44AM +0200, Paolo Bonzini wrote: On 05/26/2011 11:02 AM, Gleb Natapov wrote: We can make it honor ByteOp. There will be no instruction that will specify DstDX | ByteOp though. in %dx, %al and out %al, %dx will via D2bv. Yeah. Should ignore ByteOp then. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks
On Thu, 2011-05-26 at 12:20 +0300, Pekka Enberg wrote: On Thu, May 26, 2011 at 12:14 PM, Sasha Levin levinsasha...@gmail.com wrote: I've wanted to keep the original interface clean, Most of the IO port users don't (and probably won't) require a parameter. Well now struct ioport_operations isn't very clean is it - or the code that needs to determine which function pointer to call?-) struct ioport_operations is a bit more messy, but it's one spot instead of adding a 'parameter' to each module that doesn't really need it. My assumption is that most ioport users now and in the future won't need it, it just solves several special cases more easily (multiple devices which share same handling functions). Hey, that's not an excuse to make struct ioport_operations 'bit messy'! Look at any kernel code that uses ops like we do here and you will see we don't do APIs like this. One option here is to rename 'struct ioport_entry' to 'struct ioport' and pass a pointer to that as the first argument to all of the ops. That's what most APIs in the kernel do anyway. Why do it like that? this way users of the callback functions will need to know the internal structure of struct ioport_entry. -- Sasha. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks
On Thu, May 26, 2011 at 12:38 PM, Sasha Levin levinsasha...@gmail.com wrote: One option here is to rename 'struct ioport_entry' to 'struct ioport' and pass a pointer to that as the first argument to all of the ops. That's what most APIs in the kernel do anyway. Why do it like that? this way users of the callback functions will need to know the internal structure of struct ioport_entry. Look at 'struct inode' or similar data structure in the kernel. That's how we do it. You can then also do s/params/priv/. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks
Hi Sasha, On Thu, May 26, 2011 at 12:38 PM, Sasha Levin levinsasha...@gmail.com wrote: One option here is to rename 'struct ioport_entry' to 'struct ioport' and pass a pointer to that as the first argument to all of the ops. That's what most APIs in the kernel do anyway. Why do it like that? this way users of the callback functions will need to know the internal structure of struct ioport_entry. On Thu, May 26, 2011 at 12:43 PM, Pekka Enberg penb...@kernel.org wrote: Look at 'struct inode' or similar data structure in the kernel. That's how we do it. You can then also do s/params/priv/. Btw, the whole notion of 'internal structure' for structs in C code is a pretty broken concept. In most cases, you just end up passing untyped fragments of the data to callers which makes following the data flow in code difficult. Passing 'struct ioport' down to the code makes the code more obvious and readable. Encapsulation is important but emulating that with hiding structs in .c files isn't helpful at all. Face it, there's no proper support for that in C so you just need to rely on conventions to do it. Pekka -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On Thu, May 26, 2011 at 11:00:24AM +0200, Paolo Bonzini wrote: On 05/26/2011 10:26 AM, Gleb Natapov wrote: Why would c-dst.bytes != c-src.bytes for cwd/cdq/cqo if we'll set c-dst.bytes to op_bytes during decode? Duh, you're right, cwd/cdq/cqo uses SrcAcc which has c-src.bytes = (c-d ByteOp) ? 1 : c-op_bytes; so in practice c-src.bytes = c-op_bytes. I still find it confusing that DstDX would use c-op_bytes without honoring ByteOp unlike pretty much everything else; but yes, there is a possible use of DstDX outside in/out. We can make it honor ByteOp. There will be no instruction that will specify DstDX | ByteOp though. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
Hello, after I tried updating our production host to 0.14.0 or 0.14.1, our windows terminal server stopped booting. Here's BSOD screen: http://nik.lbox.cz/public/wincrash.png reverting to 0.13.5 fixes the problem. I can't reproduce this on testing hardware though :( exact guest version: Microsoft Windows Server 2008 R2 Enterprise 6.1.7600 build 7600 x86_64, 4GB RAM, 1CPU host is currently running 2.6.38.7 (but I had the problem also with 2.6.37) it's 8core intel E5310 with 16GB RAM since the hosts on which I'm experiencing this problem are production machines, my possibilities of testing are a bit limited (bisecting this will be quite problematic), but I'll do what I can to help fix this... Could somebody please have a look on it? thanks a lot in advance! n. -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - pgpR7DT5kOvBM.pgp Description: PGP signature
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
On Thu, May 26, 2011 at 11:59:10AM +0200, Nikola Ciprich wrote: Hello, after I tried updating our production host to 0.14.0 or 0.14.1, our windows terminal server stopped booting. Here's BSOD screen: http://nik.lbox.cz/public/wincrash.png reverting to 0.13.5 fixes the problem. I can't reproduce this on testing hardware though :( exact guest version: Microsoft Windows Server 2008 R2 Enterprise 6.1.7600 build 7600 x86_64, 4GB RAM, 1CPU host is currently running 2.6.38.7 (but I had the problem also with 2.6.37) it's 8core intel E5310 with 16GB RAM What is your command line? since the hosts on which I'm experiencing this problem are production machines, my possibilities of testing are a bit limited (bisecting this will be quite problematic), but I'll do what I can to help fix this... Could somebody please have a look on it? thanks a lot in advance! You can make a copy from your production image, install 0.14 version in different place from 0.13 and experiment. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
Hello Gleb! What is you command line? currently it's: /usr/bin/qemu-kvm -S -M pc-0.13 -enable-kvm -m 4096 -smp 1,sockets=1,cores=1,threads=1 -name vmwts02 -uuid 1e501300-dc48-11df-a690-00304834195b -nodefconfig -nodefaults -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/vmwts02.monitor,server,nowait -mon chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot c -drive file=/dev/vgshared/vmwts02-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -drive if=none,media=cdrom,id=drive-ide0-1-0,readonly=on,format=raw -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 -netdev tap,fd=15,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:61:01:00,bus=pci.0,addr=0x3 -usb -vnc 0.0.0.0:30802 -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 but this is with 0.13.5 (so it's running well). it's started using libvirt, I guess with 0.14.0 the command line should be very similar. (I can provide it later if needed). You can make a copy from your production image, install 0.14 version in different place from 0.13 and experiment. yup, I think I've tried it also with exact copy and wasn't able to reproduce it, but I'll try it again and report soon. n. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - pgpg199JWTMf1.pgp Description: PGP signature
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
On Thu, May 26, 2011 at 12:20:56PM +0200, Nikola Ciprich wrote: You can make a copy from your production image, install 0.14 version in different place from 0.13 and experiment. yup, I think I've tried it also with exact copy and wasn't able to reproduce it, but I'll try it again and report soon. Then check image file permission please. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 1/8] kvm tools: Add optional parameter used in ioport callbacks
Allow specifying an optional parameter when registering an ioport range. The callback functions provided by the registering module will be called with the same parameter. This may be used to keep context during callbacks on IO operations. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/hw/rtc.c | 10 +++--- tools/kvm/hw/serial.c |6 ++-- tools/kvm/hw/vesa.c|6 ++-- tools/kvm/include/kvm/ioport.h | 14 ++-- tools/kvm/ioport.c | 71 +-- tools/kvm/pci.c| 12 +++--- tools/kvm/virtio/9p.c |6 ++-- tools/kvm/virtio/blk.c |6 ++-- tools/kvm/virtio/console.c |6 ++-- tools/kvm/virtio/net.c |6 ++-- tools/kvm/virtio/rng.c |6 ++-- 11 files changed, 74 insertions(+), 75 deletions(-) diff --git a/tools/kvm/hw/rtc.c b/tools/kvm/hw/rtc.c index 6735e82..146f660 100644 --- a/tools/kvm/hw/rtc.c +++ b/tools/kvm/hw/rtc.c @@ -19,7 +19,7 @@ static inline unsigned char bin2bcd(unsigned val) return ((val / 10) 4) + val % 10; } -static bool cmos_ram_data_in(struct kvm *kvm, u16 port, void *data, int size, u32 count) +static bool cmos_ram_data_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { struct tm *tm; time_t ti; @@ -52,7 +52,7 @@ static bool cmos_ram_data_in(struct kvm *kvm, u16 port, void *data, int size, u3 return true; } -static bool cmos_ram_data_out(struct kvm *kvm, u16 port, void *data, int size, u32 count) +static bool cmos_ram_data_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { return true; } @@ -62,7 +62,7 @@ static struct ioport_operations cmos_ram_data_ioport_ops = { .io_in = cmos_ram_data_in, }; -static bool cmos_ram_index_out(struct kvm *kvm, u16 port, void *data, int size, u32 count) +static bool cmos_ram_index_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { u8 value; @@ -82,6 +82,6 @@ static struct ioport_operations cmos_ram_index_ioport_ops = { void rtc__init(void) { /* PORT 0070-007F - CMOS RAM/RTC (REAL TIME CLOCK) */ - 
ioport__register(0x0070, cmos_ram_index_ioport_ops, 1); - ioport__register(0x0071, cmos_ram_data_ioport_ops, 1); + ioport__register(0x0070, cmos_ram_index_ioport_ops, 1, NULL); + ioport__register(0x0071, cmos_ram_data_ioport_ops, 1, NULL); } diff --git a/tools/kvm/hw/serial.c b/tools/kvm/hw/serial.c index beebbba..1199264 100644 --- a/tools/kvm/hw/serial.c +++ b/tools/kvm/hw/serial.c @@ -164,7 +164,7 @@ static struct serial8250_device *find_device(u16 port) return NULL; } -static bool serial8250_out(struct kvm *kvm, u16 port, void *data, int size, u32 count) +static bool serial8250_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { struct serial8250_device *dev; u16 offset; @@ -252,7 +252,7 @@ out_unlock: return ret; } -static bool serial8250_in(struct kvm *kvm, u16 port, void *data, int size, u32 count) +static bool serial8250_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { struct serial8250_device *dev; u16 offset; @@ -339,7 +339,7 @@ static struct ioport_operations serial8250_ops = { static void serial8250__device_init(struct kvm *kvm, struct serial8250_device *dev) { - ioport__register(dev-iobase, serial8250_ops, 8); + ioport__register(dev-iobase, serial8250_ops, 8, NULL); kvm__irq_line(kvm, dev-irq, 0); } diff --git a/tools/kvm/hw/vesa.c b/tools/kvm/hw/vesa.c index 6ab07ee..ec4788c 100644 --- a/tools/kvm/hw/vesa.c +++ b/tools/kvm/hw/vesa.c @@ -26,12 +26,12 @@ static char videomem[VESA_MEM_SIZE]; -static bool vesa_pci_io_in(struct kvm *kvm, u16 port, void *data, int size, u32 count) +static bool vesa_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { return true; } -static bool vesa_pci_io_out(struct kvm *kvm, u16 port, void *data, int size, u32 count) +static bool vesa_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { return true; } @@ -75,7 +75,7 @@ void vesa__init(struct kvm *kvm) 
pci__register(vesa_pci_device, dev); - ioport__register(IOPORT_VESA, vesa_io_ops, IOPORT_VESA_SIZE); + ioport__register(IOPORT_VESA, vesa_io_ops, IOPORT_VESA_SIZE, NULL); kvm__register_mmio(VESA_MEM_ADDR, VESA_MEM_SIZE, vesa_mmio_callback); diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 8253938..67b4a6f 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -1,6 +1,8 @@ #ifndef KVM__IOPORT_H #define KVM__IOPORT_H +#include kvm/rbtree-interval.h + #include stdbool.h #include asm/types.h
[PATCH v3 2/8] kvm tools: Add basic ioport dynamic allocation
Add a very simple allocation of ioports. This prevents the need to coordinate ioports between different modules. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |7 ++- tools/kvm/ioport.c | 24 +++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 67b4a6f..49f919f 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -9,6 +9,9 @@ /* some ports we reserve for own use */ #define IOPORT_DBG 0xe0 +#define IOPORT_START 0x6200 +#define IOPORT_SIZE0x400 + #define IOPORT_VESA0xa200 #define IOPORT_VESA_SIZE 256 #define IOPORT_VIRTIO_P9 0xb200 /* Virtio 9P device */ @@ -22,6 +25,8 @@ #define IOPORT_VIRTIO_RNG 0xf200 /* Virtio network device */ #define IOPORT_VIRTIO_RNG_SIZE 256 +#define IOPORT_EMPTY USHRT_MAX + struct kvm; struct ioport { @@ -37,7 +42,7 @@ struct ioport_operations { void ioport__setup_legacy(void); -void ioport__register(u16 port, struct ioport_operations *ops, int count, void *param); +u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void *param); static inline u8 ioport__read8(u8 *data) { diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c index 92ad152..492ce16 100644 --- a/tools/kvm/ioport.c +++ b/tools/kvm/ioport.c @@ -3,6 +3,9 @@ #include kvm/kvm.h #include kvm/util.h +#include kvm/rbtree-interval.h +#include kvm/mutex.h + #include linux/kvm.h /* for KVM_EXIT_* */ #include linux/types.h @@ -14,9 +17,23 @@ #define ioport_node(n) rb_entry(n, struct ioport, node) +static u16 free_io_port_idx; +DEFINE_MUTEX(free_io_port_idx_lock); static struct rb_root ioport_tree = RB_ROOT; bool ioport_debug; +static u16 ioport__find_free_port(void) +{ + u16 free_port; + + mutex_lock(free_io_port_idx_lock); + free_port = IOPORT_START + free_io_port_idx * IOPORT_SIZE; + free_io_port_idx++; + mutex_unlock(free_io_port_idx_lock); + + return free_port; +} + static struct ioport *ioport_search(struct 
rb_root *root, u64 addr) { struct rb_int_node *node; @@ -61,10 +78,13 @@ static struct ioport_operations dummy_write_only_ioport_ops = { .io_out = dummy_io_out, }; -void ioport__register(u16 port, struct ioport_operations *ops, int count, void *param) +u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void *param) { struct ioport *entry; + if (port == IOPORT_EMPTY) + port = ioport__find_free_port(); + entry = ioport_search(ioport_tree, port); if (entry) { pr_warning(ioport re-registered: %x, port); @@ -82,6 +102,8 @@ void ioport__register(u16 port, struct ioport_operations *ops, int count, void * }; ioport_insert(ioport_tree, entry); + + return port; } static const char *to_direction(int direction) -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 3/8] kvm tools: Use ioport context to control blk devices
Since ioports now has the ability to pass context to its callbacks, we can implement multiple blk devices more efficiently. We can get a ptr to the 'current' blk dev on each ioport call, which means that we don't need to keep track of the blk device allocation and ioport distribution within the module. The advantages are easier management of multiple blk devices and removal of any hardcoded limits to the amount of possible blk devices. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |2 - tools/kvm/virtio/blk.c | 71 +-- 2 files changed, 24 insertions(+), 49 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 49f919f..e53c03c 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -16,8 +16,6 @@ #define IOPORT_VESA_SIZE 256 #define IOPORT_VIRTIO_P9 0xb200 /* Virtio 9P device */ #define IOPORT_VIRTIO_P9_SIZE 256 -#define IOPORT_VIRTIO_BLK 0xc200 /* Virtio block device */ -#define IOPORT_VIRTIO_BLK_SIZE 0x200 #define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */ #define IOPORT_VIRTIO_CONSOLE_SIZE 256 #define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */ diff --git a/tools/kvm/virtio/blk.c b/tools/kvm/virtio/blk.c index 5720c7f..a8f9d8c 100644 --- a/tools/kvm/virtio/blk.c +++ b/tools/kvm/virtio/blk.c @@ -14,6 +14,7 @@ #include linux/virtio_ring.h #include linux/virtio_blk.h +#include linux/list.h #include linux/types.h #include pthread.h @@ -34,15 +35,16 @@ struct blk_dev_job { struct blk_dev { pthread_mutex_t mutex; + struct list_headlist; struct virtio_blk_configblk_config; struct disk_image *disk; + u64 base_addr; u32 host_features; u32 guest_features; u16 config_vector; u8 status; u8 isr; - u8 idx; /* virtio queue */ u16 queue_selector; @@ -52,7 +54,7 @@ struct blk_dev { struct pci_device_headerpci_hdr; }; -static struct blk_dev *bdevs[VIRTIO_BLK_MAX_DEV]; +static LIST_HEAD(bdevs); static bool virtio_blk_dev_in(struct blk_dev *bdev, void 
*data, unsigned long offset, int size, u32 count) { @@ -66,22 +68,14 @@ static bool virtio_blk_dev_in(struct blk_dev *bdev, void *data, unsigned long of return true; } -/* Translate port into device id + offset in that device addr space */ -static void virtio_blk_port2dev(u16 port, u16 base, u16 size, u16 *dev_idx, u16 *offset) -{ - *dev_idx= (port - base) / size; - *offset = port - (base + *dev_idx * size); -} - static bool virtio_blk_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { struct blk_dev *bdev; - u16 offset, dev_idx; + u16 offset; bool ret = true; - virtio_blk_port2dev(port, IOPORT_VIRTIO_BLK, IOPORT_VIRTIO_BLK_SIZE, dev_idx, offset); - - bdev = bdevs[dev_idx]; + bdev= ioport-priv; + offset = port - bdev-base_addr; mutex_lock(bdev-mutex); @@ -181,12 +175,11 @@ static void virtio_blk_do_io(struct kvm *kvm, void *param) static bool virtio_blk_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { struct blk_dev *bdev; - u16 offset, dev_idx; + u16 offset; bool ret = true; - virtio_blk_port2dev(port, IOPORT_VIRTIO_BLK, IOPORT_VIRTIO_BLK_SIZE, dev_idx, offset); - - bdev = bdevs[dev_idx]; + bdev= ioport-priv; + offset = port - bdev-base_addr; mutex_lock(bdev-mutex); @@ -246,48 +239,29 @@ static bool virtio_blk_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 po } static struct ioport_operations virtio_blk_io_ops = { - .io_in = virtio_blk_pci_io_in, - .io_out = virtio_blk_pci_io_out, + .io_in = virtio_blk_pci_io_in, + .io_out = virtio_blk_pci_io_out, }; -static int virtio_blk_find_empty_dev(void) -{ - int i; - - for (i = 0; i VIRTIO_BLK_MAX_DEV; i++) { - if (bdevs[i] == NULL) - return i; - } - - return -1; -} - void virtio_blk__init(struct kvm *kvm, struct disk_image *disk) { u16 blk_dev_base_addr; u8 dev, pin, line; struct blk_dev *bdev; - int new_dev_idx; if (!disk) return; - new_dev_idx = virtio_blk_find_empty_dev(); - if (new_dev_idx 0) - die(Could not find an empty 
block device slot); - -
[PATCH v3 4/8] kvm tools: Add support for multiple virtio-rng devices
Since multiple hardware rng devices of the same type are currently unsupported by the kernel, this serves more as an example of a basic virtio driver under kvm tools and can be used to debug the PCI layer. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h|2 - tools/kvm/include/kvm/parse-options.h |9 +++ tools/kvm/include/kvm/virtio-rng.h|1 + tools/kvm/kvm-run.c |8 ++- tools/kvm/virtio/rng.c| 118 ++-- 5 files changed, 96 insertions(+), 42 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index e53c03c..55d53e0 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -20,8 +20,6 @@ #define IOPORT_VIRTIO_CONSOLE_SIZE 256 #define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */ #define IOPORT_VIRTIO_NET_SIZE 256 -#define IOPORT_VIRTIO_RNG 0xf200 /* Virtio network device */ -#define IOPORT_VIRTIO_RNG_SIZE 256 #define IOPORT_EMPTY USHRT_MAX diff --git a/tools/kvm/include/kvm/parse-options.h b/tools/kvm/include/kvm/parse-options.h index 2d5c99e..6bf9a1d 100644 --- a/tools/kvm/include/kvm/parse-options.h +++ b/tools/kvm/include/kvm/parse-options.h @@ -132,6 +132,15 @@ intptr_t defval; .help = (h) \ } +#define OPT_INCR(s, l, v, h)\ +{ \ + .type = OPTION_INCR,\ + .short_name = (s), \ + .long_name = (l), \ + .value = check_vtype(v, int *), \ + .help = (h) \ +} + #define OPT_GROUP(h)\ { \ .type = OPTION_GROUP, \ diff --git a/tools/kvm/include/kvm/virtio-rng.h b/tools/kvm/include/kvm/virtio-rng.h index 7015c1f..c0a413b 100644 --- a/tools/kvm/include/kvm/virtio-rng.h +++ b/tools/kvm/include/kvm/virtio-rng.h @@ -4,5 +4,6 @@ struct kvm; void virtio_rng__init(struct kvm *kvm); +void virtio_rng__delete_all(struct kvm *kvm); #endif /* KVM__RNG_VIRTIO_H */ diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c index adbb25b..76b5782 100644 --- a/tools/kvm/kvm-run.c +++ b/tools/kvm/kvm-run.c @@ -52,6 +52,7 @@ static __thread struct kvm_cpu *current_kvm_cpu; static u64 ram_size; 
static u8 image_count; +static int virtio_rng; static const char *kernel_cmdline; static const char *kernel_filename; static const char *vmlinux_filename; @@ -66,7 +67,6 @@ static const char *script; static const char *virtio_9p_dir; static bool single_step; static bool readonly_image[MAX_DISK_IMAGES]; -static bool virtio_rng; static bool vnc; extern bool ioport_debug; extern int active_console; @@ -107,7 +107,7 @@ static const struct option options[] = { OPT_CALLBACK('d', disk, NULL, image, Disk image, img_name_parser), OPT_STRING('\0', console, console, serial or virtio, Console to use), - OPT_BOOLEAN('\0', rng, virtio_rng, + OPT_INCR('\0', rng, virtio_rng, Enable virtio Random Number Generator), OPT_STRING('\0', kvm-dev, kvm_dev, kvm-dev, KVM device file), OPT_STRING('\0', virtio-9p, virtio_9p_dir, root dir, @@ -570,7 +570,8 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) virtio_console__init(kvm); if (virtio_rng) - virtio_rng__init(kvm); + while (virtio_rng--) + virtio_rng__init(kvm); if (!network) network = DEFAULT_NETWORK; @@ -631,6 +632,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) } virtio_blk__delete_all(kvm); + virtio_rng__delete_all(kvm); disk_image__close_all(kvm-disks, image_count); kvm__delete(kvm); diff --git a/tools/kvm/virtio/rng.c b/tools/kvm/virtio/rng.c index a553f6b..1a3bda3 100644 --- a/tools/kvm/virtio/rng.c +++ b/tools/kvm/virtio/rng.c @@ -5,7 +5,6 @@ #include kvm/disk-image.h #include kvm/virtio.h #include kvm/ioport.h -#include kvm/mutex.h #include kvm/util.h #include kvm/kvm.h #include kvm/pci.h @@ -15,6 +14,7 @@ #include linux/virtio_ring.h #include linux/virtio_rng.h +#include linux/list.h #include fcntl.h #include sys/types.h #include sys/stat.h @@ -23,18 +23,17 @@ #define NUM_VIRT_QUEUES1 #define VIRTIO_RNG_QUEUE_SIZE 128 -static struct pci_device_header virtio_rng_pci_device = { - .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET, - .device_id = PCI_DEVICE_ID_VIRTIO_RNG, - .header_type= 
PCI_HEADER_TYPE_NORMAL, - .revision_id= 0, - .class = 0x01, - .subsys_vendor_id =
[PATCH v3 6/8] kvm tools: Use dynamic IO port allocation in 9p driver
Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |2 -- tools/kvm/virtio/9p.c | 12 +++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 84eb65a..310f75d 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -12,8 +12,6 @@ #define IOPORT_START 0x6200 #define IOPORT_SIZE0x400 -#define IOPORT_VIRTIO_P9 0xb200 /* Virtio 9P device */ -#define IOPORT_VIRTIO_P9_SIZE 256 #define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */ #define IOPORT_VIRTIO_CONSOLE_SIZE 256 #define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */ diff --git a/tools/kvm/virtio/9p.c b/tools/kvm/virtio/9p.c index c441089..38a997d 100644 --- a/tools/kvm/virtio/9p.c +++ b/tools/kvm/virtio/9p.c @@ -50,7 +50,6 @@ static struct pci_device_header virtio_p9_pci_device = { .class = 0x01, .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, .subsys_id = VIRTIO_ID_9P, - .bar[0] = IOPORT_VIRTIO_P9 | PCI_BASE_ADDRESS_SPACE_IO, }; struct p9_dev { @@ -59,6 +58,7 @@ struct p9_dev { u16 config_vector; u32 features; struct virtio_9p_config *config; + u16 base_addr; /* virtio queue */ u16 queue_selector; @@ -96,7 +96,7 @@ static bool virtio_p9_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port unsigned long offset; bool ret = true; - offset = port - IOPORT_VIRTIO_P9; + offset = port - p9dev.base_addr; switch (offset) { case VIRTIO_PCI_HOST_FEATURES: @@ -584,7 +584,7 @@ static bool virtio_p9_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 por unsigned long offset; bool ret = true; - offset = port - IOPORT_VIRTIO_P9; + offset = port - p9dev.base_addr; switch (offset) { case VIRTIO_MSI_QUEUE_VECTOR: @@ -636,6 +636,7 @@ void virtio_9p__init(struct kvm *kvm, const char *root) { u8 pin, line, dev; u32 i, root_len; + u16 p9_base_addr; p9dev.config = calloc(1, sizeof(*p9dev.config) + sizeof(VIRTIO_P9_TAG)); if (p9dev.config == NULL) @@ -662,7 
+663,8 @@ void virtio_9p__init(struct kvm *kvm, const char *root) virtio_p9_pci_device.irq_pin= pin; virtio_p9_pci_device.irq_line = line; + p9_base_addr= ioport__register(IOPORT_EMPTY, virtio_p9_io_ops, IOPORT_SIZE, NULL); + virtio_p9_pci_device.bar[0] = p9_base_addr | PCI_BASE_ADDRESS_SPACE_IO; + p9dev.base_addr = p9_base_addr; pci__register(virtio_p9_pci_device, dev); - - ioport__register(IOPORT_VIRTIO_P9, virtio_p9_io_ops, IOPORT_VIRTIO_P9_SIZE, NULL); } -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 5/8] kvm tools: Use dynamic IO port allocation in vesa driver
Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/hw/vesa.c|7 +++ tools/kvm/include/kvm/ioport.h |2 -- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/kvm/hw/vesa.c b/tools/kvm/hw/vesa.c index ec4788c..85fe1a9 100644 --- a/tools/kvm/hw/vesa.c +++ b/tools/kvm/hw/vesa.c @@ -49,7 +49,6 @@ static struct pci_device_header vesa_pci_device = { .class = 0x03, .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, .subsys_id = PCI_SUBSYSTEM_ID_VESA, - .bar[0] = IOPORT_VESA | PCI_BASE_ADDRESS_SPACE_IO, .bar[1] = VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY, }; @@ -66,17 +65,17 @@ void vesa__init(struct kvm *kvm) { u8 dev, line, pin; pthread_t thread; + u16 vesa_base_addr; if (irq__register_device(PCI_DEVICE_ID_VESA, dev, pin, line) 0) return; vesa_pci_device.irq_pin = pin; vesa_pci_device.irq_line= line; - + vesa_base_addr = ioport__register(IOPORT_EMPTY, vesa_io_ops, IOPORT_SIZE, NULL); + vesa_pci_device.bar[0] = vesa_base_addr | PCI_BASE_ADDRESS_SPACE_IO; pci__register(vesa_pci_device, dev); - ioport__register(IOPORT_VESA, vesa_io_ops, IOPORT_VESA_SIZE, NULL); - kvm__register_mmio(VESA_MEM_ADDR, VESA_MEM_SIZE, vesa_mmio_callback); pthread_create(thread, NULL, vesa__dovnc, kvm); diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 55d53e0..84eb65a 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -12,8 +12,6 @@ #define IOPORT_START 0x6200 #define IOPORT_SIZE0x400 -#define IOPORT_VESA0xa200 -#define IOPORT_VESA_SIZE 256 #define IOPORT_VIRTIO_P9 0xb200 /* Virtio 9P device */ #define IOPORT_VIRTIO_P9_SIZE 256 #define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */ -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 8/8] kvm tools: Use dynamic IO port allocation in virtio-net
Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |3 --- tools/kvm/virtio/net.c | 12 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 4fccbd6..59f118f 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -12,9 +12,6 @@ #define IOPORT_START 0x6200 #define IOPORT_SIZE0x400 -#define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */ -#define IOPORT_VIRTIO_NET_SIZE 256 - #define IOPORT_EMPTY USHRT_MAX struct kvm; diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c index 014205b..3064da6 100644 --- a/tools/kvm/virtio/net.c +++ b/tools/kvm/virtio/net.c @@ -37,7 +37,6 @@ static struct pci_device_header pci_header = { .class = 0x02, .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, .subsys_id = VIRTIO_ID_NET, - .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO, }; struct net_device { @@ -51,6 +50,7 @@ struct net_device { u8 status; u8 isr; u16 queue_selector; + u16 base_addr; pthread_t io_rx_thread; pthread_mutex_t io_rx_lock; @@ -166,7 +166,7 @@ static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offse static bool virtio_net_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { - unsigned long offset = port - IOPORT_VIRTIO_NET; + unsigned long offset = port - ndev.base_addr; boolret = true; mutex_lock(ndev.mutex); @@ -230,7 +230,7 @@ static void virtio_net_handle_callback(struct kvm *kvm, u16 queue_index) static bool virtio_net_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { - unsigned long offset = port - IOPORT_VIRTIO_NET; + unsigned long offset = port - ndev.base_addr; boolret = true; mutex_lock(ndev.mutex); @@ -387,14 +387,18 @@ void virtio_net__init(const struct virtio_net_parameters *params) { if (virtio_net__tap_init(params)) { u8 dev, line, pin; + u16 net_base_addr; if 
(irq__register_device(VIRTIO_ID_NET, dev, pin, line) 0) return; pci_header.irq_pin = pin; pci_header.irq_line = line; + net_base_addr = ioport__register(IOPORT_EMPTY, virtio_net_io_ops, IOPORT_SIZE, NULL); + pci_header.bar[0] = net_base_addr | PCI_BASE_ADDRESS_SPACE_IO; + ndev.base_addr = net_base_addr; + pci__register(pci_header, dev); - ioport__register(IOPORT_VIRTIO_NET, virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE, NULL); virtio_net__io_thread_init(params-kvm); } -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 7/8] kvm tools: Use dynamic IO port allocation in virtio-console
Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/ioport.h |2 -- tools/kvm/virtio/console.c | 11 +++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 310f75d..4fccbd6 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -12,8 +12,6 @@ #define IOPORT_START 0x6200 #define IOPORT_SIZE0x400 -#define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */ -#define IOPORT_VIRTIO_CONSOLE_SIZE 256 #define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */ #define IOPORT_VIRTIO_NET_SIZE 256 diff --git a/tools/kvm/virtio/console.c b/tools/kvm/virtio/console.c index 614f0d2..038e53f 100644 --- a/tools/kvm/virtio/console.c +++ b/tools/kvm/virtio/console.c @@ -36,7 +36,6 @@ static struct pci_device_header virtio_console_pci_device = { .class = 0x078000, .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET, .subsys_id = VIRTIO_ID_CONSOLE, - .bar[0] = IOPORT_VIRTIO_CONSOLE | PCI_BASE_ADDRESS_SPACE_IO, }; struct con_dev { @@ -50,6 +49,7 @@ struct con_dev { u8 status; u8 isr; u16 queue_selector; + u16 base_addr; void*jobs[VIRTIO_CONSOLE_NUM_QUEUES]; }; @@ -113,7 +113,7 @@ static bool virtio_console_pci_io_device_specific_in(void *data, unsigned long o static bool virtio_console_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { - unsigned long offset = port - IOPORT_VIRTIO_CONSOLE; + unsigned long offset = port - cdev.base_addr; bool ret = true; mutex_lock(cdev.mutex); @@ -181,7 +181,7 @@ static void virtio_console_handle_callback(struct kvm *kvm, void *param) static bool virtio_console_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size, u32 count) { - unsigned long offset = port - IOPORT_VIRTIO_CONSOLE; + unsigned long offset = port - cdev.base_addr; bool ret = true; mutex_lock(cdev.mutex); @@ -243,12 +243,15 @@ static struct ioport_operations 
virtio_console_io_ops = { void virtio_console__init(struct kvm *kvm) { u8 dev, line, pin; + u16 console_base_addr; if (irq__register_device(VIRTIO_ID_CONSOLE, dev, pin, line) 0) return; virtio_console_pci_device.irq_pin = pin; virtio_console_pci_device.irq_line = line; + console_base_addr = ioport__register(IOPORT_EMPTY, virtio_console_io_ops, IOPORT_SIZE, NULL); + virtio_console_pci_device.bar[0]= console_base_addr | PCI_BASE_ADDRESS_SPACE_IO; + cdev.base_addr = console_base_addr; pci__register(virtio_console_pci_device, dev); - ioport__register(IOPORT_VIRTIO_CONSOLE, virtio_console_io_ops, IOPORT_VIRTIO_CONSOLE_SIZE, NULL); } -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Video card passthrough success
On 05/20/2011 05:56 AM, Luke-Jr wrote: Finally decided to see just how much of the Radeon was working (eg, if it was *just* OpenCL or not), and I am happy to confirm that both video output and OpenGL acceleration are fully functional inside KVM. That's really surprising (and a really good surprise, too). We should have a wiki page to summarize what has been tried, with what version, and with what results. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: use proper port value when checking io instruction permission (v2)
On Thu, May 26, 2011 at 09:49:21AM +0200, Paolo Bonzini wrote: On 05/26/2011 09:07 AM, Gleb Natapov wrote: Still, op_bytes is irrelevant for SrcDX, the 16-bit version is always used. If SrcDX/DstDX will be used only for decoding in/out instructions then yes. Otherwise it is nice to have a more general decoder. Yes, the use of op_bytes instead of 2 had that in mind. Not counting instructions that read/write many registers (rdmsr/wrmsr, mul/imul/div/idiv, rdtsc, etc.), I think the only other instruction with an implicit DstDX is cwd/cdq/cqo. Since cwd/cdq/cqo needs c->dst.bytes = c->src.bytes (not op_bytes) I think DstDX is not really reusable beyond port instructions. Paolo OK, will switch to 2 then, thanks. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
OK, I tried copying the whole image to my test box running 0.14.0 again and it crashes with exactly the same BSOD! So now I have the test environment to play with... What should I check/try now? n. PS: I'm not sure what You mean by permissions, but I'm using LVM partitions and qemu-kvm is started by root user anyways.. On Thu, May 26, 2011 at 01:22:37PM +0300, Gleb Natapov wrote: On Thu, May 26, 2011 at 12:20:56PM +0200, Nikola Ciprich wrote: You can make a copy from your production image, install 0.14 version in different place from 0.13 and experiment. yup, I think I've tried it also with exact copy and wasn't able to reproduce it, but I'll try it again and report soon. Then check image file permission please. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - pgphmXcFxNkYX.pgp Description: PGP signature
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
On Thu, May 26, 2011 at 01:50:35PM +0200, Nikola Ciprich wrote: OK, I tried copying the whole image to my test box running 0.14.0 again and it crashes with exactly the same BSOD! So now I have the test environment to play with... What should I check/try now? n. PS: I'm not sure what You mean by permissions, but I'm using LVM partitions and qemu-kvm is started by root user anyways.. This BSOD usually indicates that Windows can't write to the boot disk. This usually happens if qemu has no permission to write to the image file. But if you are starting qemu as root this is probably not the case. So what is your 0.14 command line? On Thu, May 26, 2011 at 01:22:37PM +0300, Gleb Natapov wrote: On Thu, May 26, 2011 at 12:20:56PM +0200, Nikola Ciprich wrote: You can make a copy from your production image, install 0.14 version in different place from 0.13 and experiment. yup, I think I've tried it also with exact copy and wasn't able to reproduce it, but I'll try it again and report soon. Then check image file permission please. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
KVM: x86: use proper port value when checking io instruction permission (v3)
Commit fa4491a6b667304 moved the permission check for io instructions to the -check_perm callback. It failed to copy the port value from RDX register for string and in,out ax,dx instructions. Fix it by reading RDX register at decode stage when appropriate. Fixes FC8.32 installation. diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3bc6b7a..fc3d2d9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -47,7 +47,7 @@ #define DstDI (51) /* Destination is in ES:(E)DI */ #define DstMem64(61) /* 64bit memory operand */ #define DstImmUByte (71) /* 8-bit unsigned immediate operand */ -#define DstMask (71) +#define DstMask ((71) | (118)) /* Source operand type. */ #define SrcNone (04) /* No source operand. */ #define SrcReg (14) /* Register operand. */ @@ -64,7 +64,7 @@ #define SrcMemFAddr (0xc4) /* Source is far address in memory */ #define SrcAcc (0xd4) /* Source Accumulator */ #define SrcImmU16 (0xe4)/* Immediate operand, unsigned, 16 bits */ -#define SrcMask (0xf4) +#define SrcMask ((0xf4) | (119)) /* Generic ModRM decode. */ #define ModRM (18) /* Destination is only written; never read. 
*/ @@ -79,6 +79,8 @@ #define Prefix (314) /* Instruction varies with 66/f2/f3 prefix */ #define RMExt (414) /* Opcode extension in ModRM r/m if mod == 3 */ #define Sse (117) /* SSE Vector instruction */ +#define DstDX (118)/* Destination is in DX register */ +#define SrcDX (119)/* Source is in DX register */ /* Misc flags */ #define Prot(121) /* instruction generates #UD if not in prot-mode */ #define VendorSpecific (122) /* Vendor specific instruction */ @@ -3124,8 +3126,8 @@ static struct opcode opcode_table[256] = { I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), I(SrcImmByte | Mov | Stack, em_push), I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), - D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */ - D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */ + D2bvIP(DstDI | SrcDX | Mov | String, ins, check_perm_in), /* insb, insw/insd */ + D2bvIP(SrcSI | DstDX | String, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ X16(D(SrcImmByte)), /* 0x80 - 0x87 */ @@ -3182,8 +3184,8 @@ static struct opcode opcode_table[256] = { /* 0xE8 - 0xEF */ D(SrcImm | Stack), D(SrcImm | ImplicitOps), D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), - D2bvIP(SrcNone | DstAcc, in, check_perm_in), - D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out), + D2bvIP(SrcDX | DstAcc, in, check_perm_in), + D2bvIP(SrcAcc | DstDX, out, check_perm_out), /* 0xF0 - 0xF7 */ N, DI(ImplicitOps, icebp), N, N, DI(ImplicitOps | Priv, hlt), D(ImplicitOps), @@ -3580,6 +3582,12 @@ done_prefixes: memop.bytes = c-op_bytes + 2; goto srcmem_common; break; + case SrcDX: + c-src.type = OP_REG; + c-src.bytes = 2; + c-src.addr.reg = c-regs[VCPU_REGS_RDX]; + fetch_register_operand(c-src); + break; } if (rc != X86EMUL_CONTINUE) @@ -3649,6 +3657,12 @@ done_prefixes: c-dst.addr.mem.seg = VCPU_SREG_ES; c-dst.val = 0; break; + case DstDX: + c-dst.type = OP_REG; + c-dst.bytes = 2; + c-dst.addr.reg = c-regs[VCPU_REGS_RDX]; + 
fetch_register_operand(c-dst); + break; case ImplicitOps: /* Special instructions do their own operand decoding. */ default: @@ -3993,7 +4007,6 @@ special_insn: break; case 0xec: /* in al,dx */ case 0xed: /* in (e/r)ax,dx */ - c-src.val = c-regs[VCPU_REGS_RDX]; do_io_in: if (!pio_in_emulated(ctxt, c-dst.bytes, c-src.val, c-dst.val)) @@ -4001,7 +4014,6 @@ special_insn: break; case 0xee: /* out dx,al */ case 0xef: /* out dx,(e/r)ax */ - c-dst.val = c-regs[VCPU_REGS_RDX]; do_io_out: ops-pio_out_emulated(ctxt, c-src.bytes, c-dst.val, c-src.val, 1); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
This BSOD usually indicates that Windows can't write to the boot disk. This is usually happens if qemu has no permission to write to the image file. But if you are starting qemu as a root this is probably is not the OK, I see. case. So what is your 0.14 command line? here it goes: /usr/bin/qemu-kvm -S -M pc-0.14 -enable-kvm -m 4096 -smp 1,sockets=1,cores=1,threads=1 -name vmtst04 -uuid 1f8328b8-8849-11e0-91e9-00259009d78c -nodefconfig -nodefaults -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/vmtst04.monitor,server,nowait -mon chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev tap,fd=14,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:18:04:00,bus=pci.0,addr=0x3 -usb -vnc 0.0.0.0:24104 -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 On Thu, May 26, 2011 at 01:22:37PM +0300, Gleb Natapov wrote: On Thu, May 26, 2011 at 12:20:56PM +0200, Nikola Ciprich wrote: You can make a copy from your production image, install 0.14 version in different place from 0.13 and experiment. yup, I think I've tried it also with exact copy and wasn't able to reproduce it, but I'll try it again and report soon. Then check image file permission please. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- - Ing. 
Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - pgpVWXFpwtBQ6.pgp Description: PGP signature
Re: Restoring saved guest causes guest to reboot
On 05/26/2011 08:44 AM, Avi Kivity wrote: On 05/25/2011 09:49 AM, Markus Schade wrote: Git bisect tells me that this is the first bad commit: -%- aff48baa34c033318ad322ecbf2e4bcd891b29ca is the first bad commit commit aff48baa34c033318ad322ecbf2e4bcd891b29ca Author: Avi Kivity a...@redhat.com Date: Sun Dec 5 18:56:11 2010 +0200 KVM: Fetch guest cr3 from hardware on demand Instead of syncing the guest cr3 every exit, which is expensive on vmx with ept enabled, sync it only on demand. [sheng: fix incorrect cr3 seen by Windows XP] Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Avi Kivity a...@redhat.com Does your machine have ept? (cat /sys/module/kvm_intel/parameters/ept) Sure. (Assuming that the Y means yes). I am no C developer, but I was wondering, if the issue is related to the difference in ept_update_paging_mode_cr0 between original patch in the kvm git and the linux-2.6. git tree. Best regards, Markus -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
On Thu, May 26, 2011 at 02:03:57PM +0200, Nikola Ciprich wrote: This BSOD usually indicates that Windows can't write to the boot disk. This is usually happens if qemu has no permission to write to the image file. But if you are starting qemu as a root this is probably is not the OK, I see. May be libvirt does something funny with selinux. case. So what is your 0.14 command line? here it goes: /usr/bin/qemu-kvm -S -M pc-0.14 -enable-kvm -m 4096 -smp 1,sockets=1,cores=1,threads=1 -name vmtst04 -uuid 1f8328b8-8849-11e0-91e9-00259009d78c -nodefconfig -nodefaults -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/vmtst04.monitor,server,nowait -mon chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev tap,fd=14,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:18:04:00,bus=pci.0,addr=0x3 -usb -vnc 0.0.0.0:24104 -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 Try to run with -M pc-0.13. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
May be libvirt does something funny with selinux. it shouldn't, I don't have selinux enabled in host kernel at all.. Try to run with -M pc-0.13. tried now, same result... -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - pgpFCnydWiE6q.pgp Description: PGP signature
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
On Thu, May 26, 2011 at 02:30:00PM +0200, Nikola Ciprich wrote: May be libvirt does something funny with selinux. it shouldn't, I don't have selinux enabled in host kernel at all.. Try to run with -M pc-0.13. tried now, same result... Hmm. And what if you start qemu directly (without using libvirt) with 0.13 command line? -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
Hmm. And what if you start qemu directly (without using libvirt) with 0.13 command line? /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -boot c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104 same result... -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - pgpkaghaz1g42.pgp Description: PGP signature
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
On Thu, May 26, 2011 at 02:46:30PM +0200, Nikola Ciprich wrote: Hmm. And what if you start qemu directly (without using libvirt) with 0.13 command line? /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -boot c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104 same result... Should be more like that one with correct image path: /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -smp 1,sockets=1,cores=1,threads=1 -name vmwts02 -uuid 1e501300-dc48-11df-a690-00304834195b -nodefconfig -nodefaults -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/vmwts02.monitor,server,nowait -mon chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot c -drive file=/dev/vgshared/vmwts02-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -drive if=none,media=cdrom,id=drive-ide0-1-0,readonly=on,format=raw -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 -netdev user,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:61:01:00,bus=pci.0,addr=0x3 -usb -vnc 0.0.0.0:30802 -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
Should be more like that one with correct image path: huh, now I got a bit lost :) I tried running both: /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -smp 1,sockets=1,cores=1,threads=1 -name vmtst04 -uuid 1f8328b8-8849-11e0-91e9-00259009d78c -nodefconfig -nodefaults -chardev socket,id=char monitor,path=/var/lib/libvirt/qemu/vmtst04.monitor,server,nowait -mon chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev tap,fd=14,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:18:04:00,bus=pci.0,addr=0x3 -usb -vnc 0.0.0.0:24104 -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 and simplified: /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -rtc base=localtime -boot c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,driv e=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104 /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -boot c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104 where /dev/vgshared/vmtst04-1 is the copy of windows volume from production server. so is this ok? n. -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - pgpe5c27D1b1Q.pgp Description: PGP signature
[AUTOTEST PATCH] Fix rhel5 install
There is no ntpdate.rpm in RHEL-5, ntp.rpm has /usr/sbin/ntpdate. --- client/tests/kvm/unattended/RHEL-5-series.ks |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/unattended/RHEL-5-series.ks b/client/tests/kvm/unattended/RHEL-5-series.ks index 1d8d41b..f1568f2 100644 --- a/client/tests/kvm/unattended/RHEL-5-series.ks +++ b/client/tests/kvm/unattended/RHEL-5-series.ks @@ -22,7 +22,7 @@ poweroff @development-libs @development-tools kexec-tools -ntpdate +ntp watchdog %post --interpreter /usr/bin/python -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[AUTOTEST PATCH] Broken installs.
Hi, Started playing around with autotest. For starters, I tried to run the basic install, boot + shutdown tests for a bunch of guests I have ISO images lying around for, expecting that to work out of the box. Unfortunately that wasn't the case. RHEL 5.any doesn't install. Fixed. OpenSUSE 11.1 doesn't install either. No idea why; it fails to find the repos on the cdrom. cheers, Gerd Gerd Hoffmann (1): Fix rhel5 install client/tests/kvm/unattended/RHEL-5-series.ks |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
On Thu, May 26, 2011 at 02:56:40PM +0200, Nikola Ciprich wrote: Should be more like that one with correct image path: huh, now I got a bit lost :) I tried running both: /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -smp 1,sockets=1,cores=1,threads=1 -name vmtst04 -uuid 1f8328b8-8849-11e0-91e9-00259009d78c -nodefconfig -nodefaults -chardev socket,id=char monitor,path=/var/lib/libvirt/qemu/vmtst04.monitor,server,nowait -mon chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev tap,fd=14,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:18:04:00,bus=pci.0,addr=0x3 -usb -vnc 0.0.0.0:24104 -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 That one should fail to start. and simplified: /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -rtc base=localtime -boot c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,driv e=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104 /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -boot c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104 where /dev/vgshared/vmtst04-1 is the copy of windows volume from production server. so is this ok? No, try to run the one I gave you. Just replace the image file path. I do no have much hope it will work though, but worth trying. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
No, try to run the one I gave you. Just replace the image file path. I do no have much hope it will work though, but worth trying. OK, I tried, I just had to remove the monitor device. Still, it fails the same way... n. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - pgpnpjUPCJSjF.pgp Description: PGP signature
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
On Thu, May 26, 2011 at 03:15:19PM +0200, Nikola Ciprich wrote: No, try to run the one I gave you. Just replace the image file path. I do no have much hope it will work though, but worth trying. OK, I tried, I just had to remove the monitor device. Still, it fails the same way... According to this: http://social.msdn.microsoft.com/Forums/en-US/embeddedwindowscomponents/thread/09aae527-ff6d-4003-9e59-962d73d409ed such bsod happens because Windows can't access boot device. Your boot device is IDE. Nothing changed in this area from 13 to 14. Are you sure your image was copied correctly and is not corrupted? -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Patch v3] Enable CPU SMEP feature for KVM
This patchset enables a new CPU feature SMEP (Supervisor Mode Execution Protection) in KVM. SMEP prevents kernel from executing code in application. Updated Intel SDM describes this CPU feature. The document will be published soon. This patchset is based on Fenghua's SMEP patch series, as referred by: https://lkml.org/lkml/2011/5/17/523 Changes since v2: enable SMEP for spt mode. Signed-off-by: Yang Wei wei.y.y...@intel.com Signed-off-by: Shan Haitao haitao.s...@intel.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/paging_tmpl.h | 15 +-- arch/x86/kvm/vmx.c |9 + arch/x86/kvm/x86.c |7 +-- 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d2ac8e2..154287b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -306,6 +306,7 @@ struct kvm_vcpu_arch { unsigned long cr3; unsigned long cr4; unsigned long cr4_guest_owned_bits; + unsigned long cr4_reserved_bits; unsigned long cr8; u32 hflags; u64 efer; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6c4dc01..7e0b2f8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -120,7 +120,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gva_t addr, u32 access) { - pt_element_t pte; + pt_element_t pte, pte_smep; pt_element_t __user *ptep_user; gfn_t table_gfn; unsigned index, pt_access, uninitialized_var(pte_access); @@ -150,7 +150,11 @@ walk: } --walker-level; } + pte_smep = ~0ULL; +#else + pte_smep = ~0U; #endif + ASSERT((!is_long_mode(vcpu) is_pae(vcpu)) || (mmu-get_cr3(vcpu) CR3_NONPAE_RESERVED_BITS) == 0); @@ -234,6 +238,8 @@ walk: walker-ptes[walker-level - 1] = pte; + pte_smep = pte; + if ((walker-level == PT_PAGE_TABLE_LEVEL) || ((walker-level == PT_DIRECTORY_LEVEL) is_large_pte(pte) @@ -246,6 +252,11 @@ walk: gfn_t gfn; u32 ac; + if (unlikely(fetch_fault !user_fault)) + if ((vcpu-arch.cr4 
X86_CR4_SMEP) +(pte_smep PT_USER_MASK)) + eperm = true; + gfn = gpte_to_gfn_lvl(pte, lvl); gfn += (addr PT_LVL_OFFSET_MASK(lvl)) PAGE_SHIFT; @@ -305,7 +316,7 @@ error: walker-fault.error_code |= write_fault | user_fault; - if (fetch_fault mmu-nx) + if (fetch_fault (mmu-nx || (vcpu-arch.cr4 X86_CR4_SMEP))) walker-fault.error_code |= PFERR_FETCH_MASK; if (rsvd_fault) walker-fault.error_code |= PFERR_RSVD_MASK; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4c3fa0f..7ad24fd 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4507,6 +4507,15 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) } } } + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + if (best (best-ebx bit(X86_FEATURE_SMEP))) { + if (boot_cpu_has(X86_FEATURE_SMEP)) + vcpu-arch.cr4_reserved_bits = + ~((unsigned long)X86_CR4_SMEP); + else + best-ebx = ~(bit(X86_FEATURE_SMEP)); + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 77c9d86..6ead39e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -598,9 +598,10 @@ static void update_cpuid(struct kvm_vcpu *vcpu) int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); - unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE +| X86_CR4_PAE | X86_CR4_SMEP; - if (cr4 CR4_RESERVED_BITS) + if (cr4 vcpu-arch.cr4_reserved_bits) return 1; if (!guest_cpuid_has_xsave(vcpu) (cr4 X86_CR4_OSXSAVE)) @@ -6222,6 +6223,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_async_pf_hash_reset(vcpu); + vcpu-arch.cr4_reserved_bits = CR4_RESERVED_BITS; + return 0; fail_free_mce_banks: kfree(vcpu-arch.mce_banks); -- 1.7.4.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at
[PATCH v3] Enable CPU SMEP feature support for QEMU-KVM
This patchset enables a new CPU feature SMEP (Supervisor Mode Execution Protection) in QEMU-KVM. SMEP prevents kernel from executing code in application. Updated Intel SDM describes this CPU feature. The document will be published soon. SMEP is identified by CPUID leaf 7 EBX[7], which is 0 before. Get the right value by query KVM kernel module, so that guest can get SMEP through CPUID. Changes since v2: no changes. Signed-off-by: Yang, Wei wei.y.y...@intel.com Signed-off-by: Shan, Haitao haitao.s...@intel.com --- target-i386/cpuid.c |8 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index 091d812..cd20dbf 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -1115,6 +1115,14 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = 0; *edx = 0; break; +case 7: +if (kvm_enabled()) { +*eax = kvm_arch_get_supported_cpuid(env, 0x7, count, R_EAX); +*ebx = kvm_arch_get_supported_cpuid(env, 0x7, count, R_EBX); +*ecx = kvm_arch_get_supported_cpuid(env, 0x7, count, R_ECX); +*edx = kvm_arch_get_supported_cpuid(env, 0x7, count, R_EDX); +} +break; case 9: /* Direct Cache Access Information Leaf */ *eax = 0; /* Bits 0-31 in DCA_CAP MSR */ -- 1.7.4.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
According to this: http://social.msdn.microsoft.com/Forums/en-US/embeddedwindowscomponents/thread/09aae527-ff6d-4003-9e59-962d73d409ed such bsod happens because Windows can't access boot device. Your boot device is IDE. Nothing changed in this area from 13 to 14. Are you sure your image was copied correctly and is not corrupted? Definitely. I just tried downgrading qemu-kvm to 0.13.0 and it works again... n. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - pgppxrkeAkXEy.pgp Description: PGP signature
[PATCH 1/6] kvm tools: Prevent double assignment of guest memory info
Use values calculated and assigned to local variables instead of ignoring them. Signed-off-by: Sasha Levin <levinsasha...@gmail.com> --- tools/kvm/kvm.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c index 7284211..1d756e0 100644 --- a/tools/kvm/kvm.c +++ b/tools/kvm/kvm.c @@ -192,7 +192,7 @@ void kvm__init_ram(struct kvm *kvm) phys_size = kvm->ram_size; host_mem = kvm->ram_start; - kvm_register_mem_slot(kvm, 0, 0, kvm->ram_size, kvm->ram_start); + kvm_register_mem_slot(kvm, 0, phys_start, phys_size, host_mem); } else { /* First RAM range from zero to the PCI gap: */ -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/6] kvm tools: Exit VCPU thread only when SIGKVMEXIT is received
Currently the VCPU loop would exit when the thread received any signal. Change behaviour to exit only when SIGKVMEXIT is received. This change prevents from the guest to terminate when unrelated signals are processed by the thread (for example, when attaching a debugger). Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/kvm-cpu.h |2 ++ tools/kvm/kvm-cpu.c | 15 ++- tools/kvm/kvm-run.c |2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h index f241e86..b2b6fce 100644 --- a/tools/kvm/include/kvm/kvm-cpu.h +++ b/tools/kvm/include/kvm/kvm-cpu.h @@ -21,6 +21,8 @@ struct kvm_cpu { struct kvm_fpu fpu; struct kvm_msrs *msrs; /* dynamically allocated */ + + u8 is_running; }; struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id); diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c index 331e025..de0591f 100644 --- a/tools/kvm/kvm-cpu.c +++ b/tools/kvm/kvm-cpu.c @@ -14,6 +14,8 @@ #include errno.h #include stdio.h +extern __thread struct kvm_cpu *current_kvm_cpu; + static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) { return vcpu-sregs.cr0 0x01; @@ -87,6 +89,8 @@ struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) if (vcpu-kvm_run == MAP_FAILED) die(unable to mmap vcpu fd); + vcpu-is_running = true; + return vcpu; } @@ -381,7 +385,10 @@ void kvm_cpu__run(struct kvm_cpu *vcpu) static void kvm_cpu_exit_handler(int signum) { - /* Don't do anything here */ + if (current_kvm_cpu-is_running) { + current_kvm_cpu-is_running = false; + pthread_kill(pthread_self(), SIGKVMEXIT); + } } int kvm_cpu__start(struct kvm_cpu *cpu) @@ -437,10 +444,8 @@ int kvm_cpu__start(struct kvm_cpu *cpu) break; } case KVM_EXIT_INTR: - /* -* Currently we only handle exit signal, which means -* we just exit if KVM_RUN exited due to a signal. 
-*/ + if (cpu-is_running) + break; goto exit_kvm; case KVM_EXIT_SHUTDOWN: goto exit_kvm; diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c index adbb25b..d757761 100644 --- a/tools/kvm/kvm-run.c +++ b/tools/kvm/kvm-run.c @@ -48,7 +48,7 @@ static struct kvm *kvm; static struct kvm_cpu *kvm_cpus[KVM_NR_CPUS]; -static __thread struct kvm_cpu *current_kvm_cpu; +__thread struct kvm_cpu *current_kvm_cpu; static u64 ram_size; static u8 image_count; -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/6] kvm tools: Protect IRQ allocations by a mutex
Makes IRQ allocation for new devices thread-safe. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/irq.c | 20 +--- 1 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/kvm/irq.c b/tools/kvm/irq.c index 15f4702..f92123d 100644 --- a/tools/kvm/irq.c +++ b/tools/kvm/irq.c @@ -1,4 +1,5 @@ #include kvm/irq.h +#include kvm/mutex.h #include linux/types.h #include linux/rbtree.h @@ -10,6 +11,7 @@ static u8 next_line = 3; static u8 next_dev= 1; static struct rb_root pci_tree= RB_ROOT; +static DEFINE_MUTEX(irq_lock); static struct pci_dev *search(struct rb_root *root, u32 id) { @@ -58,7 +60,9 @@ static int insert(struct rb_root *root, struct pci_dev *data) int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line) { - struct pci_dev *node; + struct pci_dev *node = NULL; + + mutex_lock(irq_lock); node = search(pci_tree, dev); @@ -66,7 +70,7 @@ int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line) /* We haven't found a node - First device of it's kind */ node = malloc(sizeof(*node)); if (node == NULL) - return -1; + goto exit_fail; *node = (struct pci_dev) { .id = dev, @@ -81,17 +85,15 @@ int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line) INIT_LIST_HEAD(node-lines); - if (insert(pci_tree, node) != 1) { - free(node); - return -1; - } + if (insert(pci_tree, node) != 1) + goto exit_fail; } if (node) { /* This device already has a pin assigned, give out a new line and device id */ struct irq_line *new = malloc(sizeof(*new)); if (new == NULL) - return -1; + goto exit_fail; new-line = next_line++; *line = new-line; @@ -100,9 +102,13 @@ int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line) list_add(new-node, node-lines); + mutex_unlock(irq_lock); return 0; } +exit_fail: + free(node); + mutex_unlock(irq_lock); return -1; } -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/6] kvm tools: Protect MMIO tree by rwsem
Make MMIO code thread-safe. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/mmio.c | 24 +--- 1 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/kvm/mmio.c b/tools/kvm/mmio.c index ef986bf..59512c3 100644 --- a/tools/kvm/mmio.c +++ b/tools/kvm/mmio.c @@ -1,5 +1,6 @@ #include kvm/kvm.h #include kvm/rbtree-interval.h +#include kvm/rwsem.h #include stdio.h #include stdlib.h @@ -15,6 +16,7 @@ struct mmio_mapping { }; static struct rb_root mmio_tree = RB_ROOT; +static DECLARE_RWSEM(mmio_tree_sem); static struct mmio_mapping *mmio_search(struct rb_root *root, u64 addr, u64 len) { @@ -55,35 +57,51 @@ static const char *to_direction(u8 is_write) bool kvm__register_mmio(u64 phys_addr, u64 phys_addr_len, void (*kvm_mmio_callback_fn)(u64 addr, u8 *data, u32 len, u8 is_write)) { struct mmio_mapping *mmio; + int ret; mmio = malloc(sizeof(*mmio)); if (mmio == NULL) return false; + down_write(mmio_tree_sem); + *mmio = (struct mmio_mapping) { .node = RB_INT_INIT(phys_addr, phys_addr + phys_addr_len), .kvm_mmio_callback_fn = kvm_mmio_callback_fn, }; - return mmio_insert(mmio_tree, mmio); + ret = mmio_insert(mmio_tree, mmio); + + up_write(mmio_tree_sem); + + return ret; } bool kvm__deregister_mmio(u64 phys_addr) { struct mmio_mapping *mmio; + down_write(mmio_tree_sem); mmio = mmio_search_single(mmio_tree, phys_addr); - if (mmio == NULL) + if (mmio == NULL) { + up_write(mmio_tree_sem); return false; + } rb_int_erase(mmio_tree, mmio-node); free(mmio); + up_write(mmio_tree_sem); + return true; } bool kvm__emulate_mmio(struct kvm *kvm, u64 phys_addr, u8 *data, u32 len, u8 is_write) { - struct mmio_mapping *mmio = mmio_search(mmio_tree, phys_addr, len); + struct mmio_mapping *mmio; + + down_read(mmio_tree_sem); + mmio = mmio_search(mmio_tree, phys_addr, len); + up_read(mmio_tree_sem); if (mmio) mmio-kvm_mmio_callback_fn(phys_addr, data, len, is_write); -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message 
to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/6] kvm tools: Add rwlock wrapper
Adds a rwlock wrapper which like the mutex wrapper makes rwlock calls similar to their kernel counterparts. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/rwsem.h | 39 +++ 1 files changed, 39 insertions(+), 0 deletions(-) create mode 100644 tools/kvm/include/kvm/rwsem.h diff --git a/tools/kvm/include/kvm/rwsem.h b/tools/kvm/include/kvm/rwsem.h new file mode 100644 index 000..75a22f8 --- /dev/null +++ b/tools/kvm/include/kvm/rwsem.h @@ -0,0 +1,39 @@ +#ifndef KVM__RWSEM_H +#define KVM__RWSEM_H + +#include pthread.h + +#include kvm/util.h + +/* + * Kernel-alike rwsem API - to make it easier for kernel developers + * to write user-space code! :-) + */ + +#define DECLARE_RWSEM(sem) pthread_rwlock_t sem = PTHREAD_RWLOCK_INITIALIZER + +static inline void down_read(pthread_rwlock_t *rwsem) +{ + if (pthread_rwlock_rdlock(rwsem) != 0) + die(unexpected pthread_rwlock_rdlock() failure!); +} + +static inline void down_write(pthread_rwlock_t *rwsem) +{ + if (pthread_rwlock_wrlock(rwsem) != 0) + die(unexpected pthread_rwlock_wrlock() failure!); +} + +static inline void up_read(pthread_rwlock_t *rwsem) +{ + if (pthread_rwlock_unlock(rwsem) != 0) + die(unexpected pthread_rwlock_unlock() failure!); +} + +static inline void up_write(pthread_rwlock_t *rwsem) +{ + if (pthread_rwlock_unlock(rwsem) != 0) + die(unexpected pthread_rwlock_unlock() failure!); +} + +#endif /* KVM__RWSEM_H */ -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/6] kvm tools: Protect IOPORT tree by rwsem
Makes ioport thread-safe. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/ioport.c |7 +++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c index 1f13960..db9ff0f 100644 --- a/tools/kvm/ioport.c +++ b/tools/kvm/ioport.c @@ -3,6 +3,7 @@ #include kvm/kvm.h #include kvm/util.h #include kvm/rbtree-interval.h +#include kvm/rwsem.h #include linux/kvm.h /* for KVM_EXIT_* */ #include linux/types.h @@ -22,6 +23,7 @@ struct ioport_entry { static struct rb_root ioport_tree = RB_ROOT; bool ioport_debug; +static DECLARE_RWSEM(ioport_tree_sem); static struct ioport_entry *ioport_search(struct rb_root *root, u64 addr) { @@ -71,6 +73,7 @@ void ioport__register(u16 port, struct ioport_operations *ops, int count) { struct ioport_entry *entry; + down_write(ioport_tree_sem); entry = ioport_search(ioport_tree, port); if (entry) { pr_warning(ioport re-registered: %x, port); @@ -87,6 +90,8 @@ void ioport__register(u16 port, struct ioport_operations *ops, int count) }; ioport_insert(ioport_tree, entry); + + up_write(ioport_tree_sem); } static const char *to_direction(int direction) @@ -108,7 +113,9 @@ bool kvm__emulate_io(struct kvm *kvm, u16 port, void *data, int direction, int s bool ret; struct ioport_entry *entry; + down_read(ioport_tree_sem); entry = ioport_search(ioport_tree, port); + up_read(ioport_tree_sem); if (!entry) goto error; -- 1.7.5.rc3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] kvm tools: Add virtio-9p
On 05/18/2011 02:05 AM, Sasha Levin wrote: On Tue, 2011-05-17 at 20:18 -0500, Eric Van Hensbergen wrote: On Tue, May 17, 2011 at 3:27 PM, Sasha Levinlevinsasha...@gmail.com wrote: On Tue, 2011-05-17 at 22:08 +0300, Sasha Levin wrote: 'kvm_9p' isn't created as a device under /dev, it's just a name used internally by 9pnet_virtio (and located under sysfs). I couldn't figure out which params the kernel would expect to boot using 9p over virtio (theres no device name to begin with). I've also couldn't find anything that suggested it's possible to boot using virtio-9p as rootfs. Ignore that. Naming the virtio transport /dev/root and passing proper params to the kernel makes it work: [1.844983] VFS: Mounted root (9p filesystem) on device 0:11. I'll make some changes to the virtio-9p patch to make it easier for the user to do that. Any progress on this? May I get more detailed instructions on how you did this trick? Basically booting on 9P/VirtIO. Thanks, JV This is really sweet. Thanks for beating me to the punch of porting the 9p support to kvm tools. Clear RFC and good source code to refer to within 9p modules made this easy (and fun) :) - Multiple virtio-9p devices. This should be pretty straightforward. Yes, Most of the work here is within the kvm tool. - Ugly hack in virtio_p9_stat() (See desc in code). /* +* HACK: For some reason the p9 virtio transport reads a u16 and discards +* it before reading the p9_rstat struct. I couldn't find a logical reason for +* that, so we just add an extra u16 before the struct. +*/ This is part of the protocol spec (from http://ericvh.github.com/9p-rfc/rfc9p2000.html#anchor32): To make the contents of a directory, such as returned by read(5), easy to parse, each directory entry begins with a size field. For consistency, the entries in Twstat and Rstat messages also contain their size, which means the size appears twice. 
For example, the Rstat message is formatted as ``(4+1+2+2+n)[4] Rstat tag[2] n[2] (n-2)[2] type[2] dev[4]...,'' where n is the value returned by convD2M. It's appropriate to duplicate the size. I think the Linux client ignores it, but others implementations may complain. Thanks for the explanation! Yes, Linux implementation just throws it away - which was what confused me initially. Why not add a u16 to the beginning of 'struct p9_rstat'? - Update atime/mtime in p9_wstat, not really needed. The underlying storage may handle this for you, I think 9p avoids updating atime by default, at least in caching scenarios -- too much unnecessary protocol traffic. My assumption was that the storage I read/write to will take care of it for me, and unless it bothers anyone in the future I'll assume it's doing a good job at it. - Pass usernames in p9_stat, not really needed and not really sure how p9 expects to handle them. The username, group name issue is one of the principle reasons behind the extended protocol operations (.u and .L) -- of course, if there was a Plan 9 or Inferno guest they would be quite happy with the usernames, but Linux (and other UNIX variants) will want the ids. To really keep things simple we could add a client option that would let you pass the various ids as strings. Although no doubt folks will want the other extensions (symlinks, links, device nodes, etc.) before long. When we built the qemu server for .L, the team tried to keep everything in a library, but there is some entanglement with the qemu APIs -- it'd be nice if we could reuse that code here, maybe we need an abstract glue layer so that the core code can be used by both the kvm tool and qemu. I'm copy the lead of that team on this message just so he's aware how far you've come. I'd prefer using a tested lib which also implements .L over what we have now, assuming it's not tangled into qemu too hard. 
-eric -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] kvm tools: Add virtio-9p
On Thu, 2011-05-26 at 07:28 -0700, Venkateswararao Jujjuri wrote: Any progress on this? May I get more detailed instructions on how you did this trick? Basically booting on 9P/VirtIO. Thanks, JV Of course. This change didn't go into tools/kvm/ since we only support the legacy 9p2000 protocol at the moment, which means that even though we can boot - it's quite unusable to work with. The trick is pretty simple: You need to name your virtio transport /dev/root (I think it's currently named local in qemu). Once it's named this way, boot with the following kernel cmdline added: root=/dev/root rootflags=rw,trans=virtio,version=9p2000 rootfstype=9p rw (You should be able to change version to one of the 9p2000 extensions). I've noticed that the transport *has* to be named /dev/root, naming it something else (and adjusting the root= parameter) doesn't seem to work. Also, if it's named /dev/root I couldn't mount it as a simple filesystem from within a guest - not as root. -- Sasha. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
On Thu, May 26, 2011 at 03:32:43PM +0200, Nikola Ciprich wrote: According to this: http://social.msdn.microsoft.com/Forums/en-US/embeddedwindowscomponents/thread/09aae527-ff6d-4003-9e59-962d73d409ed such bsod happens because Windows can't access boot device. Your boot device is IDE. Nothing changed in this area from 13 to 14. Are you sure your image was copied correctly and is not corrupted? Definitely. I just tried downgrading qemu-kvm to 0.13.0 and it works again... In this case I am very puzzled :) -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Restoring saved guest causes guest to reboot
On 05/26/2011 01:28 PM, Markus Schade wrote: On 05/26/2011 08:44 AM, Avi Kivity wrote: On 05/25/2011 09:49 AM, Markus Schade wrote: Git bisect tells me that this is the first bad commit: -%- aff48baa34c033318ad322ecbf2e4bcd891b29ca is the first bad commit Does your machine have ept? (cat /sys/module/kvm_intel/parameters/ept) Sure. (Assuming that the Y means yes). I am no C developer, but I was wondering, if the issue is related to the difference in ept_update_paging_mode_cr0 between original patch in the kvm git and the linux-2.6. git tree. I have re-added the missing 4 lines to ept_update_paging_mode_cr0 in vmx.c, which resolves this issue for in Kernel 2.6,37 and 2.6.39. I haven't tested all guests, but neither Squeeze nor 2008 R2 reboot anymore. Markus static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) @@ -1921,6 +1940,8 @@ unsigned long cr0, struct kvm_vcpu *vcpu) { + ulong cr3; + vmx_decache_cr3(vcpu); if (!(cr0 X86_CR0_PG)) { /* From paging/starting to nonpaging */ @@ -1936,8 +1957,11 @@ vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING)); + /* Must fetch cr3 before updating cr0 */ + cr3 = kvm_read_cr3(vcpu); vcpu-arch.cr0 = cr0; vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); + vmx_set_cr3(vcpu, cr3); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] kvm tools: Add virtio-9p
On 05/26/2011 07:36 AM, Sasha Levin wrote: On Thu, 2011-05-26 at 07:28 -0700, Venkateswararao Jujjuri wrote: Any progress on this? May I get more detailed instructions on how you did this trick? Basically booting on 9P/VirtIO. Thanks, JV Ofcourse. This change didn't go into tools/kvm/ since we only support the legacy 9p2000 protocol at the moment, which means that even though we can boot - it's quite unusable to work with. The trick is pretty simple: You need to name your virtio transport /dev/root (I think it's currently named local in qemu). Once it's named this way, boot with the following kernel cmdline added: root=/dev/root rootflags=rw,trans=virtio,version=9p2000 rootfstype=9p rw (You should be able to change version to one of the 9p2000 extensions). Ah I guess you are making use of rootfstype. So in this setup basically the virtio transport you create is /dev/root instead of kvm_9p correct? Also your dir will be / ? i.e Start KVM with '--virtio-9p /'. ? I've noticed that the transport *has* to be named /dev/root, naming it something else (and adjusting the root= parameter) doesn't seem to work. Also, if it's named /dev/root I couldn't mount it as a simple filesystem from within a guest - not as root. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Restoring saved guest causes guest to reboot
On Thu, May 26, 2011 at 05:20:32PM +0200, Markus Schade wrote: On 05/26/2011 01:28 PM, Markus Schade wrote: On 05/26/2011 08:44 AM, Avi Kivity wrote: On 05/25/2011 09:49 AM, Markus Schade wrote: Git bisect tells me that this is the first bad commit: -%- aff48baa34c033318ad322ecbf2e4bcd891b29ca is the first bad commit Does your machine have ept? (cat /sys/module/kvm_intel/parameters/ept) Sure. (Assuming that the Y means yes). I am no C developer, but I was wondering, if the issue is related to the difference in ept_update_paging_mode_cr0 between original patch in the kvm git and the linux-2.6. git tree. I have re-added the missing 4 lines to ept_update_paging_mode_cr0 in vmx.c, which resolves this issue for in Kernel 2.6,37 and 2.6.39. I haven't tested all guests, but neither Squeeze nor 2008 R2 reboot anymore. Markus static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) @@ -1921,6 +1940,8 @@ unsigned long cr0, struct kvm_vcpu *vcpu) { + ulong cr3; + vmx_decache_cr3(vcpu); if (!(cr0 X86_CR0_PG)) { /* From paging/starting to nonpaging */ @@ -1936,8 +1957,11 @@ vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING)); + /* Must fetch cr3 before updating cr0 */ + cr3 = kvm_read_cr3(vcpu); vcpu-arch.cr0 = cr0; vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); + vmx_set_cr3(vcpu, cr3); Hmm, not 100% sure, but the issue might be that the arch.cr3 value does not make it into the vmcs after cr3 was changed from user-space? This would also be fixed with the change above. Joerg -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V5 2/6 net-next] netdevice.h: Add zero-copy flag in netdevice
On Thu, 2011-05-26 at 11:49 +0300, Michael S. Tsirkin wrote: On Wed, May 25, 2011 at 03:49:40PM -0700, Shirley Ma wrote: On Fri, 2011-05-20 at 02:41 +0300, Michael S. Tsirkin wrote: So the requirements are - data must be released in a timely fashion (e.g. unlike virtio-net tun or bridge) The current patch doesn't enable tun zero-copy. tun will copy data It's not an issue now. We can disallow macvtap attach to bridge when zero-copy is enabled. Attach macvtap to a tun device though. Or e.g. veth device ... So there should be so generic way to disable zerocopy. It can either be a whitelist or a blacklist. - SG support - HIGHDMA support (on arches where this makes sense) This can be checked by device flags. OK, but pls note that SG can get turned off dynamically. - no filtering based on data (data is mapped in guest) - on fast path no calls to skb_copy, skb_clone, pskb_copy, pskb_expand_head as these are slow Any calls to skb_copy, skb_clone, pskb_copy, pskb_expand_head will do a copy. The performance should be the same as none zero-copy case before. I'm guessing a copy is cheaper than get_user_pages+copy+put_page. But maybe not by much. Care checking that? That's I have done already. Patch is going out for review. I have done/tested the patch V6, will send it out for review tomorrow. I am looking at where there are some cases, skb remains the same for filtering. To reliably filter on data I think we'll need to copy it first, otherwise guest can change it. Most filters only look at the header though. First 2 requirements are a must, all other requirements are just dependencies to make sure zero copy will be faster than non zero copy. Using a new feature bit is probably the simplest approach to this. macvtap on top of most physical NICs most likely works correctly so it seems a bit more work than it needs to be, but it's also the safest one I think ... 
For macvtap/vhost zero-copy we can use SG HIGHDMA to enable it, it looks safe to me once patching skb_copy, skb_clone, pskb_copy, pskb_expand_head. To extend zero-copy in other usages, we can have a new feature bit later. Is that reasonable? Thanks Shirley Is the problem is extra work needed to extend feature bits? There is no problem to use it, Mahesh is working on this patch. I just want to remove macvtap/vhost zero-copy patch dependency. Thanks Shirley -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PERF RESULTS] virtio and vhost-net performance enhancements
Michael S. Tsirkin m...@redhat.com wrote on 05/20/2011 04:40:07 AM: OK, here is the large patchset that implements the virtio spec update that I sent earlier (the spec itself needs a minor update, will send that out too next week, but I think we are on the same page here already). It supersedes the PUBLISH_USED_IDX patches I sent out earlier. I was able to get this tested by applying the v2 patches to git-next tree (somehow MST's git tree hung on my guest which never got resolved). Testing was from Guest -> Remote node, using an ixgbe 10g card. The test results are *excellent* (table: #netperf sessions, BW% improvement, SD% improvement, CPU% improvement): ___ 512 byte I/O # BW% SD% CPU% 1 151.6 -65.1 -10.7 2 180.6 -66.6 -6.4 4 15.5 -35.8 -26.1 8 1.8 -28.4 -26.7 16 3.1 -29.0 -26.5 32 1.1 -27.4 -27.5 64 3.8 -30.9 -26.7 96 5.4 -21.7 -24.2 128 5.7 -24.4 -25.5 BW: 16.6% SD: -24.6% CPU: -25.5% 1K I/O # BW% SD% CPU% 1 233.9 -76.5 -18.0 2 112.2 -64.0 -23.2 4 9.2 -31.6 -26.1 8 -1.7 -26.8 -30.3 16 3.5 -31.5 -30.6 32 4.8 -25.2 -30.5 64 5.7 -31.0 -28.9 96 5.3 -32.2 -31.7 128 4.6 -38.2 -33.6 BW: 16.4% SD: -35.% CPU: -31.5% 16K I/O # BW% SD% CPU% 1 18.8 -27.2 -18.3 2 14.8 -36.7 -27.7 4 12.7 -45.2 -38.1 8 4.4 -56.4 -54.4 16 4.8 -38.3 -36.1 32 0 78.0 79.2 64 3.8 -38.1 -37.5 96 7.3 -35.2 -31.1 128 3.4 -31.1 -32.1 BW: 7.6% SD: -30.1% CPU: -23.7% I plan to run some more tests tomorrow. Please let me know if any other scenario will help. Thanks, - KK -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] kvm tools: Add virtio-9p
On Thu, 2011-05-26 at 08:22 -0700, Venkateswararao Jujjuri wrote: On 05/26/2011 07:36 AM, Sasha Levin wrote: On Thu, 2011-05-26 at 07:28 -0700, Venkateswararao Jujjuri wrote: Any progress on this? May I get more detailed instructions on how you did this trick? Basically booting on 9P/VirtIO. Thanks, JV Ofcourse. This change didn't go into tools/kvm/ since we only support the legacy 9p2000 protocol at the moment, which means that even though we can boot - it's quite unusable to work with. The trick is pretty simple: You need to name your virtio transport /dev/root (I think it's currently named local in qemu). Once it's named this way, boot with the following kernel cmdline added: root=/dev/root rootflags=rw,trans=virtio,version=9p2000 rootfstype=9p rw (You should be able to change version to one of the 9p2000 extensions). Ah I guess you are making use of rootfstype. So in this setup basically the virtio transport you create is /dev/root instead of kvm_9p correct? Yes, exactly. Also your dir will be / ? i.e Start KVM with '--virtio-9p /'. ? It can be '/', but I'd suggest against trying to boot your hosts root device within a guest (unless in RO mode maybe). I've mounted a disk image to some directory on the host and booted that directory for testing. I've noticed that the transport *has* to be named /dev/root, naming it something else (and adjusting the root= parameter) doesn't seem to work. Also, if it's named /dev/root I couldn't mount it as a simple filesystem from within a guest - not as root. -- Sasha. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 6/6] kvm tools: Protect IOPORT tree by rwsem
On Thu, 26 May 2011, Sasha Levin wrote: Makes ioport thread-safe. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/ioport.c |7 +++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c index 1f13960..db9ff0f 100644 --- a/tools/kvm/ioport.c +++ b/tools/kvm/ioport.c @@ -3,6 +3,7 @@ #include kvm/kvm.h #include kvm/util.h #include kvm/rbtree-interval.h +#include kvm/rwsem.h #include linux/kvm.h/* for KVM_EXIT_* */ #include linux/types.h @@ -22,6 +23,7 @@ struct ioport_entry { static struct rb_root ioport_tree = RB_ROOT; bool ioport_debug; +static DECLARE_RWSEM(ioport_tree_sem); Why do we need a new lock here? Can't we reuse the new ioport_mutex? Pekka -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] kvm tools: Add rwlock wrapper
On Thu, 26 May 2011, Sasha Levin wrote: Adds a rwlock wrapper which like the mutex wrapper makes rwlock calls similar to their kernel counterparts. Signed-off-by: Sasha Levin levinsasha...@gmail.com There's no explanation why a mutex isn't sufficient. The pthread locking primitives aren't all that great in practice so unless you have some correctness issue that requires a rwlock or some numbers, I'd prefer you go for a mutex. Pekka -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PERF RESULTS] virtio and vhost-net performance enhancements
Shirley Ma x...@us.ibm.com wrote on 05/26/2011 09:12:22 PM: Could you please try TCP_RRs as well? Right. Here's the result for TCP_RR: __ # RR% SD% CPU% __ 1 4.5 -31.4 -27.9 2 5.1 -9.7 -5.4 4 60.4 -13.4 38.8 8 67.8 -13.5 45.0 16 55.8 -8.0 43.2 32 66.9 -14.1 43.3 64 47.2 -23.7 12.2 96 29.7 -11.8 14.3 128 8.0 2.2 10.7 ___ RR: 37.3% SD: -6.7% CPU: 15.7% ___ Thanks, - KK -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] kvm tools: Add rwlock wrapper
On Thu, 2011-05-26 at 19:02 +0300, Pekka Enberg wrote: On Thu, 26 May 2011, Sasha Levin wrote: Adds a rwlock wrapper which like the mutex wrapper makes rwlock calls similar to their kernel counterparts. Signed-off-by: Sasha Levin levinsasha...@gmail.com There's no explanation why a mutex isn't sufficient. The pthread locking primitives aren't all that great in practice so unless you have some correctness issue that requires a rwlock or some numbers, I'd prefer you go for a mutex. I've added some rwlocks because of what Ingo said yesterday about adding/removing devices after the first initialization phase. Take MMIO lock for example: Since we can now run SMP guests, we may have multiple MMIO exits (one from each VCPU thread). Each of those exits leads to searching the MMIO rbtree. We can use a mutex to lock it, but it just means that those threads will be blocked there instead of concurrently searching the MMIO tree which makes the search linear instead of parallel. It's hard to bring 'real' numbers at this stage because the only 'real' device we have which uses MMIO is the VESA driver, and we can't really simulate many VCPUs writing to it :) -- Sasha. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 6/6] kvm tools: Protect IOPORT tree by rwsem
On Thu, 2011-05-26 at 19:01 +0300, Pekka Enberg wrote: On Thu, 26 May 2011, Sasha Levin wrote: Makes ioport thread-safe. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/ioport.c |7 +++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c index 1f13960..db9ff0f 100644 --- a/tools/kvm/ioport.c +++ b/tools/kvm/ioport.c @@ -3,6 +3,7 @@ #include kvm/kvm.h #include kvm/util.h #include kvm/rbtree-interval.h +#include kvm/rwsem.h #include linux/kvm.h /* for KVM_EXIT_* */ #include linux/types.h @@ -22,6 +23,7 @@ struct ioport_entry { static struct rb_root ioport_tree = RB_ROOT; bool ioport_debug; +static DECLARE_RWSEM(ioport_tree_sem); Why do we need a new lock here? Can't we reuse the new ioport_mutex? ioport_mutex is used for allocations of ioports to devices, this lock is intended to protect the ioport tree from being read while new devices are added. -- Sasha. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PERF RESULTS] virtio and vhost-net performance enhancements
Krishna Kumar2/India/IBM wrote on 05/26/2011 09:51:32 PM: Could you please try TCP_RRs as well? Right. Here's the result for TCP_RR: The actual transaction rate/second numbers are: _ # RR1 RR2 (%) SD1 SD2 (%) _ 1 9476 9903 (4.5) 28.9 19.8 (-31.4) 2 17337 18225 (5.1) 92.7 83.7 (-9.7) 4 17385 27902 (60.4) 364.8 315.8 (-13.4) 8 25560 42912 (67.8) 1428.1 1234.0 (-13.5) 16 35898 55934 (55.8) 4391.6 4038.1 (-8.0) 32 48048 80228 (66.9) 17391.4 14932.0 (-14.1) 64 60412 88929 (47.2) 71087.7 54230.1 (-23.7) 96 71263 92439 (29.7) 145434.1 128214.0 (-11.8) 128 84208 91014 (8.0) 233668.2 23.6 (2.2) _ RR: 37.3% SD: -6.7% _ Thanks, - KK -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] kvm tools: Add rwlock wrapper
* Sasha Levin levinsasha...@gmail.com wrote: On Thu, 2011-05-26 at 19:02 +0300, Pekka Enberg wrote: On Thu, 26 May 2011, Sasha Levin wrote: Adds a rwlock wrapper which like the mutex wrapper makes rwlock calls similar to their kernel counterparts. Signed-off-by: Sasha Levin levinsasha...@gmail.com There's no explanation why a mutex isn't sufficient. The pthread locking primitives aren't all that great in practice so unless you have some correctness issue that requires a rwlock or some numbers, I'd prefer you go for a mutex. I've added some rwlocks because of what Ingo said yesterday about adding/removing devices after the first initialization phase. Take MMIO lock for example: Since we can now run SMP guests, we may have multiple MMIO exits (one from each VCPU thread). Each of those exits leads to searching the MMIO rbtree. We can use a mutex to lock it, but it just means that those threads will be blocked there instead of concurrently searching the MMIO tree which makes the search linear instead of parallel. It's hard to bring 'real' numbers at this stage because the only 'real' device we have which uses MMIO is the VESA driver, and we can't really simulate many VCPUs writing to it :) I'd suggest keeping it simple first - rwlocks are nasty and will bounce a cacheline just as much. If lookup scalability is an issue we can extend RCU to tools/kvm/. Thanks, Ingo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] kvm tools: Add rwlock wrapper
On 05/26/2011 09:05 PM, Ingo Molnar wrote: I've added some rwlocks because of what Ingo said yesterday about adding/removing devices after the first initialization phase. Take MMIO lock for example: Since we can now run SMP guests, we may have multiple MMIO exits (one from each VCPU thread). Each of those exits leads to searching the MMIO rbtree. We can use a mutex to lock it, but it just means that those threads will be blocked there instead of concurrently searching the MMIO tree which makes the search linear instead of parallel. It's hard to bring 'real' numbers at this stage because the only 'real' device we have which uses MMIO is the VESA driver, and we can't really simulate many VCPUs writing to it :) I'd suggest keeping it simple first - rwlocks are nasty and will bounce a cacheline just as much. Well, this is the first case where tools/kvm can do better than qemu with its global lock, so I think it's worth it. If lookup scalability is an issue we can extend RCU to tools/kvm/. Definitely rcu is a perfect patch for mmio dispatch. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] kvm tools: Add rwlock wrapper
On Thu, May 26, 2011 at 9:11 PM, Avi Kivity a...@redhat.com wrote: On 05/26/2011 09:05 PM, Ingo Molnar wrote: I've added some rwlocks because of what Ingo said yesterday about adding/removing devices after the first initialization phase. Take MMIO lock for example: Since we can now run SMP guests, we may have multiple MMIO exits (one from each VCPU thread). Each of those exits leads to searching the MMIO rbtree. We can use a mutex to lock it, but it just means that those threads will be blocked there instead of concurrently searching the MMIO tree which makes the search linear instead of parallel. It's hard to bring 'real' numbers at this stage because the only 'real' device we have which uses MMIO is the VESA driver, and we can't really simulate many VCPUs writing to it :) I'd suggest keeping it simple first - rwlocks are nasty and will bounce a cacheline just as much. Well, this is the first case where tools/kvm can do better than qemu with its global lock, so I think it's worth it. If lookup scalability is an issue we can extend RCU to tools/kvm/. Definitely rcu is a perfect patch for mmio dispatch. Userspace RCU code is here, Sasha, if you feel like tackling this: http://lttng.org/urcu :-) I'm CC'ing Paul and Mathieu as well for urcu. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting
In this case I am very puzzled :) well, so am I :) but at least, I now know there seems to be a problem with disk access. I'll ask some of our windows guys to try some failsafe mode or something and find out what is going on... I'll report when I know something. Thanks for your help! n. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- - Ing. Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax:+420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: ser...@linuxbox.cz - pgp9091LC9pNf.pgp Description: PGP signature
Re: [PATCH 4/6] kvm tools: Add rwlock wrapper
On Thu, 2011-05-26 at 21:21 +0300, Pekka Enberg wrote: On Thu, May 26, 2011 at 9:11 PM, Avi Kivity a...@redhat.com wrote: On 05/26/2011 09:05 PM, Ingo Molnar wrote: I've added some rwlocks because of what Ingo said yesterday about adding/removing devices after the first initialization phase. Take MMIO lock for example: Since we can now run SMP guests, we may have multiple MMIO exits (one from each VCPU thread). Each of those exits leads to searching the MMIO rbtree. We can use a mutex to lock it, but it just means that those threads will be blocked there instead of concurrently searching the MMIO tree which makes the search linear instead of parallel. It's hard to bring 'real' numbers at this stage because the only 'real' device we have which uses MMIO is the VESA driver, and we can't really simulate many VCPUs writing to it :) I'd suggest keeping it simple first - rwlocks are nasty and will bounce a cacheline just as much. Well, this is the first case where tools/kvm can do better than qemu with its global lock, so I think it's worth it. If lookup scalability is an issue we can extend RCU to tools/kvm/. Definitely rcu is a perfect patch for mmio dispatch. Userspace RCU code is here, Sasha, if you feel like tackling this: http://lttng.org/urcu :-) I'm CC'ing Paul and Mathieu as well for urcu. Sounds good! Should be quite an addition and could be used in more places than just the MMIO dispatcher. -- Sasha. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
ioeventfd question
Hi Avi, I'm working on adding ioeventfd support into tools/kvm/. Currently the implementation creates ioeventfd entries at the 'VIRTIO_PCI_QUEUE_NOTIFY' of each device and waits on all of them using epoll(). The basics are working - when IO is triggered I receive a notification using the event instead of an exit. I couldn't find a way to retrieve the value written to the PIO port - the guest memory at that location doesn't change, 'reading' the event just returns 1 (as expected) and I couldn't find anything else which might suggest what value was written to the PIO port. This is an issue with devices which have multiple virtio-rings, such as virtio-net. -- Sasha. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V5 2/6 net-next] netdevice.h: Add zero-copy flag in netdevice
On Thu, 2011-05-26 at 11:49 +0300, Michael S. Tsirkin wrote: - SG support - HIGHDMA support (on arches where this makes sense) This can be checked by device flags. OK, but pls note that SG can get turned off dynamically. Tested the patch with SG dynamically switched on/off and with tcpdump suspended. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V6 0/4]
This patchset adds support for TX zero-copy between guest and host kernel through vhost. It significantly reduces CPU utilization on the local host on which the guest is located (it reduced CPU usage by about 50% for the single-stream test on the host, while 4K message size BW increased by about 50%). The patchset is based on the previous submission and comments from the community regarding when/how to handle guest kernel buffers to be released. This is the simplest approach I can think of after comparing with several other solutions. This patchset has integrated V3 review comments from the community: 1. Add more comments on how to use the device ZEROCOPY flag; 2. Change device ZEROCOPY to available bit 31 3. Fix skb header linear allocation when virtio_net GSO is not enabled It has integrated V4 review comments from MST and Sridhar: 1. In vhost, use socket poll wake-up for outstanding DMAs 2. Add detailed comments for the vhost_zerocopy_signal_used call 3. Add sleep in vhost shutdown instead of busy-waiting for outstanding DMAs. 4. Copy small packets, don't do the zero-copy callback in macvtap, mark its DMA done in vhost 5. Change zerocopy to bool in macvtap. It integrates V5 review comments from MST and Michał Mirosław mir...@gmail.com 1. Prevent userspace apps from holding skb userspace buffers by copying userspace buffers to kernel in skb_clone, skb_copy, pskb_copy, pskb_expand_head. 2. It also uses the HIGHDMA and SG feature bits to enable ZEROCOPY, removing the dependency on a new feature bit; we can add one later when a new feature bit is available. This patchset includes: 1/4: Add a new sock zero-copy flag, SOCK_ZEROCOPY; 2/4: Add a new struct skb_ubuf_info in skb_shared_info for the userspace buffers release callback, invoked when the lower device DMA is done for that skb, i.e. when the last reference is gone; or whenever skb_clone, skb_copy, pskb_copy, pskb_expand_head get called from tcpdump or filtering, these userspace buffers will be copied into kernel ...
we don't want userspace apps to hold userspace buffers too long. 3/4: Add vhost zero-copy callback in vhost when skb last refcnt is gone; add vhost_zerocopy_signal_used to notify guest to release TX skb buffers. 4/4: Add macvtap zero-copy in lower device when the packet being sent is larger than 256 bytes. The patchset is built against the most recent net-next linux 2.6.39-rc7. It has passed the netperf/netserver multiple-stream stress test, the tcpdump suspended test, and the dynamic SG change test. Single TCP_STREAM 120-second test results over an ixgbe 10Gb NIC: Message BW(Gb/s) qemu-kvm(NumCPU) vhost-net(NumCPU) PerfTop irq/s 4K 7408.57 92.1% 22.6% 1229 4K(Orig) 4913.17 118.1% 84.1% 2086 8K 9129.90 89.3% 23.3% 1141 8K(Orig) 7094.55 115.9% 84.7% 2157 16K 9178.81 89.1% 23.3% 1139 16K(Orig) 8927.1 118.7% 83.4% 2262 64K 9171.43 88.4% 24.9% 1253 64K(Orig) 9085.85 115.9% 82.4% 2229 For message sizes less than or equal to 2K, there is a known KVM guest TX overrun issue. With this zero-copy patch, the issue becomes more severe: guest io_exits have tripled compared to before, so the performance is not good. Once the TX overrun problem has been addressed, I will retest the small message size performance. drivers/net/macvtap.c | 132 --- drivers/vhost/net.c| 44 +- drivers/vhost/vhost.c | 49 +++ drivers/vhost/vhost.h | 13 include/linux/netdevice.h | 10 +++ include/linux/skbuff.h | 26 include/net/sock.h |1 + net/core/skbuff.c | 81 - 8 files changed, 345 insertions(+), 17 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V6 1/4 net-next] sock.h: Add a new sock zero-copy flag
Signed-off-by: Shirley Ma x...@us.ibm.com --- include/net/sock.h |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 01810a3..ab09097 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -562,6 +562,7 @@ enum sock_flags { SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ SOCK_FASYNC, /* fasync() active */ SOCK_RXQ_OVFL, + SOCK_ZEROCOPY, /* buffers from userspace */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V6 2/4 net-next] skbuff: Add userspace zero-copy buffers in skb
This patch adds userspace buffers support in skb shared info. A new struct skb_ubuf_info is needed to maintain the userspace buffers argument and index; a callback is used to notify userspace to release the buffers once the lower device has done DMA (the last reference to that skb is gone). If any userspace app references these userspace buffers, the buffers will be copied into the kernel. This way we can prevent userspace apps from holding these userspace buffers too long. Signed-off-by: Shirley Ma x...@us.ibm.com --- include/linux/skbuff.h | 26 +++ net/core/skbuff.c | 80 ++- 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d0ae90a..025de5c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -189,6 +189,18 @@ enum { SKBTX_DRV_NEEDS_SK_REF = 1 3, }; +/* + * The callback notifies userspace to release buffers when skb DMA is done in + * lower device, the skb last reference should be 0 when calling this. + * The desc is used to track userspace buffer index. + */ +struct skb_ubuf_info { + /* support buffers allocation from userspace */ + void(*callback)(struct sk_buff *); + void*arg; + size_t desc; +}; + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb-end. 
*/ @@ -211,6 +223,10 @@ struct skb_shared_info { /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ void * destructor_arg; + + /* DMA mapping from/to userspace buffers */ + struct skb_ubuf_info ubuf; + /* must be last field, see pskb_expand_head() */ skb_frag_t frags[MAX_SKB_FRAGS]; }; @@ -2261,5 +2277,15 @@ static inline void skb_checksum_none_assert(struct sk_buff *skb) } bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); + +/* + * skb_ubuf - is the buffer from userspace + * @skb: buffer to check + */ +static inline int skb_ubuf(const struct sk_buff *skb) +{ + return (skb_shinfo(skb)-ubuf.callback != NULL); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7ebeed0..890447c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -210,6 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(shinfo-dataref, 1); + shinfo-ubuf.callback = NULL; + shinfo-ubuf.arg = NULL; kmemcheck_annotate_variable(shinfo-destructor_arg); if (fclone) { @@ -328,6 +330,14 @@ static void skb_release_data(struct sk_buff *skb) put_page(skb_shinfo(skb)-frags[i].page); } + /* +* if skb buf is from userspace, we need to notify the caller +* the lower device DMA has done; +*/ + if (skb_ubuf(skb)) { + skb_shinfo(skb)-ubuf.callback(skb); + skb_shinfo(skb)-ubuf.callback = NULL; + } if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); @@ -480,6 +490,9 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size) if (irqs_disabled()) return false; + if (skb_ubuf(skb)) + return false; + if (skb_is_nonlinear(skb) || skb-fclone != SKB_FCLONE_UNAVAILABLE) return false; @@ -572,6 +585,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) atomic_set(n-users, 1); atomic_inc((skb_shinfo(skb)-dataref)); + skb_shinfo(skb)-ubuf.callback = NULL; 
skb-cloned = 1; return n; @@ -595,6 +609,48 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); +/* skb frags copy userspace buffers to kernel */ +static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) +{ + int i; + int num_frags = skb_shinfo(skb)-nr_frags; + struct page *page, *head = NULL; + + for (i = 0; i num_frags; i++) { + u8 *vaddr; + skb_frag_t *f = skb_shinfo(skb)-frags[i]; + + page = alloc_page(GFP_ATOMIC); + if (!page) { + while (head) { + put_page(head); + head = (struct page *)head-private; + } + return -ENOMEM; + } + vaddr = kmap_skb_frag(skb_shinfo(skb)-frags[i]); + memcpy(page_address(page), vaddr + f-page_offset, f-size); + kunmap_skb_frag(vaddr); + page-private = (unsigned long)head; +
[PATCH V6 3/4]macvtap: macvtap TX zero-copy support
Only when buffer size is greater than GOODCOPY_LEN (256), macvtap enables zero-copy. Signed-off-by: Shirley Ma x...@us.ibm.com --- drivers/net/macvtap.c | 132 1 files changed, 121 insertions(+), 11 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 6696e56..97ad224 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -60,6 +60,7 @@ static struct proto macvtap_proto = { */ static dev_t macvtap_major; #define MACVTAP_NUM_DEVS 65536 +#define GOODCOPY_LEN 256 static struct class *macvtap_class; static struct cdev macvtap_cdev; @@ -340,6 +341,7 @@ static int macvtap_open(struct inode *inode, struct file *file) { struct net *net = current-nsproxy-net_ns; struct net_device *dev = dev_get_by_index(net, iminor(inode)); + struct macvlan_dev *vlan = netdev_priv(dev); struct macvtap_queue *q; int err; @@ -369,6 +371,16 @@ static int macvtap_open(struct inode *inode, struct file *file) q-flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; q-vnet_hdr_sz = sizeof(struct virtio_net_hdr); + /* +* so far only KVM virtio_net uses macvtap, enable zero copy between +* guest kernel and host kernel when lower device supports highdma +* and sg +*/ + if (vlan) { + if (vlan-lowerdev-features (NETIF_F_HIGHDMA | NETIF_F_SG)) + sock_set_flag(q-sk, SOCK_ZEROCOPY); + } + err = macvtap_set_queue(dev, file, q); if (err) sock_put(q-sk); @@ -433,6 +445,80 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad, return skb; } +/* set skb frags from iovec, this can move to core network code for reuse */ +static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, + int offset, size_t count) +{ + int len = iov_length(from, count) - offset; + int copy = skb_headlen(skb); + int size, offset1 = 0; + int i = 0; + skb_frag_t *f; + + /* Skip over from offset */ + while (count (offset = from-iov_len)) { + offset -= from-iov_len; + ++from; + --count; + } + + /* copy up to skb headlen */ + while (count (copy 0)) { + size = 
min_t(unsigned int, copy, from-iov_len - offset); + if (copy_from_user(skb-data + offset1, from-iov_base + offset, + size)) + return -EFAULT; + if (copy size) { + ++from; + --count; + } + copy -= size; + offset1 += size; + offset = 0; + } + + if (len == offset1) + return 0; + + while (count--) { + struct page *page[MAX_SKB_FRAGS]; + int num_pages; + unsigned long base; + + len = from-iov_len - offset1; + if (!len) { + offset1 = 0; + ++from; + continue; + } + base = (unsigned long)from-iov_base + offset1; + size = ((base ~PAGE_MASK) + len + ~PAGE_MASK) PAGE_SHIFT; + num_pages = get_user_pages_fast(base, size, 0, page[i]); + if ((num_pages != size) || + (num_pages MAX_SKB_FRAGS - skb_shinfo(skb)-nr_frags)) + /* put_page is in skb free */ + return -EFAULT; + skb-data_len += len; + skb-len += len; + skb-truesize += len; + atomic_add(len, skb-sk-sk_wmem_alloc); + while (len) { + f = skb_shinfo(skb)-frags[i]; + f-page = page[i]; + f-page_offset = base ~PAGE_MASK; + f-size = min_t(int, len, PAGE_SIZE - f-page_offset); + skb_shinfo(skb)-nr_frags++; + /* increase sk_wmem_alloc */ + base += f-size; + len -= f-size; + i++; + } + offset1 = 0; + ++from; + } + return 0; +} + /* * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should * be shared with the tun/tap driver. @@ -515,16 +601,18 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, /* Get packet from user space buffer */ -static ssize_t macvtap_get_user(struct macvtap_queue *q, - const struct iovec *iv, size_t count, - int noblock) +static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, + const struct iovec *iv, unsigned long total_len, + size_t
[PATCH V6 4/4 net-next] vhost: vhost TX zero-copy support
Hello Michael, Let me know if I missed anything from your previous review. Thanks Shirley --- This patch maintains the outstanding userspace buffers in the sequence in which they are delivered to vhost. The outstanding userspace buffers will be marked as done once the lower device's DMA of those buffers has finished. This is monitored through the kfree_skb callback on the last reference. Two buffer indexes are used for this purpose. Vhost passes the userspace buffers info to the lower device skb through message control. There may be some completed DMAs when entering vhost handle_tx. In the worst case all buffers in the vq are in pending/done status, so we need to notify the guest to release DMA-done buffers first, before getting any new buffers from the vq. Signed-off-by: Shirley x...@us.ibm.com --- drivers/vhost/net.c | 44 +++- drivers/vhost/vhost.c | 49 + drivers/vhost/vhost.h | 13 + 3 files changed, 105 insertions(+), 1 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2f7c76a..b27ba64 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -32,6 +32,10 @@ * Using this limit prevents one virtqueue from starving others. */ #define VHOST_NET_WEIGHT 0x8 +/* MAX number of TX used buffers for outstanding zerocopy */ +#define VHOST_MAX_PEND 128 +#define VHOST_GOODCOPY_LEN 256 + enum { VHOST_NET_VQ_RX = 0, VHOST_NET_VQ_TX = 1, @@ -129,6 +133,7 @@ static void handle_tx(struct vhost_net *net) int err, wmem; size_t hdr_size; struct socket *sock; + struct skb_ubuf_info pend; /* TODO: check that we are running from vhost_worker? 
*/ sock = rcu_dereference_check(vq-private_data, 1); @@ -151,6 +156,10 @@ static void handle_tx(struct vhost_net *net) hdr_size = vq-vhost_hlen; for (;;) { + /* Release DMAs done buffers first */ + if (atomic_read(vq-refcnt) VHOST_MAX_PEND) + vhost_zerocopy_signal_used(vq, false); + head = vhost_get_vq_desc(net-dev, vq, vq-iov, ARRAY_SIZE(vq-iov), out, in, @@ -166,6 +175,12 @@ static void handle_tx(struct vhost_net *net) set_bit(SOCK_ASYNC_NOSPACE, sock-flags); break; } + /* If more outstanding DMAs, queue the work */ + if (atomic_read(vq-refcnt) VHOST_MAX_PEND) { + tx_poll_start(net, sock); + set_bit(SOCK_ASYNC_NOSPACE, sock-flags); + break; + } if (unlikely(vhost_enable_notify(vq))) { vhost_disable_notify(vq); continue; @@ -188,6 +203,24 @@ static void handle_tx(struct vhost_net *net) iov_length(vq-hdr, s), hdr_size); break; } + /* use msg_control to pass vhost zerocopy ubuf info to skb */ + if (sock_flag(sock-sk, SOCK_ZEROCOPY)) { + vq-heads[vq-upend_idx].id = head; + if (len VHOST_GOODCOPY_LEN) + /* copy don't need to wait for DMA done */ + vq-heads[vq-upend_idx].len = + VHOST_DMA_DONE_LEN; + else { + vq-heads[vq-upend_idx].len = len; + pend.callback = vhost_zerocopy_callback; + pend.arg = vq; + pend.desc = vq-upend_idx; + msg.msg_control = pend; + msg.msg_controllen = sizeof(pend); + } + atomic_inc(vq-refcnt); + vq-upend_idx = (vq-upend_idx + 1) % UIO_MAXIOV; + } /* TODO: Check specific error and bomb out unless ENOBUFS? */ err = sock-ops-sendmsg(NULL, sock, msg, len); if (unlikely(err 0)) { @@ -198,12 +231,21 @@ static void handle_tx(struct vhost_net *net) if (err != len) pr_debug(Truncated TX packet: len %d != %zd\n, err, len); - vhost_add_used_and_signal(net-dev, vq, head, 0); + if (!sock_flag(sock-sk, SOCK_ZEROCOPY)) + vhost_add_used_and_signal(net-dev, vq, head, 0); total_len += len; if (unlikely(total_len = VHOST_NET_WEIGHT)) { vhost_poll_queue(vq-poll); break; } + /* if upend_idx is full,