Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Avi Kivity

On 05/25/2011 09:18 PM, Marcelo Tosatti wrote:

Commit fa4491a6b667304 moved the permission check for io instructions
to the ->check_perm callback. It failed to copy the port value from RDX
register for string and in,out ax,dx instructions.

Fix it by reading RDX register at decode stage when appropriate.

Fixes FC8.32 installation.

+#define Sse (1<<18) /* SSE Vector instruction */


19/20 are still available, no need to go 64-bit just yet.


  /* Misc flags */
-#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */

+   case SrcDX:
+   c-src.type = OP_REG;
+   c-src.bytes = c-op_bytes;


Needs to be 2.  Otherwise we'll see extra bits from edx, or lose bits 
from dx if it's a 1-byte instruction.



+   c-src.addr.reg =c-regs[VCPU_REGS_RDX];
+   fetch_register_operand(c-src);
+   break;
}

if (rc != X86EMUL_CONTINUE)
@@ -3649,6 +3657,12 @@ done_prefixes:
c-dst.addr.mem.seg = VCPU_SREG_ES;
c-dst.val = 0;
break;
+   case DstDX:
+   c-dst.type = OP_REG;
+   c-dst.bytes = c-op_bytes;


2 again.


+   c-dst.addr.reg =c-regs[VCPU_REGS_RDX];
+   fetch_register_operand(c-dst);
+   break;
case ImplicitOps:
/* Special instructions do their own operand decoding. */
default:


We also need to unify Src/Dst decode eventually.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks

2011-05-26 Thread Sasha Levin
Allow specifying an optional parameter when registering an
ioport range. The callback functions provided by the registering
module will be called with the same parameter.

This may be used to keep context during callbacks on IO operations.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |3 ++
 tools/kvm/ioport.c |   54 +--
 2 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 8253938..2a8d74d 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -25,11 +25,14 @@ struct kvm;
 struct ioport_operations {
bool (*io_in)(struct kvm *kvm, u16 port, void *data, int size, u32 
count);
bool (*io_out)(struct kvm *kvm, u16 port, void *data, int size, u32 
count);
+   bool (*io_in_param)(struct kvm *kvm, u16 port, void *data, int size, 
u32 count, void *param);
+   bool (*io_out_param)(struct kvm *kvm, u16 port, void *data, int size, 
u32 count, void *param);
 };
 
 void ioport__setup_legacy(void);
 
 void ioport__register(u16 port, struct ioport_operations *ops, int count);
+void ioport__register_param(u16 port, struct ioport_operations *ops, int 
count, void *param);
 
 static inline u8 ioport__read8(u8 *data)
 {
diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c
index 1f13960..159d089 100644
--- a/tools/kvm/ioport.c
+++ b/tools/kvm/ioport.c
@@ -18,6 +18,7 @@
 struct ioport_entry {
struct rb_int_node  node;
struct ioport_operations*ops;
+   void*param;
 };
 
 static struct rb_root ioport_tree = RB_ROOT;
@@ -89,6 +90,29 @@ void ioport__register(u16 port, struct ioport_operations 
*ops, int count)
ioport_insert(ioport_tree, entry);
 }
 
+void ioport__register_param(u16 port, struct ioport_operations *ops, int 
count, void *param)
+{
+   struct ioport_entry *entry;
+
+   entry = ioport_search(ioport_tree, port);
+   if (entry) {
+   pr_warning(ioport re-registered: %x, port);
+   rb_int_erase(ioport_tree, entry-node);
+   }
+
+   entry = malloc(sizeof(*entry));
+   if (entry == NULL)
+   die(Failed allocating new ioport entry);
+
+   *entry = (struct ioport_entry) {
+   .node   = RB_INT_INIT(port, port + count),
+   .ops= ops,
+   .param  = param,
+   };
+
+   ioport_insert(ioport_tree, entry);
+}
+
 static const char *to_direction(int direction)
 {
if (direction == KVM_EXIT_IO_IN)
@@ -105,30 +129,32 @@ static void ioport_error(u16 port, void *data, int 
direction, int size, u32 coun
 bool kvm__emulate_io(struct kvm *kvm, u16 port, void *data, int direction, int 
size, u32 count)
 {
struct ioport_operations *ops;
-   bool ret;
+   bool ret = false;
struct ioport_entry *entry;
+   void *param;
 
entry = ioport_search(ioport_tree, port);
if (!entry)
goto error;
 
-   ops = entry-ops;
+   ops = entry-ops;
+   param   = entry-param;
 
if (direction == KVM_EXIT_IO_IN) {
-   if (!ops-io_in)
-   goto error;
-
-   ret = ops-io_in(kvm, port, data, size, count);
-   if (!ret)
-   goto error;
+   if (!param  ops-io_in)
+   ret = ops-io_in(kvm, port, data, size, count);
+   if (param  ops-io_in_param)
+   ret = ops-io_in_param(kvm, port, data, size, count, 
param);
} else {
-   if (!ops-io_out)
-   goto error;
-
-   ret = ops-io_out(kvm, port, data, size, count);
-   if (!ret)
-   goto error;
+   if (!param  ops-io_out)
+   ret = ops-io_out(kvm, port, data, size, count);
+   if (param  ops-io_out_param)
+   ret = ops-io_out_param(kvm, port, data, size, count, 
param);
}
+
+   if (!ret)
+   goto error;
+
return true;
 error:
if (ioport_debug)
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/8] kvm tools: Add basic ioport dynamic allocation

2011-05-26 Thread Sasha Levin
Add a very simple allocation of ioports.

This prevents the need to coordinate ioports between different
modules.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |   11 +--
 tools/kvm/ioport.c |   36 ++--
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 2a8d74d..c500f1e 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -7,6 +7,9 @@
 
 /* some ports we reserve for own use */
 #define IOPORT_DBG 0xe0
+#define IOPORT_START   0x6200
+#define IOPORT_SIZE0x400
+
 #define IOPORT_VESA0xa200
 #define IOPORT_VESA_SIZE   256
 #define IOPORT_VIRTIO_P9   0xb200  /* Virtio 9P device */
@@ -20,6 +23,8 @@
 #define IOPORT_VIRTIO_RNG  0xf200  /* Virtio network device */
 #define IOPORT_VIRTIO_RNG_SIZE 256
 
+#define IOPORT_EMPTY   USHRT_MAX
+
 struct kvm;
 
 struct ioport_operations {
@@ -31,8 +36,10 @@ struct ioport_operations {
 
 void ioport__setup_legacy(void);
 
-void ioport__register(u16 port, struct ioport_operations *ops, int count);
-void ioport__register_param(u16 port, struct ioport_operations *ops, int 
count, void *param);
+u16 ioport__register(u16 port, struct ioport_operations *ops, int count);
+u16 ioport__register_param(u16 port, struct ioport_operations *ops, int count, 
void *param);
+
+u16 ioport__find_free_range(void);
 
 static inline u8 ioport__read8(u8 *data)
 {
diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c
index 159d089..b2a3272 100644
--- a/tools/kvm/ioport.c
+++ b/tools/kvm/ioport.c
@@ -3,6 +3,7 @@
 #include kvm/kvm.h
 #include kvm/util.h
 #include kvm/rbtree-interval.h
+#include kvm/mutex.h
 
 #include linux/kvm.h /* for KVM_EXIT_* */
 #include linux/types.h
@@ -21,9 +22,23 @@ struct ioport_entry {
void*param;
 };
 
+static u16 free_io_port_idx;
+DEFINE_MUTEX(free_io_port_idx_lock);
 static struct rb_root ioport_tree = RB_ROOT;
 bool ioport_debug;
 
+static u16 ioport__find_free_port(void)
+{
+   u16 free_port;
+
+   mutex_lock(free_io_port_idx_lock);
+   free_port = IOPORT_START + free_io_port_idx * IOPORT_SIZE;
+   free_io_port_idx++;
+   mutex_unlock(free_io_port_idx_lock);
+
+   return free_port;
+}
+
 static struct ioport_entry *ioport_search(struct rb_root *root, u64 addr)
 {
struct rb_int_node *node;
@@ -68,10 +83,13 @@ static struct ioport_operations dummy_write_only_ioport_ops 
= {
.io_out = dummy_io_out,
 };
 
-void ioport__register(u16 port, struct ioport_operations *ops, int count)
+u16 ioport__register(u16 port, struct ioport_operations *ops, int count)
 {
struct ioport_entry *entry;
 
+   if (port == IOPORT_EMPTY)
+   port = ioport__find_free_port();
+
entry = ioport_search(ioport_tree, port);
if (entry) {
pr_warning(ioport re-registered: %x, port);
@@ -88,12 +106,17 @@ void ioport__register(u16 port, struct ioport_operations 
*ops, int count)
};
 
ioport_insert(ioport_tree, entry);
+
+   return port;
 }
 
-void ioport__register_param(u16 port, struct ioport_operations *ops, int 
count, void *param)
+u16 ioport__register_param(u16 port, struct ioport_operations *ops, int count, 
void *param)
 {
struct ioport_entry *entry;
 
+   if (port == IOPORT_EMPTY)
+   port = ioport__find_free_port();
+
entry = ioport_search(ioport_tree, port);
if (entry) {
pr_warning(ioport re-registered: %x, port);
@@ -111,6 +134,8 @@ void ioport__register_param(u16 port, struct 
ioport_operations *ops, int count,
};
 
ioport_insert(ioport_tree, entry);
+
+   return port;
 }
 
 static const char *to_direction(int direction)
@@ -126,6 +151,13 @@ static void ioport_error(u16 port, void *data, int 
direction, int size, u32 coun
fprintf(stderr, IO error: %s port=%x, size=%d, count=%u\n, 
to_direction(direction), port, size, count);
 }
 
+u16 ioport__find_free_range(void)
+{
+   static u16 cur_loc;
+
+   return IOPORT_START + (cur_loc++ * IOPORT_SIZE);
+}
+
 bool kvm__emulate_io(struct kvm *kvm, u16 port, void *data, int direction, int 
size, u32 count)
 {
struct ioport_operations *ops;
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/8] kvm tools: Use ioport context to control blk devices

2011-05-26 Thread Sasha Levin
Since ioports now has the ability to pass context to its
callbacks, we can implement multiple blk devices more efficiently.

We can get a ptr to the 'current' blk dev on each ioport call, which
means that we don't need to keep track of the blk device allocation
and ioport distribution within the module.

The advantages are easier management of multiple blk devices and
removal of any hardcoded limits to the amount of possible blk
devices.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |2 -
 tools/kvm/virtio/blk.c |   75 ++--
 2 files changed, 26 insertions(+), 51 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index c500f1e..47f9fb5 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -14,8 +14,6 @@
 #define IOPORT_VESA_SIZE   256
 #define IOPORT_VIRTIO_P9   0xb200  /* Virtio 9P device */
 #define IOPORT_VIRTIO_P9_SIZE  256
-#define IOPORT_VIRTIO_BLK  0xc200  /* Virtio block device */
-#define IOPORT_VIRTIO_BLK_SIZE 0x200
 #define IOPORT_VIRTIO_CONSOLE  0xd200  /* Virtio console device */
 #define IOPORT_VIRTIO_CONSOLE_SIZE 256
 #define IOPORT_VIRTIO_NET  0xe200  /* Virtio network device */
diff --git a/tools/kvm/virtio/blk.c b/tools/kvm/virtio/blk.c
index 25ce61f..cb103fc 100644
--- a/tools/kvm/virtio/blk.c
+++ b/tools/kvm/virtio/blk.c
@@ -14,6 +14,7 @@
 #include linux/virtio_ring.h
 #include linux/virtio_blk.h
 
+#include linux/list.h
 #include linux/types.h
 #include pthread.h
 
@@ -34,15 +35,16 @@ struct blk_dev_job {
 
 struct blk_dev {
pthread_mutex_t mutex;
+   struct list_headlist;
 
struct virtio_blk_configblk_config;
struct disk_image   *disk;
+   u64 base_addr;
u32 host_features;
u32 guest_features;
u16 config_vector;
u8  status;
u8  isr;
-   u8  idx;
 
/* virtio queue */
u16 queue_selector;
@@ -52,7 +54,7 @@ struct blk_dev {
struct pci_device_headerpci_hdr;
 };
 
-static struct blk_dev *bdevs[VIRTIO_BLK_MAX_DEV];
+static LIST_HEAD(bdevs);
 
 static bool virtio_blk_dev_in(struct blk_dev *bdev, void *data, unsigned long 
offset, int size, u32 count)
 {
@@ -66,22 +68,14 @@ static bool virtio_blk_dev_in(struct blk_dev *bdev, void 
*data, unsigned long of
return true;
 }
 
-/* Translate port into device id + offset in that device addr space */
-static void virtio_blk_port2dev(u16 port, u16 base, u16 size, u16 *dev_idx, 
u16 *offset)
-{
-   *dev_idx= (port - base) / size;
-   *offset = port - (base + *dev_idx * size);
-}
-
-static bool virtio_blk_pci_io_in(struct kvm *kvm, u16 port, void *data, int 
size, u32 count)
+static bool virtio_blk_pci_io_in(struct kvm *kvm, u16 port, void *data, int 
size, u32 count, void *param)
 {
struct blk_dev *bdev;
-   u16 offset, dev_idx;
+   u16 offset;
bool ret = true;
 
-   virtio_blk_port2dev(port, IOPORT_VIRTIO_BLK, IOPORT_VIRTIO_BLK_SIZE, 
dev_idx, offset);
-
-   bdev = bdevs[dev_idx];
+   bdev= param;
+   offset  = port - bdev-base_addr;
 
mutex_lock(bdev-mutex);
 
@@ -178,15 +172,14 @@ static void virtio_blk_do_io(struct kvm *kvm, void *param)
virt_queue__trigger_irq(vq, bdev-pci_hdr.irq_line, bdev-isr, kvm);
 }
 
-static bool virtio_blk_pci_io_out(struct kvm *kvm, u16 port, void *data, int 
size, u32 count)
+static bool virtio_blk_pci_io_out(struct kvm *kvm, u16 port, void *data, int 
size, u32 count, void *param)
 {
struct blk_dev *bdev;
-   u16 offset, dev_idx;
+   u16 offset;
bool ret = true;
 
-   virtio_blk_port2dev(port, IOPORT_VIRTIO_BLK, IOPORT_VIRTIO_BLK_SIZE, 
dev_idx, offset);
-
-   bdev = bdevs[dev_idx];
+   bdev= param;
+   offset  = port - bdev-base_addr;
 
mutex_lock(bdev-mutex);
 
@@ -246,48 +239,29 @@ static bool virtio_blk_pci_io_out(struct kvm *kvm, u16 
port, void *data, int siz
 }
 
 static struct ioport_operations virtio_blk_io_ops = {
-   .io_in  = virtio_blk_pci_io_in,
-   .io_out = virtio_blk_pci_io_out,
+   .io_in_param= virtio_blk_pci_io_in,
+   .io_out_param   = virtio_blk_pci_io_out,
 };
 
-static int virtio_blk_find_empty_dev(void)
-{
-   int i;
-
-   for (i = 0; i  VIRTIO_BLK_MAX_DEV; i++) {
-   if (bdevs[i] == NULL)
-   return i;
-   }
-
-   return -1;
-}
-
 void virtio_blk__init(struct kvm *kvm, struct disk_image *disk)
 {
u16 blk_dev_base_addr;
u8 dev, pin, line;

[PATCH v2 5/8] kvm tools: Use dynamic IO port allocation in vesa driver

2011-05-26 Thread Sasha Levin
Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/hw/vesa.c|7 +++
 tools/kvm/include/kvm/ioport.h |2 --
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/tools/kvm/hw/vesa.c b/tools/kvm/hw/vesa.c
index 6ab07ee..9315510 100644
--- a/tools/kvm/hw/vesa.c
+++ b/tools/kvm/hw/vesa.c
@@ -49,7 +49,6 @@ static struct pci_device_header vesa_pci_device = {
.class  = 0x03,
.subsys_vendor_id   = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
.subsys_id  = PCI_SUBSYSTEM_ID_VESA,
-   .bar[0] = IOPORT_VESA   | PCI_BASE_ADDRESS_SPACE_IO,
.bar[1] = VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY,
 };
 
@@ -66,17 +65,17 @@ void vesa__init(struct kvm *kvm)
 {
u8 dev, line, pin;
pthread_t thread;
+   u16 vesa_base_addr;
 
if (irq__register_device(PCI_DEVICE_ID_VESA, dev, pin, line)  0)
return;
 
vesa_pci_device.irq_pin = pin;
vesa_pci_device.irq_line= line;
-
+   vesa_base_addr  = ioport__register(IOPORT_EMPTY, 
vesa_io_ops, IOPORT_SIZE);
+   vesa_pci_device.bar[0]  = vesa_base_addr | 
PCI_BASE_ADDRESS_SPACE_IO;
pci__register(vesa_pci_device, dev);
 
-   ioport__register(IOPORT_VESA, vesa_io_ops, IOPORT_VESA_SIZE);
-
kvm__register_mmio(VESA_MEM_ADDR, VESA_MEM_SIZE, vesa_mmio_callback);
 
pthread_create(thread, NULL, vesa__dovnc, kvm);
diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index ffa6893..5dee9d2 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -10,8 +10,6 @@
 #define IOPORT_START   0x6200
 #define IOPORT_SIZE0x400
 
-#define IOPORT_VESA0xa200
-#define IOPORT_VESA_SIZE   256
 #define IOPORT_VIRTIO_P9   0xb200  /* Virtio 9P device */
 #define IOPORT_VIRTIO_P9_SIZE  256
 #define IOPORT_VIRTIO_CONSOLE  0xd200  /* Virtio console device */
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 6/8] kvm tools: Use dynamic IO port allocation in 9p driver

2011-05-26 Thread Sasha Levin
Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |2 --
 tools/kvm/virtio/9p.c  |   12 +++-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 5dee9d2..a6bcc6a 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -10,8 +10,6 @@
 #define IOPORT_START   0x6200
 #define IOPORT_SIZE0x400
 
-#define IOPORT_VIRTIO_P9   0xb200  /* Virtio 9P device */
-#define IOPORT_VIRTIO_P9_SIZE  256
 #define IOPORT_VIRTIO_CONSOLE  0xd200  /* Virtio console device */
 #define IOPORT_VIRTIO_CONSOLE_SIZE 256
 #define IOPORT_VIRTIO_NET  0xe200  /* Virtio network device */
diff --git a/tools/kvm/virtio/9p.c b/tools/kvm/virtio/9p.c
index e307592..af21463 100644
--- a/tools/kvm/virtio/9p.c
+++ b/tools/kvm/virtio/9p.c
@@ -50,7 +50,6 @@ static struct pci_device_header virtio_p9_pci_device = {
.class  = 0x01,
.subsys_vendor_id   = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
.subsys_id  = VIRTIO_ID_9P,
-   .bar[0] = IOPORT_VIRTIO_P9 | PCI_BASE_ADDRESS_SPACE_IO,
 };
 
 struct p9_dev {
@@ -59,6 +58,7 @@ struct p9_dev {
u16 config_vector;
u32 features;
struct virtio_9p_config *config;
+   u16 base_addr;
 
/* virtio queue */
u16 queue_selector;
@@ -96,7 +96,7 @@ static bool virtio_p9_pci_io_in(struct kvm *kvm, u16 port, 
void *data, int size,
unsigned long offset;
bool ret = true;
 
-   offset = port - IOPORT_VIRTIO_P9;
+   offset = port - p9dev.base_addr;
 
switch (offset) {
case VIRTIO_PCI_HOST_FEATURES:
@@ -584,7 +584,7 @@ static bool virtio_p9_pci_io_out(struct kvm *kvm, u16 port, 
void *data, int size
unsigned long offset;
bool ret = true;
 
-   offset  = port - IOPORT_VIRTIO_P9;
+   offset = port - p9dev.base_addr;
 
switch (offset) {
case VIRTIO_MSI_QUEUE_VECTOR:
@@ -636,6 +636,7 @@ void virtio_9p__init(struct kvm *kvm, const char *root)
 {
u8 pin, line, dev;
u32 i, root_len;
+   u16 p9_base_addr;
 
p9dev.config = calloc(1, sizeof(*p9dev.config) + sizeof(VIRTIO_P9_TAG));
if (p9dev.config == NULL)
@@ -662,7 +663,8 @@ void virtio_9p__init(struct kvm *kvm, const char *root)
 
virtio_p9_pci_device.irq_pin= pin;
virtio_p9_pci_device.irq_line   = line;
+   p9_base_addr= ioport__register(IOPORT_EMPTY, 
virtio_p9_io_ops, IOPORT_SIZE);
+   virtio_p9_pci_device.bar[0] = p9_base_addr | 
PCI_BASE_ADDRESS_SPACE_IO;
+   p9dev.base_addr = p9_base_addr;
pci__register(virtio_p9_pci_device, dev);
-
-   ioport__register(IOPORT_VIRTIO_P9, virtio_p9_io_ops, 
IOPORT_VIRTIO_P9_SIZE);
 }
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 7/8] kvm tools: Use dynamic IO port allocation in virtio-console

2011-05-26 Thread Sasha Levin
Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |2 --
 tools/kvm/virtio/console.c |   11 +++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index a6bcc6a..0c68e8c 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -10,8 +10,6 @@
 #define IOPORT_START   0x6200
 #define IOPORT_SIZE0x400
 
-#define IOPORT_VIRTIO_CONSOLE  0xd200  /* Virtio console device */
-#define IOPORT_VIRTIO_CONSOLE_SIZE 256
 #define IOPORT_VIRTIO_NET  0xe200  /* Virtio network device */
 #define IOPORT_VIRTIO_NET_SIZE 256
 
diff --git a/tools/kvm/virtio/console.c b/tools/kvm/virtio/console.c
index a0b..a954f22 100644
--- a/tools/kvm/virtio/console.c
+++ b/tools/kvm/virtio/console.c
@@ -36,7 +36,6 @@ static struct pci_device_header virtio_console_pci_device = {
.class  = 0x078000,
.subsys_vendor_id   = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
.subsys_id  = VIRTIO_ID_CONSOLE,
-   .bar[0] = IOPORT_VIRTIO_CONSOLE | 
PCI_BASE_ADDRESS_SPACE_IO,
 };
 
 struct con_dev {
@@ -50,6 +49,7 @@ struct con_dev {
u8  status;
u8  isr;
u16 queue_selector;
+   u16 base_addr;
 
void*jobs[VIRTIO_CONSOLE_NUM_QUEUES];
 };
@@ -113,7 +113,7 @@ static bool virtio_console_pci_io_device_specific_in(void 
*data, unsigned long o
 
 static bool virtio_console_pci_io_in(struct kvm *kvm, u16 port, void *data, 
int size, u32 count)
 {
-   unsigned long offset = port - IOPORT_VIRTIO_CONSOLE;
+   unsigned long offset = port - cdev.base_addr;
bool ret = true;
 
mutex_lock(cdev.mutex);
@@ -181,7 +181,7 @@ static void virtio_console_handle_callback(struct kvm *kvm, 
void *param)
 
 static bool virtio_console_pci_io_out(struct kvm *kvm, u16 port, void *data, 
int size, u32 count)
 {
-   unsigned long offset = port - IOPORT_VIRTIO_CONSOLE;
+   unsigned long offset = port - cdev.base_addr;
bool ret = true;
 
mutex_lock(cdev.mutex);
@@ -243,12 +243,15 @@ static struct ioport_operations virtio_console_io_ops = {
 void virtio_console__init(struct kvm *kvm)
 {
u8 dev, line, pin;
+   u16 console_base_addr;
 
if (irq__register_device(VIRTIO_ID_CONSOLE, dev, pin, line)  0)
return;
 
virtio_console_pci_device.irq_pin   = pin;
virtio_console_pci_device.irq_line  = line;
+   console_base_addr   = 
ioport__register(IOPORT_EMPTY, virtio_console_io_ops, IOPORT_SIZE);
+   virtio_console_pci_device.bar[0]= console_base_addr | 
PCI_BASE_ADDRESS_SPACE_IO;
+   cdev.base_addr  = console_base_addr;
pci__register(virtio_console_pci_device, dev);
-   ioport__register(IOPORT_VIRTIO_CONSOLE, virtio_console_io_ops, 
IOPORT_VIRTIO_CONSOLE_SIZE);
 }
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 8/8] kvm tools: Use dynamic IO port allocation in virtio-net

2011-05-26 Thread Sasha Levin
Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |3 ---
 tools/kvm/virtio/net.c |   12 
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 0c68e8c..396928b 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -10,9 +10,6 @@
 #define IOPORT_START   0x6200
 #define IOPORT_SIZE0x400
 
-#define IOPORT_VIRTIO_NET  0xe200  /* Virtio network device */
-#define IOPORT_VIRTIO_NET_SIZE 256
-
 #define IOPORT_EMPTY   USHRT_MAX
 
 struct kvm;
diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
index 649bc0f..7e4400a 100644
--- a/tools/kvm/virtio/net.c
+++ b/tools/kvm/virtio/net.c
@@ -37,7 +37,6 @@ static struct pci_device_header pci_header = {
.class  = 0x02,
.subsys_vendor_id   = 
PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
.subsys_id  = VIRTIO_ID_NET,
-   .bar[0] = IOPORT_VIRTIO_NET | 
PCI_BASE_ADDRESS_SPACE_IO,
 };
 
 struct net_device {
@@ -51,6 +50,7 @@ struct net_device {
u8  status;
u8  isr;
u16 queue_selector;
+   u16 base_addr;
 
pthread_t   io_rx_thread;
pthread_mutex_t io_rx_lock;
@@ -166,7 +166,7 @@ static bool virtio_net_pci_io_device_specific_in(void 
*data, unsigned long offse
 
 static bool virtio_net_pci_io_in(struct kvm *kvm, u16 port, void *data, int 
size, u32 count)
 {
-   unsigned long   offset  = port - IOPORT_VIRTIO_NET;
+   unsigned long   offset  = port - ndev.base_addr;
boolret = true;
 
mutex_lock(ndev.mutex);
@@ -230,7 +230,7 @@ static void virtio_net_handle_callback(struct kvm *kvm, u16 
queue_index)
 
 static bool virtio_net_pci_io_out(struct kvm *kvm, u16 port, void *data, int 
size, u32 count)
 {
-   unsigned long   offset  = port - IOPORT_VIRTIO_NET;
+   unsigned long   offset  = port - ndev.base_addr;
boolret = true;
 
mutex_lock(ndev.mutex);
@@ -387,14 +387,18 @@ void virtio_net__init(const struct virtio_net_parameters 
*params)
 {
if (virtio_net__tap_init(params)) {
u8 dev, line, pin;
+   u16 net_base_addr;
 
if (irq__register_device(VIRTIO_ID_NET, dev, pin, line)  0)
return;
 
pci_header.irq_pin  = pin;
pci_header.irq_line = line;
+   net_base_addr   = ioport__register(IOPORT_EMPTY, 
virtio_net_io_ops, IOPORT_SIZE);
+   pci_header.bar[0]   = net_base_addr | 
PCI_BASE_ADDRESS_SPACE_IO;
+   ndev.base_addr  = net_base_addr;
+
pci__register(pci_header, dev);
-   ioport__register(IOPORT_VIRTIO_NET, virtio_net_io_ops, 
IOPORT_VIRTIO_NET_SIZE);
 
virtio_net__io_thread_init(params-kvm);
}
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 4/8] kvm tools: Add support for multiple virtio-rng devices

2011-05-26 Thread Sasha Levin
Since multiple hardware rng devices of the same type are currently
unsupported by the kernel, this serves more as an example of a basic
virtio driver under kvm tools and can be used to debug the PCI layer.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h|2 -
 tools/kvm/include/kvm/parse-options.h |9 +++
 tools/kvm/include/kvm/virtio-rng.h|1 +
 tools/kvm/kvm-run.c   |8 ++-
 tools/kvm/virtio/rng.c|  126 ++---
 5 files changed, 100 insertions(+), 46 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 47f9fb5..ffa6893 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -18,8 +18,6 @@
 #define IOPORT_VIRTIO_CONSOLE_SIZE 256
 #define IOPORT_VIRTIO_NET  0xe200  /* Virtio network device */
 #define IOPORT_VIRTIO_NET_SIZE 256
-#define IOPORT_VIRTIO_RNG  0xf200  /* Virtio network device */
-#define IOPORT_VIRTIO_RNG_SIZE 256
 
 #define IOPORT_EMPTY   USHRT_MAX
 
diff --git a/tools/kvm/include/kvm/parse-options.h 
b/tools/kvm/include/kvm/parse-options.h
index 2d5c99e..6bf9a1d 100644
--- a/tools/kvm/include/kvm/parse-options.h
+++ b/tools/kvm/include/kvm/parse-options.h
@@ -132,6 +132,15 @@ intptr_t defval;
.help = (h) \
 }
 
+#define OPT_INCR(s, l, v, h)\
+{   \
+   .type = OPTION_INCR,\
+   .short_name = (s),  \
+   .long_name = (l),   \
+   .value = check_vtype(v, int *), \
+   .help = (h) \
+}
+
 #define OPT_GROUP(h)\
 {   \
.type = OPTION_GROUP,   \
diff --git a/tools/kvm/include/kvm/virtio-rng.h 
b/tools/kvm/include/kvm/virtio-rng.h
index 7015c1f..c0a413b 100644
--- a/tools/kvm/include/kvm/virtio-rng.h
+++ b/tools/kvm/include/kvm/virtio-rng.h
@@ -4,5 +4,6 @@
 struct kvm;
 
 void virtio_rng__init(struct kvm *kvm);
+void virtio_rng__delete_all(struct kvm *kvm);
 
 #endif /* KVM__RNG_VIRTIO_H */
diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c
index adbb25b..76b5782 100644
--- a/tools/kvm/kvm-run.c
+++ b/tools/kvm/kvm-run.c
@@ -52,6 +52,7 @@ static __thread struct kvm_cpu *current_kvm_cpu;
 
 static u64 ram_size;
 static u8  image_count;
+static int virtio_rng;
 static const char *kernel_cmdline;
 static const char *kernel_filename;
 static const char *vmlinux_filename;
@@ -66,7 +67,6 @@ static const char *script;
 static const char *virtio_9p_dir;
 static bool single_step;
 static bool readonly_image[MAX_DISK_IMAGES];
-static bool virtio_rng;
 static bool vnc;
 extern bool ioport_debug;
 extern int  active_console;
@@ -107,7 +107,7 @@ static const struct option options[] = {
OPT_CALLBACK('d', disk, NULL, image, Disk image, img_name_parser),
OPT_STRING('\0', console, console, serial or virtio,
Console to use),
-   OPT_BOOLEAN('\0', rng, virtio_rng,
+   OPT_INCR('\0', rng, virtio_rng,
Enable virtio Random Number Generator),
OPT_STRING('\0', kvm-dev, kvm_dev, kvm-dev, KVM device file),
OPT_STRING('\0', virtio-9p, virtio_9p_dir, root dir,
@@ -570,7 +570,8 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
virtio_console__init(kvm);
 
if (virtio_rng)
-   virtio_rng__init(kvm);
+   while (virtio_rng--)
+   virtio_rng__init(kvm);
 
if (!network)
network = DEFAULT_NETWORK;
@@ -631,6 +632,7 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
}
 
virtio_blk__delete_all(kvm);
+   virtio_rng__delete_all(kvm);
 
disk_image__close_all(kvm-disks, image_count);
kvm__delete(kvm);
diff --git a/tools/kvm/virtio/rng.c b/tools/kvm/virtio/rng.c
index 9bd0098..f71a59b 100644
--- a/tools/kvm/virtio/rng.c
+++ b/tools/kvm/virtio/rng.c
@@ -5,7 +5,6 @@
 #include kvm/disk-image.h
 #include kvm/virtio.h
 #include kvm/ioport.h
-#include kvm/mutex.h
 #include kvm/util.h
 #include kvm/kvm.h
 #include kvm/pci.h
@@ -15,6 +14,7 @@
 #include linux/virtio_ring.h
 #include linux/virtio_rng.h
 
+#include linux/list.h
 #include fcntl.h
 #include sys/types.h
 #include sys/stat.h
@@ -23,18 +23,17 @@
 #define NUM_VIRT_QUEUES1
 #define VIRTIO_RNG_QUEUE_SIZE  128
 
-static struct pci_device_header virtio_rng_pci_device = {
-   .vendor_id  = PCI_VENDOR_ID_REDHAT_QUMRANET,
-   .device_id  = PCI_DEVICE_ID_VIRTIO_RNG,
-   .header_type= PCI_HEADER_TYPE_NORMAL,
-   .revision_id= 0,
-   .class  = 0x01,
-   .subsys_vendor_id   = 

Re: Restoring saved guest causes guest to reboot

2011-05-26 Thread Avi Kivity

On 05/25/2011 09:49 AM, Markus Schade wrote:


Git bisect tells me that this is the first bad commit:
-%-
aff48baa34c033318ad322ecbf2e4bcd891b29ca is the first bad commit
commit aff48baa34c033318ad322ecbf2e4bcd891b29ca
Author: Avi Kivity a...@redhat.com
Date:   Sun Dec 5 18:56:11 2010 +0200

KVM: Fetch guest cr3 from hardware on demand

Instead of syncing the guest cr3 every exit, which is expensive on vmx
with ept enabled, sync it only on demand.

[sheng: fix incorrect cr3 seen by Windows XP]

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Avi Kivity a...@redhat.com



Does your machine have ept?

(cat /sys/module/kvm_intel/parameters/ept)

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 09:31:50AM +0300, Avi Kivity wrote:
 On 05/25/2011 09:18 PM, Marcelo Tosatti wrote:
 Commit fa4491a6b667304 moved the permission check for io instructions
 to the ->check_perm callback. It failed to copy the port value from RDX
 register for string and in,out ax,dx instructions.
 
 Fix it by reading RDX register at decode stage when appropriate.
 
 Fixes FC8.32 installation.
 
 +#define Sse (118) /* SSE Vector instruction */
 
 19/20 are still available, no need to go 64-bit just yet.
 
   /* Misc flags */
 -#define Prot(121) /* instruction generates #UD if not in 
 prot-mode */
 
 +case SrcDX:
 +c-src.type = OP_REG;
 +c-src.bytes = c-op_bytes;
 
 Needs to be 2.  Otherwise we'll see extra bits from edx, or lose
 bits from dx if it's a 1-byte instruction.
 
But those extra bits will be dropped by check_perm_in() anyway. Can
c->op_bytes ever be 1?

 +c-src.addr.reg =c-regs[VCPU_REGS_RDX];
 +fetch_register_operand(c-src);
 +break;
  }
 
  if (rc != X86EMUL_CONTINUE)
 @@ -3649,6 +3657,12 @@ done_prefixes:
  c-dst.addr.mem.seg = VCPU_SREG_ES;
  c-dst.val = 0;
  break;
 +case DstDX:
 +c-dst.type = OP_REG;
 +c-dst.bytes = c-op_bytes;
 
 2 again.
 
 +c-dst.addr.reg =c-regs[VCPU_REGS_RDX];
 +fetch_register_operand(c-dst);
 +break;
  case ImplicitOps:
  /* Special instructions do their own operand decoding. */
  default:
 
 We also need to unify Src/Dst decode eventually.
 
 -- 
 I have a truly marvellous patch that fixes the bug which this
 signature is too narrow to contain.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Avi Kivity

On 05/26/2011 09:55 AM, Gleb Natapov wrote:

  
  + case SrcDX:
  + c-src.type = OP_REG;
  + c-src.bytes = c-op_bytes;

  Needs to be 2.  Otherwise we'll see extra bits from edx, or lose
  bits from dx if it's a 1-byte instruction.

But those extra bits will be dropped by check_perm_in() anyway.


It isn't nice to depend on it.

btw, Marcelo, the patch should also make use of the decode during execution:

case 0xef: /* out dx,(e/r)ax */
c->dst.val = c->regs[VCPU_REGS_RDX];

^^ can drop


do_io_out:
ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val,
&c->src.val, 1);
c->dst.type = OP_NONE;/* Disable writeback. */
break;


Can
c->op_bytes ever be 1?


in %dx, %al

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Avi Kivity

On 05/26/2011 10:02 AM, Avi Kivity wrote:



Can
c->op_bytes ever be 1?


in %dx, %al



er, that doesn't change op_bytes.  Still, op_bytes is irrelevant for 
SrcDX, the 16-bit version is always used.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 10:04:24AM +0300, Avi Kivity wrote:
 On 05/26/2011 10:02 AM, Avi Kivity wrote:
 
 Can
 c->op_bytes ever be 1?
 
 in %dx, %al
 
 
 er, that doesn't change op_bytes.
Yep.

Still, op_bytes is irrelevant for
 SrcDX, the 16-bit version is always used.
 
If SrcDX/DstDX will be used only for decoding in/out instruction
then yes. Otherwise it is nice to have more general decoder.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Paolo Bonzini

On 05/26/2011 09:07 AM, Gleb Natapov wrote:

 Still, op_bytes is irrelevant for
 SrcDX, the 16-bit version is always used.

If SrcDX/DstDX will be used only for decoding in/out instruction
then yes. Otherwise it is nice to have more general decoder.


Not counting instructions that read/write many registers (rdmsr/wrmsr,
mul/imul/div/idiv, rdtsc, etc.), I think the only other instruction with
an implicit DstDX is cwd/cdq/cqo.  Since cwd/cdq/cqo needs c->dst.bytes
= c->src.bytes (not op_bytes), I think DstDX is not really reusable
beyond port instructions.


Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 4/4] rbd: Add bdrv_truncate implementation

2011-05-26 Thread Kevin Wolf
Am 25.05.2011 22:34, schrieb Josh Durgin:
 Signed-off-by: Josh Durgin josh.dur...@dreamhost.com
 ---
  block/rbd.c |   15 +++
  1 files changed, 15 insertions(+), 0 deletions(-)
 
 diff --git a/block/rbd.c b/block/rbd.c
 index a44d160..b95b1eb 100644
 --- a/block/rbd.c
 +++ b/block/rbd.c
 @@ -688,6 +688,20 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
  return info.size;
  }
  
 +static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset)
 +{
 +BDRVRBDState *s = bs-opaque;
 +int r;
 +
 +r = rbd_resize(s-image, offset);
 +if (r  0) {
 +error_report(failed to resize rbd image);
 +return -EIO;
 +}

Don't print an error message here. The caller will do it, too, so we end
up with two error messages saying the same.

What kind of error code does rbd_resize return? If it is a valid errno
value, you should return r instead of turning it into EIO.

Kevin
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 2/4] rbd: allow configuration of rados from the rbd filename

2011-05-26 Thread Kevin Wolf
Am 25.05.2011 22:34, schrieb Josh Durgin:
 The new format is 
 rbd:pool/image[@snapshot][:option1=value1[:option2=value2...]]
 Each option is used to configure rados, and may be any Ceph option, or conf.
 The conf option specifies a Ceph configuration file to read.
 
 This allows rbd volumes from more than one Ceph cluster to be used by
 specifying different monitor addresses, as well as having different
 logging levels or locations for different volumes.
 
 Signed-off-by: Josh Durgin josh.dur...@dreamhost.com
 ---
  block/rbd.c |  119 ++
  1 files changed, 102 insertions(+), 17 deletions(-)
 
 diff --git a/block/rbd.c b/block/rbd.c
 index 2cee70d..d346a21 100644
 --- a/block/rbd.c
 +++ b/block/rbd.c
 @@ -23,13 +23,17 @@
  /*
   * When specifying the image filename use:
   *
 - * rbd:poolname/devicename
 + * 
 rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]]
   *
   * poolname must be the name of an existing rados pool
   *
   * devicename is the basename for all objects used to
   * emulate the raw device.
   *
 + * Each option given is used to configure rados, and may be
 + * any Ceph option, or conf. The conf option specifies
 + * a Ceph configuration file to read.
 + *
   * Metadata information (image size, ...) is stored in an
   * object with the name devicename.rbd.
   *
 @@ -123,7 +127,8 @@ static int qemu_rbd_next_tok(char *dst, int dst_len,
  static int qemu_rbd_parsename(const char *filename,
char *pool, int pool_len,
char *snap, int snap_len,
 -  char *name, int name_len)
 +  char *name, int name_len,
 +  char *conf, int conf_len)
  {
  const char *start;
  char *p, *buf;
 @@ -135,28 +140,84 @@ static int qemu_rbd_parsename(const char *filename,
  
  buf = qemu_strdup(start);
  p = buf;
 +*snap = '\0';
 +*conf = '\0';
  
  ret = qemu_rbd_next_tok(pool, pool_len, p, '/', pool name, p);
  if (ret  0 || !p) {
  ret = -EINVAL;
  goto done;
  }
 -ret = qemu_rbd_next_tok(name, name_len, p, '@', object name, p);
 -if (ret  0) {
 -goto done;
 +
 +if (strchr(p, '@')) {
 +ret = qemu_rbd_next_tok(name, name_len, p, '@', object name, p);
 +if (ret  0) {
 +goto done;
 +}
 +ret = qemu_rbd_next_tok(snap, snap_len, p, ':', snap name, p);
 +} else {
 +ret = qemu_rbd_next_tok(name, name_len, p, ':', object name, p);
  }
 -if (!p) {
 -*snap = '\0';
 +if (ret  0 || !p) {
  goto done;
  }
  
 -ret = qemu_rbd_next_tok(snap, snap_len, p, '\0', snap name, p);
 +ret = qemu_rbd_next_tok(conf, conf_len, p, '\0', configuration, p);
  
  done:
  qemu_free(buf);
  return ret;
  }
  
 +static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
 +{
 +char *p, *buf;
 +char name[RBD_MAX_CONF_NAME_SIZE];
 +char value[RBD_MAX_CONF_VAL_SIZE];
 +int ret = 0;
 +
 +buf = qemu_strdup(conf);
 +p = buf;
 +
 +while (p) {
 +ret = qemu_rbd_next_tok(name, sizeof(name), p,
 +'=', conf option name, p);
 +if (ret  0) {
 +break;
 +}
 +
 +if (!p) {
 +error_report(conf option %s has no value, name);
 +ret = -EINVAL;
 +break;
 +}
 +
 +ret = qemu_rbd_next_tok(value, sizeof(value), p,
 +':', conf option value, p);
 +if (ret  0) {
 +break;
 +}
 +
 +if (strncmp(name, conf, strlen(conf))) {

Do you really only want to check if name _starts_ with "conf"?

Kevin
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] KVM: x86 emulator: Avoid clearing the whole decode_cache

2011-05-26 Thread Avi Kivity

On 05/25/2011 05:09 AM, Takuya Yoshikawa wrote:

From: Takuya Yoshikawayoshikawa.tak...@oss.ntt.co.jp

During tracing the emulator, we noticed that init_emulate_ctxt()
sometimes took a bit longer time than we expected.

This patch is for mitigating the problem by some degree.

By looking into the function, we soon notice that it clears the whole
decode_cache whose size is about 2.5K bytes now.  Furthermore, most of
the bytes are taken for the two read_cache arrays, which are used only
by a few instructions.

Considering the fact that we are not assuming the cache arrays have
been cleared when we store actual data, we do not need to clear the
arrays: 2K bytes elimination.  In addition, we can avoid clearing the
fetch_cache and regs arrays.

This patch changes the initialization not to clear the arrays.

On our 64-bit host, init_emulate_ctxt() becomes 0.3 to 0.5us faster with
this patch applied.



Thanks, applied.

It strikes me that initializing the emulator in x86.c is the wrong 
thing.  We should move the entire thing to x86_decode_insn().


We'll need a few more callbacks for that, though (register, eflags); 
eventually we can read just registers that are used and write only 
registers that were updated.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] KVM: x86 emulator: Avoid clearing the whole decode_cache

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 11:19:03AM +0300, Avi Kivity wrote:
 On 05/25/2011 05:09 AM, Takuya Yoshikawa wrote:
 From: Takuya Yoshikawayoshikawa.tak...@oss.ntt.co.jp
 
 During tracing the emulator, we noticed that init_emulate_ctxt()
 sometimes took a bit longer time than we expected.
 
 This patch is for mitigating the problem by some degree.
 
 By looking into the function, we soon notice that it clears the whole
 decode_cache whose size is about 2.5K bytes now.  Furthermore, most of
 the bytes are taken for the two read_cache arrays, which are used only
 by a few instructions.
 
 Considering the fact that we are not assuming the cache arrays have
 been cleared when we store actual data, we do not need to clear the
 arrays: 2K bytes elimination.  In addition, we can avoid clearing the
 fetch_cache and regs arrays.
 
 This patch changes the initialization not to clear the arrays.
 
 On our 64-bit host, init_emulate_ctxt() becomes 0.3 to 0.5us faster with
 this patch applied.
 
 
 Thanks, applied.
 
 It strikes me that initializing the emulator in x86.c is the wrong
 thing.  We should move the entire thing to x86_decode_insn().
 
We initialize it in task switch and interrupt injection code too.

 We'll need a few more callbacks for that, though (register, eflags);
 eventually we can read just registers that are used and write only
 registers that were updated.
 

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 09:49:21AM +0200, Paolo Bonzini wrote:
 On 05/26/2011 09:07 AM, Gleb Natapov wrote:
  Still, op_bytes is irrelevant for
  SrcDX, the 16-bit version is always used.
 
 If SrcDX/DstDX will be used only for decoding in/out instruction
 then yes. Otherwise it is nice to have more general decoder.
 
 Not counting instructions that read/write many registers
 (rdmsr/wrmsr, mul/imul/div/idiv, rdtsc, etc.), I think the only
 other instruction with an implicit DstDX is cwd/cdq/cqo.  Since
 cwd/cdq/cqo needs c-dst.bytes = c-src.bytes (not op_bytes) I think
 DstDX is not really reusable beyond port instructions.
 
Why would c->dst.bytes != c->src.bytes for cwd/cdq/cqo if we'll set
c->dst.bytes to op_bytes during decode?

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/10] qemu-kvm: Cleanup and switch to upstream - Part III

2011-05-26 Thread Avi Kivity

On 05/20/2011 08:12 PM, Jan Kiszka wrote:

This is a rather short round as the next and final one cannot be split
up very well.

We start with three code cleanup patches, then work towards using
upstream kvm_cpu_exec, and finally rework the core's PIO access
management used for device assignment.

Please review/merge.



Thanks, applied.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V5 2/6 net-next] netdevice.h: Add zero-copy flag in netdevice

2011-05-26 Thread Michael S. Tsirkin
On Wed, May 25, 2011 at 03:49:40PM -0700, Shirley Ma wrote:
 On Fri, 2011-05-20 at 02:41 +0300, Michael S. Tsirkin wrote:
  So the requirements are
  - data must be released in a timely fashion (e.g. unlike virtio-net
tun or bridge)
 The current patch doesn't enable tun zero-copy. tun will copy data. It's
 not an issue now.
 We can disallow macvtap attach to bridge when
 zero-copy is enabled.

Attach macvtap to a tun device though. Or e.g. veth device ...
So there should be some generic way to disable zerocopy.
It can either be a whitelist or a blacklist.

 
  - SG support
  - HIGHDMA support (on arches where this makes sense)
 
 This can be checked by device flags.

OK, but pls note that SG can get turned off dynamically.

  - no filtering based on data (data is mapped in guest)
 
  - on fast path no calls to skb_copy, skb_clone, pskb_copy,
pskb_expand_head as these are slow
 
 Any calls to skb_copy, skb_clone, pskb_copy, pskb_expand_head will do a
 copy. The performance should be the same as the non-zero-copy case before.

I'm guessing a copy is cheaper than get_user_pages+copy+put_page.
But maybe not by much. Care checking that?

 I have done/tested the patch V6, will send it out for review tomorrow.
 
 I am looking at where there are some cases, skb remains the same for
 filtering.

To reliably filter on data I think we'll need to copy it first, otherwise
guest can change it. Most filters only look at the header though.

  First 2 requirements are a must, all other requirements
  are just dependencies to make sure zero copy will be faster
  than non zero copy.
  Using a new feature bit is probably the simplest approach to
  this. macvtap on top of most physical NICs most likely works
  correctly so it seems a bit more work than it needs to be,
  but it's also the safest one I think ... 
 
 For macvtap/vhost zero-copy we can use SG & HIGHDMA to enable it, it
 looks safe to me once patching skb_copy, skb_clone, pskb_copy,
 pskb_expand_head.
 
 To extend zero-copy in other usages, we can have a new feature bit
 later.
 
 Is that reasonable?
 
 Thanks
 Shirley

Is the problem the extra work needed to extend feature bits?

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks

2011-05-26 Thread Pekka Enberg
On Thu, 2011-05-26 at 09:42 +0300, Sasha Levin wrote:
 Allow specifying an optional parameter when registering an
 ioport range. The callback functions provided by the registering
 module will be called with the same parameter.
 
 This may be used to keep context during callbacks on IO operations.
 
 Signed-off-by: Sasha Levin levinsasha...@gmail.com
 ---
  tools/kvm/include/kvm/ioport.h |3 ++
  tools/kvm/ioport.c |   54 +--
  2 files changed, 43 insertions(+), 14 deletions(-)
 
 diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
 index 8253938..2a8d74d 100644
 --- a/tools/kvm/include/kvm/ioport.h
 +++ b/tools/kvm/include/kvm/ioport.h
 @@ -25,11 +25,14 @@ struct kvm;
  struct ioport_operations {
   bool (*io_in)(struct kvm *kvm, u16 port, void *data, int size, u32 
 count);
   bool (*io_out)(struct kvm *kvm, u16 port, void *data, int size, u32 
 count);
 + bool (*io_in_param)(struct kvm *kvm, u16 port, void *data, int size, 
 u32 count, void *param);
 + bool (*io_out_param)(struct kvm *kvm, u16 port, void *data, int size, 
 u32 count, void *param);

So why not make that 'param' unconditional for io_in and io_out and just
pass NULL if it's not needed?

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks

2011-05-26 Thread Sasha Levin
On Thu, 2011-05-26 at 11:53 +0300, Pekka Enberg wrote:
 On Thu, 2011-05-26 at 09:42 +0300, Sasha Levin wrote:
  Allow specifying an optional parameter when registering an
  ioport range. The callback functions provided by the registering
  module will be called with the same parameter.
  
  This may be used to keep context during callbacks on IO operations.
  
  Signed-off-by: Sasha Levin levinsasha...@gmail.com
  ---
   tools/kvm/include/kvm/ioport.h |3 ++
   tools/kvm/ioport.c |   54 
  +--
   2 files changed, 43 insertions(+), 14 deletions(-)
  
  diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
  index 8253938..2a8d74d 100644
  --- a/tools/kvm/include/kvm/ioport.h
  +++ b/tools/kvm/include/kvm/ioport.h
  @@ -25,11 +25,14 @@ struct kvm;
   struct ioport_operations {
  bool (*io_in)(struct kvm *kvm, u16 port, void *data, int size, u32 
  count);
  bool (*io_out)(struct kvm *kvm, u16 port, void *data, int size, u32 
  count);
  +   bool (*io_in_param)(struct kvm *kvm, u16 port, void *data, int size, 
  u32 count, void *param);
  +   bool (*io_out_param)(struct kvm *kvm, u16 port, void *data, int size, 
  u32 count, void *param);
 
 So why not make that 'param' unconditional for io_in and io_out and just
 pass NULL if it's not needed?
 

I wanted to keep the original interface clean. Most of the IO port
users don't (and probably won't) require a parameter.

-- 

Sasha.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks

2011-05-26 Thread Pekka Enberg
On Thu, May 26, 2011 at 12:02 PM, Sasha Levin levinsasha...@gmail.com wrote:
 On Thu, 2011-05-26 at 11:53 +0300, Pekka Enberg wrote:
 On Thu, 2011-05-26 at 09:42 +0300, Sasha Levin wrote:
  Allow specifying an optional parameter when registering an
  ioport range. The callback functions provided by the registering
  module will be called with the same parameter.
 
  This may be used to keep context during callbacks on IO operations.
 
  Signed-off-by: Sasha Levin levinsasha...@gmail.com
  ---
   tools/kvm/include/kvm/ioport.h |    3 ++
   tools/kvm/ioport.c             |   54 
  +--
   2 files changed, 43 insertions(+), 14 deletions(-)
 
  diff --git a/tools/kvm/include/kvm/ioport.h 
  b/tools/kvm/include/kvm/ioport.h
  index 8253938..2a8d74d 100644
  --- a/tools/kvm/include/kvm/ioport.h
  +++ b/tools/kvm/include/kvm/ioport.h
  @@ -25,11 +25,14 @@ struct kvm;
   struct ioport_operations {
      bool (*io_in)(struct kvm *kvm, u16 port, void *data, int size, u32 
  count);
      bool (*io_out)(struct kvm *kvm, u16 port, void *data, int size, u32 
  count);
  +   bool (*io_in_param)(struct kvm *kvm, u16 port, void *data, int size, 
  u32 count, void *param);
  +   bool (*io_out_param)(struct kvm *kvm, u16 port, void *data, int size, 
  u32 count, void *param);

 So why not make that 'param' unconditional for io_in and io_out and just
 pass NULL if it's not needed?


 I've wanted to keep the original interface clean, Most of the IO port
 users don't (and probably won't) require a parameter.

Well now struct ioport_operations isn't very clean is it - or the code
that needs to determine which function pointer to call?-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Paolo Bonzini

On 05/26/2011 10:26 AM, Gleb Natapov wrote:

Why would c->dst.bytes != c->src.bytes for cwd/cdq/cqo if we'll set
c->dst.bytes to op_bytes during decode?


Duh, you're right, cwd/cdq/cqo uses SrcAcc which has

c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;

so in practice c->src.bytes = c->op_bytes.

I still find it confusing that DstDX would use c->op_bytes without
honoring ByteOp unlike pretty much everything else; but yes, there is a
possible use of DstDX outside in/out.


Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks

2011-05-26 Thread Sasha Levin
On Thu, 2011-05-26 at 12:04 +0300, Pekka Enberg wrote:
 On Thu, May 26, 2011 at 12:02 PM, Sasha Levin levinsasha...@gmail.com wrote:
  On Thu, 2011-05-26 at 11:53 +0300, Pekka Enberg wrote:
  On Thu, 2011-05-26 at 09:42 +0300, Sasha Levin wrote:
   Allow specifying an optional parameter when registering an
   ioport range. The callback functions provided by the registering
   module will be called with the same parameter.
  
   This may be used to keep context during callbacks on IO operations.
  
   Signed-off-by: Sasha Levin levinsasha...@gmail.com
   ---
tools/kvm/include/kvm/ioport.h |3 ++
tools/kvm/ioport.c |   54 
   +--
2 files changed, 43 insertions(+), 14 deletions(-)
  
   diff --git a/tools/kvm/include/kvm/ioport.h 
   b/tools/kvm/include/kvm/ioport.h
   index 8253938..2a8d74d 100644
   --- a/tools/kvm/include/kvm/ioport.h
   +++ b/tools/kvm/include/kvm/ioport.h
   @@ -25,11 +25,14 @@ struct kvm;
struct ioport_operations {
   bool (*io_in)(struct kvm *kvm, u16 port, void *data, int size, u32 
   count);
   bool (*io_out)(struct kvm *kvm, u16 port, void *data, int size, u32 
   count);
   +   bool (*io_in_param)(struct kvm *kvm, u16 port, void *data, int size, 
   u32 count, void *param);
   +   bool (*io_out_param)(struct kvm *kvm, u16 port, void *data, int 
   size, u32 count, void *param);
 
  So why not make that 'param' unconditional for io_in and io_out and just
  pass NULL if it's not needed?
 
 
  I've wanted to keep the original interface clean, Most of the IO port
  users don't (and probably won't) require a parameter.
 
 Well now struct ioport_operations isn't very clean is it - or the code
 that needs to determine which function pointer to call?-)

struct ioport_operations is a bit more messy, but it's one spot instead
of adding a 'parameter' to each module that doesn't really need it.

My assumption is that most ioport users now and in the future won't need
it, it just solves several special cases more easily (multiple devices
which share same handling functions).

-- 

Sasha.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks

2011-05-26 Thread Pekka Enberg
On Thu, May 26, 2011 at 12:14 PM, Sasha Levin levinsasha...@gmail.com wrote:
  I've wanted to keep the original interface clean, Most of the IO port
  users don't (and probably won't) require a parameter.

 Well now struct ioport_operations isn't very clean is it - or the code
 that needs to determine which function pointer to call?-)

 struct ioport_operations is a bit more messy, but it's one spot instead
 of adding a 'parameter' to each module that doesn't really need it.

 My assumption is that most ioport users now and in the future won't need
 it, it just solves several special cases more easily (multiple devices
 which share same handling functions).

Hey, that's not an excuse to make struct ioport_operations 'bit
messy'! Look at any kernel code that uses ops like we do here and you
will see we don't do APIs like this.

One option here is to rename 'struct ioport_entry' to 'struct ioport'
and pass a pointer to that as the first argument to all of the ops.
That's what most APIs in the kernel do anyway.

Pekka
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Paolo Bonzini

On 05/26/2011 11:02 AM, Gleb Natapov wrote:

We can make it honor ByteOp. There will be no instruction that will
specify DstDX | ByteOp though.


in %dx, %al and out %al, %dx will, via D2bv.

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 11:23:44AM +0200, Paolo Bonzini wrote:
 On 05/26/2011 11:02 AM, Gleb Natapov wrote:
 We can make it honor ByteOp. There will be no instruction that will
 specify DstDX | ByteOp though.
 
 in %dx, %al and out %al, %dx will, via D2bv.
 
Yeah. Should ignore ByteOp then.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks

2011-05-26 Thread Sasha Levin
On Thu, 2011-05-26 at 12:20 +0300, Pekka Enberg wrote:
 On Thu, May 26, 2011 at 12:14 PM, Sasha Levin levinsasha...@gmail.com wrote:
   I've wanted to keep the original interface clean, Most of the IO port
   users don't (and probably won't) require a parameter.
 
  Well now struct ioport_operations isn't very clean is it - or the code
  that needs to determine which function pointer to call?-)
 
  struct ioport_operations is a bit more messy, but it's one spot instead
  of adding a 'parameter' to each module that doesn't really need it.
 
  My assumption is that most ioport users now and in the future won't need
  it, it just solves several special cases more easily (multiple devices
  which share same handling functions).
 
 Hey, that's not an excuse to make struct ioport_operations 'bit
 messy'! Look at any kernel code that uses ops like we do here and you
 will see we don't do APIs like this.
 
 One option here is to rename 'struct ioport_entry' to 'struct ioport'
 and pass a pointer to that as the first argument to all of the ops.
 That's what most APIs in the kernel do anyway.

Why do it like that? This way users of the callback functions will need
to know the internal structure of struct ioport_entry.

-- 

Sasha.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks

2011-05-26 Thread Pekka Enberg
On Thu, May 26, 2011 at 12:38 PM, Sasha Levin levinsasha...@gmail.com wrote:
 One option here is to rename 'struct ioport_entry' to 'struct ioport'
 and pass a pointer to that as the first argument to all of the ops.
 That's what most APIs in the kernel do anyway.

 Why do it like that? this way users of the callback functions will need
 to know the internal structure of struct ioport_entry.

Look at 'struct inode' or similar data structure in the kernel. That's
how we do it. You can then also do s/params/priv/.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/8] kvm tools: Add optional parameter used in ioport callbacks

2011-05-26 Thread Pekka Enberg
Hi Sasha,

On Thu, May 26, 2011 at 12:38 PM, Sasha Levin levinsasha...@gmail.com wrote:
 One option here is to rename 'struct ioport_entry' to 'struct ioport'
 and pass a pointer to that as the first argument to all of the ops.
 That's what most APIs in the kernel do anyway.

 Why do it like that? this way users of the callback functions will need
 to know the internal structure of struct ioport_entry.

On Thu, May 26, 2011 at 12:43 PM, Pekka Enberg penb...@kernel.org wrote:
 Look at 'struct inode' or similar data structure in the kernel. That's
 how we do it. You can then also do s/params/priv/.

Btw, the whole notion of 'internal structure' for structs in C code is
a pretty broken concept. In most cases, you just end up passing
untyped fragments of the data to callers which makes following the
data flow in code difficult. Passing 'struct ioport' down to the code
makes the code more obvious and readable.

Encapsulation is important but emulating that with hiding structs in
.c files isn't helpful at all. Face it, there's no proper support for
that in C so you just need to rely on conventions to do it.

  Pekka
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 11:00:24AM +0200, Paolo Bonzini wrote:
 On 05/26/2011 10:26 AM, Gleb Natapov wrote:
 Why would c->dst.bytes != c->src.bytes for cwd/cdq/cqo if we'll set
 c->dst.bytes to op_bytes during decode?
 
 Duh, you're right, cwd/cdq/cqo uses SrcAcc which has
 
 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 
 so in practice c->src.bytes = c->op_bytes.
 
 I still find it confusing that DstDX would use c->op_bytes without
 honoring ByteOp unlike pretty much everything else; but yes, there
 is a possible use of DstDX outside in/out.
 
We can make it honor ByteOp. There will be no instruction that will
specify DstDX | ByteOp though.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Nikola Ciprich
Hello,
after I tried updating our production host to 0.14.0 or 0.14.1, our windows 
terminal server stopped booting.
Here's BSOD screen:
http://nik.lbox.cz/public/wincrash.png
reverting to 0.13.5 fixes the problem.
I can't reproduce this on testing hardware though :(

exact guest version:
Microsoft Windows Server 2008 R2 Enterprise
6.1.7600 build 7600
x86_64, 4GB RAM, 1CPU

host is currently running 2.6.38.7 (but I had the problem also with 2.6.37)
it's 8core intel E5310 with 16GB RAM

since the hosts on which I'm experiencing this problem are production machines, 
my possibilities of testing are a bit limited (bisecting this will be quite 
problematic), but I'll do what I can to help fix this...
Could somebody please have a look at it?
thanks a lot in advance!
n.


-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-


pgpR7DT5kOvBM.pgp
Description: PGP signature


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 11:59:10AM +0200, Nikola Ciprich wrote:
 Hello,
 after I tried updating our production host to 0.14.0 or 0.14.1, our windows 
 terminal server stopped booting.
 Here's BSOD screen:
 http://nik.lbox.cz/public/wincrash.png
 reverting to 0.13.5 fixes the problem.
 I can't reproduce this on testing hardware though :(
 
 exact guest version:
 Microsoft Windows Server 2008 R2 Enterprise
 6.1.7600 build 7600
 x86_64, 4GB RAM, 1CPU
 
 host is currently running 2.6.38.7 (but I had the problem also with 2.6.37)
 it's 8core intel E5310 with 16GB RAM
 
What is your command line?

 since the hosts on which I'm experiencing this problem are production 
 machines, my possibilities of testing are a bit limited (bisecting this will 
 be quite problematic), but I'll do what I can to help fix this...
 Could somebody please have a look at it?
 thanks a lot in advance!
You can make a copy from your production image, install 0.14 version in
different place from 0.13 and experiment.


--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Nikola Ciprich
Hello Gleb!
 What is your command line?
currently it's:
/usr/bin/qemu-kvm -S -M pc-0.13 -enable-kvm -m 4096 -smp 
1,sockets=1,cores=1,threads=1 -name vmwts02 -uuid 
1e501300-dc48-11df-a690-00304834195b -nodefconfig -nodefaults -chardev 
socket,id=charmonitor,path=/var/lib/libvirt/qemu/vmwts02.monitor,server,nowait 
-mon chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot c 
-drive file=/dev/vgshared/vmwts02-1,if=none,id=drive-ide0-0-0,format=raw 
-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -drive 
if=none,media=cdrom,id=drive-ide0-1-0,readonly=on,format=raw -device 
ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 -netdev 
tap,fd=15,id=hostnet0 -device 
rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:61:01:00,bus=pci.0,addr=0x3 -usb 
-vnc 0.0.0.0:30802 -vga cirrus -device 
virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4
but this is with 0.13.5 (so it's running well). it's started using libvirt, I 
guess with 0.14.0 the command line should be very similar. (I can provide it 
later if needed).

 You can make a copy from your production image, install 0.14 version in
 different place from 0.13 and experiment.
yup, I think I've tried it also with exact copy and wasn't able to reproduce 
it, but I'll try it again and report soon.
n.


 
 
 --
   Gleb.
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-


pgpg199JWTMf1.pgp
Description: PGP signature


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 12:20:56PM +0200, Nikola Ciprich wrote:
  You can make a copy from your production image, install 0.14 version in
  different place from 0.13 and experiment.
 yup, I think I've tried it also with exact copy and wasn't able to reproduce 
 it, but I'll try it again and report soon.
Then check image file permission please.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 1/8] kvm tools: Add optional parameter used in ioport callbacks

2011-05-26 Thread Sasha Levin
Allow specifying an optional parameter when registering an
ioport range. The callback functions provided by the registering
module will be called with the same parameter.

This may be used to keep context during callbacks on IO operations.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/hw/rtc.c |   10 +++---
 tools/kvm/hw/serial.c  |6 ++--
 tools/kvm/hw/vesa.c|6 ++--
 tools/kvm/include/kvm/ioport.h |   14 ++--
 tools/kvm/ioport.c |   71 +--
 tools/kvm/pci.c|   12 +++---
 tools/kvm/virtio/9p.c  |6 ++--
 tools/kvm/virtio/blk.c |6 ++--
 tools/kvm/virtio/console.c |6 ++--
 tools/kvm/virtio/net.c |6 ++--
 tools/kvm/virtio/rng.c |6 ++--
 11 files changed, 74 insertions(+), 75 deletions(-)

diff --git a/tools/kvm/hw/rtc.c b/tools/kvm/hw/rtc.c
index 6735e82..146f660 100644
--- a/tools/kvm/hw/rtc.c
+++ b/tools/kvm/hw/rtc.c
@@ -19,7 +19,7 @@ static inline unsigned char bin2bcd(unsigned val)
return ((val / 10)  4) + val % 10;
 }
 
-static bool cmos_ram_data_in(struct kvm *kvm, u16 port, void *data, int size, 
u32 count)
+static bool cmos_ram_data_in(struct ioport *ioport, struct kvm *kvm, u16 port, 
void *data, int size, u32 count)
 {
struct tm *tm;
time_t ti;
@@ -52,7 +52,7 @@ static bool cmos_ram_data_in(struct kvm *kvm, u16 port, void 
*data, int size, u3
return true;
 }
 
-static bool cmos_ram_data_out(struct kvm *kvm, u16 port, void *data, int size, 
u32 count)
+static bool cmos_ram_data_out(struct ioport *ioport, struct kvm *kvm, u16 
port, void *data, int size, u32 count)
 {
return true;
 }
@@ -62,7 +62,7 @@ static struct ioport_operations cmos_ram_data_ioport_ops = {
.io_in  = cmos_ram_data_in,
 };
 
-static bool cmos_ram_index_out(struct kvm *kvm, u16 port, void *data, int 
size, u32 count)
+static bool cmos_ram_index_out(struct ioport *ioport, struct kvm *kvm, u16 
port, void *data, int size, u32 count)
 {
u8 value;
 
@@ -82,6 +82,6 @@ static struct ioport_operations cmos_ram_index_ioport_ops = {
 void rtc__init(void)
 {
/* PORT 0070-007F - CMOS RAM/RTC (REAL TIME CLOCK) */
-   ioport__register(0x0070, cmos_ram_index_ioport_ops, 1);
-   ioport__register(0x0071, cmos_ram_data_ioport_ops, 1);
+   ioport__register(0x0070, cmos_ram_index_ioport_ops, 1, NULL);
+   ioport__register(0x0071, cmos_ram_data_ioport_ops, 1, NULL);
 }
diff --git a/tools/kvm/hw/serial.c b/tools/kvm/hw/serial.c
index beebbba..1199264 100644
--- a/tools/kvm/hw/serial.c
+++ b/tools/kvm/hw/serial.c
@@ -164,7 +164,7 @@ static struct serial8250_device *find_device(u16 port)
return NULL;
 }
 
-static bool serial8250_out(struct kvm *kvm, u16 port, void *data, int size, 
u32 count)
+static bool serial8250_out(struct ioport *ioport, struct kvm *kvm, u16 port, 
void *data, int size, u32 count)
 {
struct serial8250_device *dev;
u16 offset;
@@ -252,7 +252,7 @@ out_unlock:
return ret;
 }
 
-static bool serial8250_in(struct kvm *kvm, u16 port, void *data, int size, u32 
count)
+static bool serial8250_in(struct ioport *ioport, struct kvm *kvm, u16 port, 
void *data, int size, u32 count)
 {
struct serial8250_device *dev;
u16 offset;
@@ -339,7 +339,7 @@ static struct ioport_operations serial8250_ops = {
 
 static void serial8250__device_init(struct kvm *kvm, struct serial8250_device 
*dev)
 {
-   ioport__register(dev-iobase, serial8250_ops, 8);
+   ioport__register(dev-iobase, serial8250_ops, 8, NULL);
kvm__irq_line(kvm, dev-irq, 0);
 }
 
diff --git a/tools/kvm/hw/vesa.c b/tools/kvm/hw/vesa.c
index 6ab07ee..ec4788c 100644
--- a/tools/kvm/hw/vesa.c
+++ b/tools/kvm/hw/vesa.c
@@ -26,12 +26,12 @@
 
 static char videomem[VESA_MEM_SIZE];
 
-static bool vesa_pci_io_in(struct kvm *kvm, u16 port, void *data, int size, 
u32 count)
+static bool vesa_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, 
void *data, int size, u32 count)
 {
return true;
 }
 
-static bool vesa_pci_io_out(struct kvm *kvm, u16 port, void *data, int size, 
u32 count)
+static bool vesa_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, 
void *data, int size, u32 count)
 {
return true;
 }
@@ -75,7 +75,7 @@ void vesa__init(struct kvm *kvm)
 
pci__register(vesa_pci_device, dev);
 
-   ioport__register(IOPORT_VESA, vesa_io_ops, IOPORT_VESA_SIZE);
+   ioport__register(IOPORT_VESA, vesa_io_ops, IOPORT_VESA_SIZE, NULL);
 
kvm__register_mmio(VESA_MEM_ADDR, VESA_MEM_SIZE, vesa_mmio_callback);
 
diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 8253938..67b4a6f 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -1,6 +1,8 @@
 #ifndef KVM__IOPORT_H
 #define KVM__IOPORT_H
 
+#include kvm/rbtree-interval.h
+
 #include stdbool.h
 #include asm/types.h
 

[PATCH v3 2/8] kvm tools: Add basic ioport dynamic allocation

2011-05-26 Thread Sasha Levin
Add a very simple allocation of ioports.

This prevents the need to coordinate ioports between different
modules.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |7 ++-
 tools/kvm/ioport.c |   24 +++-
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 67b4a6f..49f919f 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -9,6 +9,9 @@
 
 /* some ports we reserve for own use */
 #define IOPORT_DBG 0xe0
+#define IOPORT_START   0x6200
+#define IOPORT_SIZE0x400
+
 #define IOPORT_VESA0xa200
 #define IOPORT_VESA_SIZE   256
 #define IOPORT_VIRTIO_P9   0xb200  /* Virtio 9P device */
@@ -22,6 +25,8 @@
 #define IOPORT_VIRTIO_RNG  0xf200  /* Virtio network device */
 #define IOPORT_VIRTIO_RNG_SIZE 256
 
+#define IOPORT_EMPTY   USHRT_MAX
+
 struct kvm;
 
 struct ioport {
@@ -37,7 +42,7 @@ struct ioport_operations {
 
 void ioport__setup_legacy(void);
 
-void ioport__register(u16 port, struct ioport_operations *ops, int count, void 
*param);
+u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void 
*param);
 
 static inline u8 ioport__read8(u8 *data)
 {
diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c
index 92ad152..492ce16 100644
--- a/tools/kvm/ioport.c
+++ b/tools/kvm/ioport.c
@@ -3,6 +3,9 @@
 #include kvm/kvm.h
 #include kvm/util.h
 
+#include kvm/rbtree-interval.h
+#include kvm/mutex.h
+
 #include linux/kvm.h /* for KVM_EXIT_* */
 #include linux/types.h
 
@@ -14,9 +17,23 @@
 
 #define ioport_node(n) rb_entry(n, struct ioport, node)
 
+static u16 free_io_port_idx;
+DEFINE_MUTEX(free_io_port_idx_lock);
 static struct rb_root ioport_tree = RB_ROOT;
 bool ioport_debug;
 
+static u16 ioport__find_free_port(void)
+{
+   u16 free_port;
+
+   mutex_lock(free_io_port_idx_lock);
+   free_port = IOPORT_START + free_io_port_idx * IOPORT_SIZE;
+   free_io_port_idx++;
+   mutex_unlock(free_io_port_idx_lock);
+
+   return free_port;
+}
+
 static struct ioport *ioport_search(struct rb_root *root, u64 addr)
 {
struct rb_int_node *node;
@@ -61,10 +78,13 @@ static struct ioport_operations dummy_write_only_ioport_ops 
= {
.io_out = dummy_io_out,
 };
 
-void ioport__register(u16 port, struct ioport_operations *ops, int count, void 
*param)
+u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void 
*param)
 {
struct ioport *entry;
 
+   if (port == IOPORT_EMPTY)
+   port = ioport__find_free_port();
+
entry = ioport_search(ioport_tree, port);
if (entry) {
pr_warning(ioport re-registered: %x, port);
@@ -82,6 +102,8 @@ void ioport__register(u16 port, struct ioport_operations 
*ops, int count, void *
};
 
ioport_insert(ioport_tree, entry);
+
+   return port;
 }
 
 static const char *to_direction(int direction)
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 3/8] kvm tools: Use ioport context to control blk devices

2011-05-26 Thread Sasha Levin
Since ioports now has the ability to pass context to its
callbacks, we can implement multiple blk devices more efficiently.

We can get a ptr to the 'current' blk dev on each ioport call, which
means that we don't need to keep track of the blk device allocation
and ioport distribution within the module.

The advantages are easier management of multiple blk devices and
removal of any hardcoded limits to the amount of possible blk
devices.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |2 -
 tools/kvm/virtio/blk.c |   71 +--
 2 files changed, 24 insertions(+), 49 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 49f919f..e53c03c 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -16,8 +16,6 @@
 #define IOPORT_VESA_SIZE   256
 #define IOPORT_VIRTIO_P9   0xb200  /* Virtio 9P device */
 #define IOPORT_VIRTIO_P9_SIZE  256
-#define IOPORT_VIRTIO_BLK  0xc200  /* Virtio block device */
-#define IOPORT_VIRTIO_BLK_SIZE 0x200
 #define IOPORT_VIRTIO_CONSOLE  0xd200  /* Virtio console device */
 #define IOPORT_VIRTIO_CONSOLE_SIZE 256
 #define IOPORT_VIRTIO_NET  0xe200  /* Virtio network device */
diff --git a/tools/kvm/virtio/blk.c b/tools/kvm/virtio/blk.c
index 5720c7f..a8f9d8c 100644
--- a/tools/kvm/virtio/blk.c
+++ b/tools/kvm/virtio/blk.c
@@ -14,6 +14,7 @@
 #include linux/virtio_ring.h
 #include linux/virtio_blk.h
 
+#include linux/list.h
 #include linux/types.h
 #include pthread.h
 
@@ -34,15 +35,16 @@ struct blk_dev_job {
 
 struct blk_dev {
pthread_mutex_t mutex;
+   struct list_headlist;
 
struct virtio_blk_configblk_config;
struct disk_image   *disk;
+   u64 base_addr;
u32 host_features;
u32 guest_features;
u16 config_vector;
u8  status;
u8  isr;
-   u8  idx;
 
/* virtio queue */
u16 queue_selector;
@@ -52,7 +54,7 @@ struct blk_dev {
struct pci_device_headerpci_hdr;
 };
 
-static struct blk_dev *bdevs[VIRTIO_BLK_MAX_DEV];
+static LIST_HEAD(bdevs);
 
 static bool virtio_blk_dev_in(struct blk_dev *bdev, void *data, unsigned long 
offset, int size, u32 count)
 {
@@ -66,22 +68,14 @@ static bool virtio_blk_dev_in(struct blk_dev *bdev, void 
*data, unsigned long of
return true;
 }
 
-/* Translate port into device id + offset in that device addr space */
-static void virtio_blk_port2dev(u16 port, u16 base, u16 size, u16 *dev_idx, 
u16 *offset)
-{
-   *dev_idx= (port - base) / size;
-   *offset = port - (base + *dev_idx * size);
-}
-
 static bool virtio_blk_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 
port, void *data, int size, u32 count)
 {
struct blk_dev *bdev;
-   u16 offset, dev_idx;
+   u16 offset;
bool ret = true;
 
-   virtio_blk_port2dev(port, IOPORT_VIRTIO_BLK, IOPORT_VIRTIO_BLK_SIZE, 
dev_idx, offset);
-
-   bdev = bdevs[dev_idx];
+   bdev= ioport-priv;
+   offset  = port - bdev-base_addr;
 
mutex_lock(bdev-mutex);
 
@@ -181,12 +175,11 @@ static void virtio_blk_do_io(struct kvm *kvm, void *param)
 static bool virtio_blk_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 
port, void *data, int size, u32 count)
 {
struct blk_dev *bdev;
-   u16 offset, dev_idx;
+   u16 offset;
bool ret = true;
 
-   virtio_blk_port2dev(port, IOPORT_VIRTIO_BLK, IOPORT_VIRTIO_BLK_SIZE, 
dev_idx, offset);
-
-   bdev = bdevs[dev_idx];
+   bdev= ioport-priv;
+   offset  = port - bdev-base_addr;
 
mutex_lock(bdev-mutex);
 
@@ -246,48 +239,29 @@ static bool virtio_blk_pci_io_out(struct ioport *ioport, 
struct kvm *kvm, u16 po
 }
 
 static struct ioport_operations virtio_blk_io_ops = {
-   .io_in  = virtio_blk_pci_io_in,
-   .io_out = virtio_blk_pci_io_out,
+   .io_in  = virtio_blk_pci_io_in,
+   .io_out = virtio_blk_pci_io_out,
 };
 
-static int virtio_blk_find_empty_dev(void)
-{
-   int i;
-
-   for (i = 0; i  VIRTIO_BLK_MAX_DEV; i++) {
-   if (bdevs[i] == NULL)
-   return i;
-   }
-
-   return -1;
-}
-
 void virtio_blk__init(struct kvm *kvm, struct disk_image *disk)
 {
u16 blk_dev_base_addr;
u8 dev, pin, line;
struct blk_dev *bdev;
-   int new_dev_idx;
 
if (!disk)
return;
 
-   new_dev_idx = virtio_blk_find_empty_dev();
-   if (new_dev_idx  0)
-   die(Could not find an empty block device slot);
-
-   

[PATCH v3 4/8] kvm tools: Add support for multiple virtio-rng devices

2011-05-26 Thread Sasha Levin
Since multiple hardware rng devices of the same type are currently
unsupported by the kernel, this serves more as an example of a basic
virtio driver under kvm tools and can be used to debug the PCI layer.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h|2 -
 tools/kvm/include/kvm/parse-options.h |9 +++
 tools/kvm/include/kvm/virtio-rng.h|1 +
 tools/kvm/kvm-run.c   |8 ++-
 tools/kvm/virtio/rng.c|  118 ++--
 5 files changed, 96 insertions(+), 42 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index e53c03c..55d53e0 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -20,8 +20,6 @@
 #define IOPORT_VIRTIO_CONSOLE_SIZE 256
 #define IOPORT_VIRTIO_NET  0xe200  /* Virtio network device */
 #define IOPORT_VIRTIO_NET_SIZE 256
-#define IOPORT_VIRTIO_RNG  0xf200  /* Virtio network device */
-#define IOPORT_VIRTIO_RNG_SIZE 256
 
 #define IOPORT_EMPTY   USHRT_MAX
 
diff --git a/tools/kvm/include/kvm/parse-options.h 
b/tools/kvm/include/kvm/parse-options.h
index 2d5c99e..6bf9a1d 100644
--- a/tools/kvm/include/kvm/parse-options.h
+++ b/tools/kvm/include/kvm/parse-options.h
@@ -132,6 +132,15 @@ intptr_t defval;
.help = (h) \
 }
 
+#define OPT_INCR(s, l, v, h)\
+{   \
+   .type = OPTION_INCR,\
+   .short_name = (s),  \
+   .long_name = (l),   \
+   .value = check_vtype(v, int *), \
+   .help = (h) \
+}
+
 #define OPT_GROUP(h)\
 {   \
.type = OPTION_GROUP,   \
diff --git a/tools/kvm/include/kvm/virtio-rng.h 
b/tools/kvm/include/kvm/virtio-rng.h
index 7015c1f..c0a413b 100644
--- a/tools/kvm/include/kvm/virtio-rng.h
+++ b/tools/kvm/include/kvm/virtio-rng.h
@@ -4,5 +4,6 @@
 struct kvm;
 
 void virtio_rng__init(struct kvm *kvm);
+void virtio_rng__delete_all(struct kvm *kvm);
 
 #endif /* KVM__RNG_VIRTIO_H */
diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c
index adbb25b..76b5782 100644
--- a/tools/kvm/kvm-run.c
+++ b/tools/kvm/kvm-run.c
@@ -52,6 +52,7 @@ static __thread struct kvm_cpu *current_kvm_cpu;
 
 static u64 ram_size;
 static u8  image_count;
+static int virtio_rng;
 static const char *kernel_cmdline;
 static const char *kernel_filename;
 static const char *vmlinux_filename;
@@ -66,7 +67,6 @@ static const char *script;
 static const char *virtio_9p_dir;
 static bool single_step;
 static bool readonly_image[MAX_DISK_IMAGES];
-static bool virtio_rng;
 static bool vnc;
 extern bool ioport_debug;
 extern int  active_console;
@@ -107,7 +107,7 @@ static const struct option options[] = {
OPT_CALLBACK('d', disk, NULL, image, Disk image, img_name_parser),
OPT_STRING('\0', console, console, serial or virtio,
Console to use),
-   OPT_BOOLEAN('\0', rng, virtio_rng,
+   OPT_INCR('\0', rng, virtio_rng,
Enable virtio Random Number Generator),
OPT_STRING('\0', kvm-dev, kvm_dev, kvm-dev, KVM device file),
OPT_STRING('\0', virtio-9p, virtio_9p_dir, root dir,
@@ -570,7 +570,8 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
virtio_console__init(kvm);
 
if (virtio_rng)
-   virtio_rng__init(kvm);
+   while (virtio_rng--)
+   virtio_rng__init(kvm);
 
if (!network)
network = DEFAULT_NETWORK;
@@ -631,6 +632,7 @@ int kvm_cmd_run(int argc, const char **argv, const char 
*prefix)
}
 
virtio_blk__delete_all(kvm);
+   virtio_rng__delete_all(kvm);
 
disk_image__close_all(kvm-disks, image_count);
kvm__delete(kvm);
diff --git a/tools/kvm/virtio/rng.c b/tools/kvm/virtio/rng.c
index a553f6b..1a3bda3 100644
--- a/tools/kvm/virtio/rng.c
+++ b/tools/kvm/virtio/rng.c
@@ -5,7 +5,6 @@
 #include kvm/disk-image.h
 #include kvm/virtio.h
 #include kvm/ioport.h
-#include kvm/mutex.h
 #include kvm/util.h
 #include kvm/kvm.h
 #include kvm/pci.h
@@ -15,6 +14,7 @@
 #include linux/virtio_ring.h
 #include linux/virtio_rng.h
 
+#include linux/list.h
 #include fcntl.h
 #include sys/types.h
 #include sys/stat.h
@@ -23,18 +23,17 @@
 #define NUM_VIRT_QUEUES1
 #define VIRTIO_RNG_QUEUE_SIZE  128
 
-static struct pci_device_header virtio_rng_pci_device = {
-   .vendor_id  = PCI_VENDOR_ID_REDHAT_QUMRANET,
-   .device_id  = PCI_DEVICE_ID_VIRTIO_RNG,
-   .header_type= PCI_HEADER_TYPE_NORMAL,
-   .revision_id= 0,
-   .class  = 0x01,
-   .subsys_vendor_id   = 

[PATCH v3 6/8] kvm tools: Use dynamic IO port allocation in 9p driver

2011-05-26 Thread Sasha Levin
Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |2 --
 tools/kvm/virtio/9p.c  |   12 +++-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 84eb65a..310f75d 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -12,8 +12,6 @@
 #define IOPORT_START   0x6200
 #define IOPORT_SIZE0x400
 
-#define IOPORT_VIRTIO_P9   0xb200  /* Virtio 9P device */
-#define IOPORT_VIRTIO_P9_SIZE  256
 #define IOPORT_VIRTIO_CONSOLE  0xd200  /* Virtio console device */
 #define IOPORT_VIRTIO_CONSOLE_SIZE 256
 #define IOPORT_VIRTIO_NET  0xe200  /* Virtio network device */
diff --git a/tools/kvm/virtio/9p.c b/tools/kvm/virtio/9p.c
index c441089..38a997d 100644
--- a/tools/kvm/virtio/9p.c
+++ b/tools/kvm/virtio/9p.c
@@ -50,7 +50,6 @@ static struct pci_device_header virtio_p9_pci_device = {
.class  = 0x01,
.subsys_vendor_id   = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
.subsys_id  = VIRTIO_ID_9P,
-   .bar[0] = IOPORT_VIRTIO_P9 | PCI_BASE_ADDRESS_SPACE_IO,
 };
 
 struct p9_dev {
@@ -59,6 +58,7 @@ struct p9_dev {
u16 config_vector;
u32 features;
struct virtio_9p_config *config;
+   u16 base_addr;
 
/* virtio queue */
u16 queue_selector;
@@ -96,7 +96,7 @@ static bool virtio_p9_pci_io_in(struct ioport *ioport, struct 
kvm *kvm, u16 port
unsigned long offset;
bool ret = true;
 
-   offset = port - IOPORT_VIRTIO_P9;
+   offset = port - p9dev.base_addr;
 
switch (offset) {
case VIRTIO_PCI_HOST_FEATURES:
@@ -584,7 +584,7 @@ static bool virtio_p9_pci_io_out(struct ioport *ioport, 
struct kvm *kvm, u16 por
unsigned long offset;
bool ret = true;
 
-   offset  = port - IOPORT_VIRTIO_P9;
+   offset = port - p9dev.base_addr;
 
switch (offset) {
case VIRTIO_MSI_QUEUE_VECTOR:
@@ -636,6 +636,7 @@ void virtio_9p__init(struct kvm *kvm, const char *root)
 {
u8 pin, line, dev;
u32 i, root_len;
+   u16 p9_base_addr;
 
p9dev.config = calloc(1, sizeof(*p9dev.config) + sizeof(VIRTIO_P9_TAG));
if (p9dev.config == NULL)
@@ -662,7 +663,8 @@ void virtio_9p__init(struct kvm *kvm, const char *root)
 
virtio_p9_pci_device.irq_pin= pin;
virtio_p9_pci_device.irq_line   = line;
+   p9_base_addr= ioport__register(IOPORT_EMPTY, 
virtio_p9_io_ops, IOPORT_SIZE, NULL);
+   virtio_p9_pci_device.bar[0] = p9_base_addr | 
PCI_BASE_ADDRESS_SPACE_IO;
+   p9dev.base_addr = p9_base_addr;
pci__register(virtio_p9_pci_device, dev);
-
-   ioport__register(IOPORT_VIRTIO_P9, virtio_p9_io_ops, 
IOPORT_VIRTIO_P9_SIZE, NULL);
 }
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 5/8] kvm tools: Use dynamic IO port allocation in vesa driver

2011-05-26 Thread Sasha Levin
Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/hw/vesa.c|7 +++
 tools/kvm/include/kvm/ioport.h |2 --
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/tools/kvm/hw/vesa.c b/tools/kvm/hw/vesa.c
index ec4788c..85fe1a9 100644
--- a/tools/kvm/hw/vesa.c
+++ b/tools/kvm/hw/vesa.c
@@ -49,7 +49,6 @@ static struct pci_device_header vesa_pci_device = {
.class  = 0x03,
.subsys_vendor_id   = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
.subsys_id  = PCI_SUBSYSTEM_ID_VESA,
-   .bar[0] = IOPORT_VESA   | PCI_BASE_ADDRESS_SPACE_IO,
.bar[1] = VESA_MEM_ADDR | PCI_BASE_ADDRESS_SPACE_MEMORY,
 };
 
@@ -66,17 +65,17 @@ void vesa__init(struct kvm *kvm)
 {
u8 dev, line, pin;
pthread_t thread;
+   u16 vesa_base_addr;
 
if (irq__register_device(PCI_DEVICE_ID_VESA, dev, pin, line)  0)
return;
 
vesa_pci_device.irq_pin = pin;
vesa_pci_device.irq_line= line;
-
+   vesa_base_addr  = ioport__register(IOPORT_EMPTY, 
vesa_io_ops, IOPORT_SIZE, NULL);
+   vesa_pci_device.bar[0]  = vesa_base_addr | 
PCI_BASE_ADDRESS_SPACE_IO;
pci__register(vesa_pci_device, dev);
 
-   ioport__register(IOPORT_VESA, vesa_io_ops, IOPORT_VESA_SIZE, NULL);
-
kvm__register_mmio(VESA_MEM_ADDR, VESA_MEM_SIZE, vesa_mmio_callback);
 
pthread_create(thread, NULL, vesa__dovnc, kvm);
diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 55d53e0..84eb65a 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -12,8 +12,6 @@
 #define IOPORT_START   0x6200
 #define IOPORT_SIZE0x400
 
-#define IOPORT_VESA0xa200
-#define IOPORT_VESA_SIZE   256
 #define IOPORT_VIRTIO_P9   0xb200  /* Virtio 9P device */
 #define IOPORT_VIRTIO_P9_SIZE  256
 #define IOPORT_VIRTIO_CONSOLE  0xd200  /* Virtio console device */
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 8/8] kvm tools: Use dynamic IO port allocation in virtio-net

2011-05-26 Thread Sasha Levin
Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |3 ---
 tools/kvm/virtio/net.c |   12 
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 4fccbd6..59f118f 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -12,9 +12,6 @@
 #define IOPORT_START   0x6200
 #define IOPORT_SIZE0x400
 
-#define IOPORT_VIRTIO_NET  0xe200  /* Virtio network device */
-#define IOPORT_VIRTIO_NET_SIZE 256
-
 #define IOPORT_EMPTY   USHRT_MAX
 
 struct kvm;
diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
index 014205b..3064da6 100644
--- a/tools/kvm/virtio/net.c
+++ b/tools/kvm/virtio/net.c
@@ -37,7 +37,6 @@ static struct pci_device_header pci_header = {
.class  = 0x02,
.subsys_vendor_id   = 
PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
.subsys_id  = VIRTIO_ID_NET,
-   .bar[0] = IOPORT_VIRTIO_NET | 
PCI_BASE_ADDRESS_SPACE_IO,
 };
 
 struct net_device {
@@ -51,6 +50,7 @@ struct net_device {
u8  status;
u8  isr;
u16 queue_selector;
+   u16 base_addr;
 
pthread_t   io_rx_thread;
pthread_mutex_t io_rx_lock;
@@ -166,7 +166,7 @@ static bool virtio_net_pci_io_device_specific_in(void 
*data, unsigned long offse
 
 static bool virtio_net_pci_io_in(struct ioport *ioport, struct kvm *kvm, u16 
port, void *data, int size, u32 count)
 {
-   unsigned long   offset  = port - IOPORT_VIRTIO_NET;
+   unsigned long   offset  = port - ndev.base_addr;
boolret = true;
 
mutex_lock(ndev.mutex);
@@ -230,7 +230,7 @@ static void virtio_net_handle_callback(struct kvm *kvm, u16 
queue_index)
 
 static bool virtio_net_pci_io_out(struct ioport *ioport, struct kvm *kvm, u16 
port, void *data, int size, u32 count)
 {
-   unsigned long   offset  = port - IOPORT_VIRTIO_NET;
+   unsigned long   offset  = port - ndev.base_addr;
boolret = true;
 
mutex_lock(ndev.mutex);
@@ -387,14 +387,18 @@ void virtio_net__init(const struct virtio_net_parameters 
*params)
 {
if (virtio_net__tap_init(params)) {
u8 dev, line, pin;
+   u16 net_base_addr;
 
if (irq__register_device(VIRTIO_ID_NET, dev, pin, line)  0)
return;
 
pci_header.irq_pin  = pin;
pci_header.irq_line = line;
+   net_base_addr   = ioport__register(IOPORT_EMPTY, 
virtio_net_io_ops, IOPORT_SIZE, NULL);
+   pci_header.bar[0]   = net_base_addr | 
PCI_BASE_ADDRESS_SPACE_IO;
+   ndev.base_addr  = net_base_addr;
+
pci__register(pci_header, dev);
-   ioport__register(IOPORT_VIRTIO_NET, virtio_net_io_ops, 
IOPORT_VIRTIO_NET_SIZE, NULL);
 
virtio_net__io_thread_init(params-kvm);
}
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 7/8] kvm tools: Use dynamic IO port allocation in virtio-console

2011-05-26 Thread Sasha Levin
Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/ioport.h |2 --
 tools/kvm/virtio/console.c |   11 +++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 310f75d..4fccbd6 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -12,8 +12,6 @@
 #define IOPORT_START   0x6200
 #define IOPORT_SIZE0x400
 
-#define IOPORT_VIRTIO_CONSOLE  0xd200  /* Virtio console device */
-#define IOPORT_VIRTIO_CONSOLE_SIZE 256
 #define IOPORT_VIRTIO_NET  0xe200  /* Virtio network device */
 #define IOPORT_VIRTIO_NET_SIZE 256
 
diff --git a/tools/kvm/virtio/console.c b/tools/kvm/virtio/console.c
index 614f0d2..038e53f 100644
--- a/tools/kvm/virtio/console.c
+++ b/tools/kvm/virtio/console.c
@@ -36,7 +36,6 @@ static struct pci_device_header virtio_console_pci_device = {
.class  = 0x078000,
.subsys_vendor_id   = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
.subsys_id  = VIRTIO_ID_CONSOLE,
-   .bar[0] = IOPORT_VIRTIO_CONSOLE | 
PCI_BASE_ADDRESS_SPACE_IO,
 };
 
 struct con_dev {
@@ -50,6 +49,7 @@ struct con_dev {
u8  status;
u8  isr;
u16 queue_selector;
+   u16 base_addr;
 
void*jobs[VIRTIO_CONSOLE_NUM_QUEUES];
 };
@@ -113,7 +113,7 @@ static bool virtio_console_pci_io_device_specific_in(void 
*data, unsigned long o
 
 static bool virtio_console_pci_io_in(struct ioport *ioport, struct kvm *kvm, 
u16 port, void *data, int size, u32 count)
 {
-   unsigned long offset = port - IOPORT_VIRTIO_CONSOLE;
+   unsigned long offset = port - cdev.base_addr;
bool ret = true;
 
mutex_lock(cdev.mutex);
@@ -181,7 +181,7 @@ static void virtio_console_handle_callback(struct kvm *kvm, 
void *param)
 
 static bool virtio_console_pci_io_out(struct ioport *ioport, struct kvm *kvm, 
u16 port, void *data, int size, u32 count)
 {
-   unsigned long offset = port - IOPORT_VIRTIO_CONSOLE;
+   unsigned long offset = port - cdev.base_addr;
bool ret = true;
 
mutex_lock(cdev.mutex);
@@ -243,12 +243,15 @@ static struct ioport_operations virtio_console_io_ops = {
 void virtio_console__init(struct kvm *kvm)
 {
u8 dev, line, pin;
+   u16 console_base_addr;
 
if (irq__register_device(VIRTIO_ID_CONSOLE, dev, pin, line)  0)
return;
 
virtio_console_pci_device.irq_pin   = pin;
virtio_console_pci_device.irq_line  = line;
+   console_base_addr   = 
ioport__register(IOPORT_EMPTY, virtio_console_io_ops, IOPORT_SIZE, NULL);
+   virtio_console_pci_device.bar[0]= console_base_addr | 
PCI_BASE_ADDRESS_SPACE_IO;
+   cdev.base_addr  = console_base_addr;
pci__register(virtio_console_pci_device, dev);
-   ioport__register(IOPORT_VIRTIO_CONSOLE, virtio_console_io_ops, 
IOPORT_VIRTIO_CONSOLE_SIZE, NULL);
 }
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Video card passthrough success

2011-05-26 Thread Avi Kivity

On 05/20/2011 05:56 AM, Luke-Jr wrote:

Finally decided to see just how much of the Radeon was working (eg, if it was
*just* OpenCL or not), and I am happy to confirm that both video output and
OpenGL acceleration are fully functional inside KVM.


That's really surprising (and a really good surprise, too).  We should 
have a wiki page to summarize what has been tried, with what version, 
and with what results.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: x86: use proper port value when checking io instruction permission (v2)

2011-05-26 Thread Marcelo Tosatti
On Thu, May 26, 2011 at 09:49:21AM +0200, Paolo Bonzini wrote:
 On 05/26/2011 09:07 AM, Gleb Natapov wrote:
  Still, op_bytes is irrelevant for
  SrcDX, the 16-bit version is always used.
 
 If SrcDX/DstDX will be used only for decoding in/out instruction
 then yes. Otherwise it is nice to have more general decoder.

Yes, the use of op_bytes instead of 2 had that in mind.

 Not counting instructions that read/write many registers
 (rdmsr/wrmsr, mul/imul/div/idiv, rdtsc, etc.), I think the only
 other instruction with an implicit DstDX is cwd/cdq/cqo.  Since
 cwd/cdq/cqo needs c->dst.bytes = c->src.bytes (not op_bytes) I think
 DstDX is not really reusable beyond port instructions.
 
 Paolo

OK, will switch to 2 then, thanks.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Nikola Ciprich
OK, I tried copying the whole image to my test box running 0.14.0 again and it 
crashes with exactly the same BSOD!
So now I have the test environment to play with...
What should I check/try now?
n.
PS: I'm not sure what You mean by permissions, but I'm using LVM partitions and 
qemu-kvm is started by root user anyways..

On Thu, May 26, 2011 at 01:22:37PM +0300, Gleb Natapov wrote:
 On Thu, May 26, 2011 at 12:20:56PM +0200, Nikola Ciprich wrote:
   You can make a copy from your production image, install 0.14 version in
   different place from 0.13 and experiment.
  yup, I think I've tried it also with exact copy and wasn't able to 
  reproduce it, but I'll try it again and report soon.
 Then check image file permission please.
 
 --
   Gleb.
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-


pgphmXcFxNkYX.pgp
Description: PGP signature


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 01:50:35PM +0200, Nikola Ciprich wrote:
 OK, I tried copying the whole image to my test box running 0.14.0 again and 
 it crashes with exactly the same BSOD!
 So now I have the test environment to play with...
 What should I check/try now?
 n.
 PS: I'm not sure what You mean by permissions, but I'm using LVM partitions 
 and qemu-kvm is started by root user anyways..
 
This BSOD usually indicates that Windows can't write to the boot disk.
This usually happens if qemu has no permission to write to the image
file. But if you are starting qemu as root, this is probably not the
case. So what is your 0.14 command line?

 On Thu, May 26, 2011 at 01:22:37PM +0300, Gleb Natapov wrote:
  On Thu, May 26, 2011 at 12:20:56PM +0200, Nikola Ciprich wrote:
You can make a copy from your production image, install 0.14 version in
different place from 0.13 and experiment.
   yup, I think I've tried it also with exact copy and wasn't able to 
   reproduce it, but I'll try it again and report soon.
  Then check image file permission please.
  
  --
  Gleb.
  --
  To unsubscribe from this list: send the line unsubscribe kvm in
  the body of a message to majord...@vger.kernel.org
  More majordomo info at  http://vger.kernel.org/majordomo-info.html
  
 
 -- 
 -
 Ing. Nikola CIPRICH
 LinuxBox.cz, s.r.o.
 28. rijna 168, 709 01 Ostrava
 
 tel.:   +420 596 603 142
 fax:+420 596 621 273
 mobil:  +420 777 093 799
 
 www.linuxbox.cz
 
 mobil servis: +420 737 238 656
 email servis: ser...@linuxbox.cz
 -



--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


KVM: x86: use proper port value when checking io instruction permission (v3)

2011-05-26 Thread Marcelo Tosatti

Commit fa4491a6b667304 moved the permission check for io instructions
to the ->check_perm callback. It failed to copy the port value from the RDX
register for string and "in,out ax,dx" instructions.

Fix it by reading RDX register at decode stage when appropriate.

Fixes FC8.32 installation.

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 3bc6b7a..fc3d2d9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -47,7 +47,7 @@
 #define DstDI   (51) /* Destination is in ES:(E)DI */
 #define DstMem64(61) /* 64bit memory operand */
 #define DstImmUByte (71) /* 8-bit unsigned immediate operand */
-#define DstMask (71)
+#define DstMask ((71) | (118))
 /* Source operand type. */
 #define SrcNone (04) /* No source operand. */
 #define SrcReg  (14) /* Register operand. */
@@ -64,7 +64,7 @@
 #define SrcMemFAddr (0xc4)   /* Source is far address in memory */
 #define SrcAcc  (0xd4)   /* Source Accumulator */
 #define SrcImmU16   (0xe4)/* Immediate operand, unsigned, 16 bits */
-#define SrcMask (0xf4)
+#define SrcMask ((0xf4) | (119))
 /* Generic ModRM decode. */
 #define ModRM   (18)
 /* Destination is only written; never read. */
@@ -79,6 +79,8 @@
 #define Prefix  (314) /* Instruction varies with 66/f2/f3 prefix */
 #define RMExt   (414) /* Opcode extension in ModRM r/m if mod == 3 */
 #define Sse (117) /* SSE Vector instruction */
+#define DstDX   (118)/* Destination is in DX register */
+#define SrcDX   (119)/* Source is in DX register */
 /* Misc flags */
 #define Prot(121) /* instruction generates #UD if not in prot-mode */
 #define VendorSpecific (122) /* Vendor specific instruction */
@@ -3124,8 +3126,8 @@ static struct opcode opcode_table[256] = {
I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
I(SrcImmByte | Mov | Stack, em_push),
I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
-   D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */
-   D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, 
outsw/outsd */
+   D2bvIP(DstDI | SrcDX | Mov | String, ins, check_perm_in), /* insb, 
insw/insd */
+   D2bvIP(SrcSI | DstDX | String, outs, check_perm_out), /* outsb, 
outsw/outsd */
/* 0x70 - 0x7F */
X16(D(SrcImmByte)),
/* 0x80 - 0x87 */
@@ -3182,8 +3184,8 @@ static struct opcode opcode_table[256] = {
/* 0xE8 - 0xEF */
D(SrcImm | Stack), D(SrcImm | ImplicitOps),
D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps),
-   D2bvIP(SrcNone | DstAcc, in,  check_perm_in),
-   D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out),
+   D2bvIP(SrcDX | DstAcc, in,  check_perm_in),
+   D2bvIP(SrcAcc | DstDX, out, check_perm_out),
/* 0xF0 - 0xF7 */
N, DI(ImplicitOps, icebp), N, N,
DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
@@ -3580,6 +3582,12 @@ done_prefixes:
memop.bytes = c-op_bytes + 2;
goto srcmem_common;
break;
+   case SrcDX:
+   c-src.type = OP_REG;
+   c-src.bytes = 2;
+   c-src.addr.reg = c-regs[VCPU_REGS_RDX];
+   fetch_register_operand(c-src);
+   break;
}
 
if (rc != X86EMUL_CONTINUE)
@@ -3649,6 +3657,12 @@ done_prefixes:
c-dst.addr.mem.seg = VCPU_SREG_ES;
c-dst.val = 0;
break;
+   case DstDX:
+   c-dst.type = OP_REG;
+   c-dst.bytes = 2;
+   c-dst.addr.reg = c-regs[VCPU_REGS_RDX];
+   fetch_register_operand(c-dst);
+   break;
case ImplicitOps:
/* Special instructions do their own operand decoding. */
default:
@@ -3993,7 +4007,6 @@ special_insn:
break;
case 0xec: /* in al,dx */
case 0xed: /* in (e/r)ax,dx */
-   c-src.val = c-regs[VCPU_REGS_RDX];
do_io_in:
if (!pio_in_emulated(ctxt, c-dst.bytes, c-src.val,
 c-dst.val))
@@ -4001,7 +4014,6 @@ special_insn:
break;
case 0xee: /* out dx,al */
case 0xef: /* out dx,(e/r)ax */
-   c-dst.val = c-regs[VCPU_REGS_RDX];
do_io_out:
ops-pio_out_emulated(ctxt, c-src.bytes, c-dst.val,
  c-src.val, 1);
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Nikola Ciprich
 This BSOD usually indicates that Windows can't write to the boot disk.
 This is usually happens if qemu has no permission to write to the image
 file. But if you are starting qemu as a root this is probably is not the
OK, I see.
 case. So what is your 0.14 command line?
here it goes:
/usr/bin/qemu-kvm -S -M pc-0.14 -enable-kvm -m 4096 -smp 
1,sockets=1,cores=1,threads=1 -name vmtst04 -uuid 
1f8328b8-8849-11e0-91e9-00259009d78c -nodefconfig -nodefaults -chardev 
socket,id=charmonitor,path=/var/lib/libvirt/qemu/vmtst04.monitor,server,nowait 
-mon chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot c 
-drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw 
-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev 
tap,fd=14,id=hostnet0 -device 
rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:18:04:00,bus=pci.0,addr=0x3 -usb 
-vnc 0.0.0.0:24104 -vga cirrus -device 
virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4


 
  On Thu, May 26, 2011 at 01:22:37PM +0300, Gleb Natapov wrote:
   On Thu, May 26, 2011 at 12:20:56PM +0200, Nikola Ciprich wrote:
 You can make a copy from your production image, install 0.14 version 
 in
 different place from 0.13 and experiment.
yup, I think I've tried it also with exact copy and wasn't able to 
reproduce it, but I'll try it again and report soon.
   Then check image file permission please.
   
   --
 Gleb.
   --
   To unsubscribe from this list: send the line unsubscribe kvm in
   the body of a message to majord...@vger.kernel.org
   More majordomo info at  http://vger.kernel.org/majordomo-info.html
   
  
  -- 
  -
  Ing. Nikola CIPRICH
  LinuxBox.cz, s.r.o.
  28. rijna 168, 709 01 Ostrava
  
  tel.:   +420 596 603 142
  fax:+420 596 621 273
  mobil:  +420 777 093 799
  
  www.linuxbox.cz
  
  mobil servis: +420 737 238 656
  email servis: ser...@linuxbox.cz
  -
 
 
 
 --
   Gleb.
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-


pgpVWXFpwtBQ6.pgp
Description: PGP signature


Re: Restoring saved guest causes guest to reboot

2011-05-26 Thread Markus Schade

On 05/26/2011 08:44 AM, Avi Kivity wrote:

On 05/25/2011 09:49 AM, Markus Schade wrote:


Git bisect tells me that this is the first bad commit:
-%-
aff48baa34c033318ad322ecbf2e4bcd891b29ca is the first bad commit
commit aff48baa34c033318ad322ecbf2e4bcd891b29ca Author: Avi Kivity
a...@redhat.com Date: Sun Dec 5 18:56:11 2010 +0200

KVM: Fetch guest cr3 from hardware on demand

Instead of syncing the guest cr3 every exit, which is expensince on
vmx with ept enabled, sync it only on demand.

[sheng: fix incorrect cr3 seen by Windows XP]

Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by:
Avi Kivity a...@redhat.com



Does your machine have ept?

(cat /sys/module/kvm_intel/parameters/ept)



Sure. (Assuming that the Y means yes). I am no C developer, but I was 
wondering if the issue is related to the difference in 
ept_update_paging_mode_cr0 between the original patch in the kvm git tree
and the linux-2.6 git tree.


Best regards,
Markus
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 02:03:57PM +0200, Nikola Ciprich wrote:
  This BSOD usually indicates that Windows can't write to the boot disk.
  This is usually happens if qemu has no permission to write to the image
  file. But if you are starting qemu as a root this is probably is not the
 OK, I see.
Maybe libvirt does something funny with SELinux.

  case. So what is your 0.14 command line?
 here it goes:
 /usr/bin/qemu-kvm -S -M pc-0.14 -enable-kvm -m 4096 -smp 
 1,sockets=1,cores=1,threads=1 -name vmtst04 -uuid 
 1f8328b8-8849-11e0-91e9-00259009d78c -nodefconfig -nodefaults -chardev 
 socket,id=charmonitor,path=/var/lib/libvirt/qemu/vmtst04.monitor,server,nowait
  -mon chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot 
 c -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw 
 -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev 
 tap,fd=14,id=hostnet0 -device 
 rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:18:04:00,bus=pci.0,addr=0x3 -usb 
 -vnc 0.0.0.0:24104 -vga cirrus -device 
 virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4

Try to run with -M pc-0.13. 

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Nikola Ciprich
 May be libvirt does something funny with selinux.
it shouldn't, I don't have selinux enabled in host kernel at all..

 Try to run with -M pc-0.13. 
tried now, same result...



 
 --
   Gleb.
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-


pgpFCnydWiE6q.pgp
Description: PGP signature


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 02:30:00PM +0200, Nikola Ciprich wrote:
  May be libvirt does something funny with selinux.
 it shouldn't, I don't have selinux enabled in host kernel at all..
 
  Try to run with -M pc-0.13. 
 tried now, same result...
 
 
Hmm. And what if you start qemu directly (without using libvirt) with 0.13
command line?

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Nikola Ciprich
 Hmm. And what if you start qemu directly (without using libvirt) with 0.13
 command line?
/usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -boot c -drive 
file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device 
ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104

same result...




 
 --
   Gleb.
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-


pgpkaghaz1g42.pgp
Description: PGP signature


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 02:46:30PM +0200, Nikola Ciprich wrote:
  Hmm. And what if you start qemu directly (without using libvirt) with 0.13
  command line?
 /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -boot c -drive 
 file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device 
 ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104
 
 same result...
 
Should be more like that one with correct image path:

/usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096
-smp 1,sockets=1,cores=1,threads=1 -name vmwts02 -uuid
1e501300-dc48-11df-a690-00304834195b -nodefconfig -nodefaults -chardev
socket,id=charmonitor,path=/var/lib/libvirt/qemu/vmwts02.monitor,server,nowait
-mon chardev=charmonitor,id=monitor,mode=readline
-rtc base=localtime -boot c -drive
file=/dev/vgshared/vmwts02-1,if=none,id=drive-ide0-0-0,format=raw
-device
ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -drive
if=none,media=cdrom,id=drive-ide0-1-0,readonly=on,format=raw -device
ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
-netdev user,id=hostnet0 -device
rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:61:01:00,bus=pci.0,addr=0x3
-usb -vnc 0.0.0.0:30802 -vga cirrus -device
virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4



--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Nikola Ciprich
 Should be more like that one with correct image path:
huh, now I got a bit lost :)
I tried running both:
/usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -smp 
1,sockets=1,cores=1,threads=1 -name vmtst04 -uuid 
1f8328b8-8849-11e0-91e9-00259009d78c -nodefconfig -nodefaults -chardev 
socket,id=char
monitor,path=/var/lib/libvirt/qemu/vmtst04.monitor,server,nowait -mon 
chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot c -drive 
file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device 
ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev 
tap,fd=14,id=hostnet0 -device 
rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:18:04:00,bus=pci.0,addr=0x3 -usb 
-vnc 0.0.0.0:24104 -vga cirrus -device 
virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4

and simplified:
/usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -rtc base=localtime -boot c 
-drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw 
-device ide-drive,bus=ide.0,unit=0,driv
e=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104 /usr/bin/qemu-kvm -M pc-0.13 
-enable-kvm -m 4096 -boot c -drive 
file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device 
ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104  



where /dev/vgshared/vmtst04-1 is the copy of windows volume from production 
server.
so is this ok?
n.
  




-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-


pgpe5c27D1b1Q.pgp
Description: PGP signature


[AUTOTEST PATCH] Fix rhel5 install

2011-05-26 Thread Gerd Hoffmann
There is no ntpdate.rpm in RHEL-5, ntp.rpm has /usr/sbin/ntpdate.
---
 client/tests/kvm/unattended/RHEL-5-series.ks |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/client/tests/kvm/unattended/RHEL-5-series.ks 
b/client/tests/kvm/unattended/RHEL-5-series.ks
index 1d8d41b..f1568f2 100644
--- a/client/tests/kvm/unattended/RHEL-5-series.ks
+++ b/client/tests/kvm/unattended/RHEL-5-series.ks
@@ -22,7 +22,7 @@ poweroff
 @development-libs
 @development-tools
 kexec-tools
-ntpdate
+ntp
 watchdog
 
 %post --interpreter /usr/bin/python
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[AUTOTEST PATCH] Broken installs.

2011-05-26 Thread Gerd Hoffmann
  Hi,

Started playing around with autotest.  For starters I tried to run the
basic install, boot + shutdown tests for a bunch of guests I have iso
images lying around for, expecting that to work out of the box.

Unfortunately that wasn't the case.  RHEL 5.any doesn't install.  Fixed.
OpenSUSE 11.1 doesn't install either.  No idea why; it fails to find the
repos on the cdrom.

cheers,
  Gerd

Gerd Hoffmann (1):
  Fix rhel5 install

 client/tests/kvm/unattended/RHEL-5-series.ks |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 02:56:40PM +0200, Nikola Ciprich wrote:
  Should be more like that one with correct image path:
 huh, now I got a bit lost :)
 I tried running both:
 /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -smp 
 1,sockets=1,cores=1,threads=1 -name vmtst04 -uuid 
 1f8328b8-8849-11e0-91e9-00259009d78c -nodefconfig -nodefaults -chardev 
 socket,id=char
 monitor,path=/var/lib/libvirt/qemu/vmtst04.monitor,server,nowait -mon 
 chardev=charmonitor,id=monitor,mode=readline -rtc base=localtime -boot c 
 -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw 
 -device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -netdev 
 tap,fd=14,id=hostnet0 -device 
 rtl8139,netdev=hostnet0,id=net0,mac=00:16:3e:18:04:00,bus=pci.0,addr=0x3 -usb 
 -vnc 0.0.0.0:24104 -vga cirrus -device 
 virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4
 
That one should fail to start.

 and simplified:
 /usr/bin/qemu-kvm -M pc-0.13 -enable-kvm -m 4096 -rtc base=localtime -boot c 
 -drive file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw 
 -device ide-drive,bus=ide.0,unit=0,driv
 e=drive-ide0-0-0,id=ide0-0-0 -vnc 0.0.0.0:24104 /usr/bin/qemu-kvm -M pc-0.13 
 -enable-kvm -m 4096 -boot c -drive 
 file=/dev/vgshared/vmtst04-1,if=none,id=drive-ide0-0-0,format=raw -device 
 ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 -vnc 
 0.0.0.0:24104 
  
 
 where /dev/vgshared/vmtst04-1 is the copy of windows volume from production 
 server.
 so is this ok?
No, try to run the one I gave you. Just replace the image file path. I
do not have much hope it will work, but it is worth trying. 

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Nikola Ciprich
 No, try to run the one I gave you. Just replace the image file path. I
 do no have much hope it will work though, but worth trying. 
OK, I tried, I just had to remove the monitor device. Still, it fails the same 
way...
n.


 
 --
   Gleb.
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-


pgpnpjUPCJSjF.pgp
Description: PGP signature


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 03:15:19PM +0200, Nikola Ciprich wrote:
  No, try to run the one I gave you. Just replace the image file path. I
  do no have much hope it will work though, but worth trying. 
 OK, I tried, I just had to remove the monitor device. Still, it fails the 
 same way...
According to this:
http://social.msdn.microsoft.com/Forums/en-US/embeddedwindowscomponents/thread/09aae527-ff6d-4003-9e59-962d73d409ed
such a BSOD happens because Windows can't access the boot device. Your boot
device is IDE. Nothing changed in this area from 0.13 to 0.14. Are you sure
your image was copied correctly and is not corrupted?

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Patch v3] Enable CPU SMEP feature for KVM

2011-05-26 Thread Yang, Wei Y

This patchset enables a new CPU feature, SMEP (Supervisor Mode Execution
Protection), in KVM. SMEP prevents the kernel from executing code that
resides in application (user-mode) pages. The updated Intel SDM describes
this CPU feature; the document will be published soon.

This patchset is based on Fenghua's SMEP patch series, as referred by:
https://lkml.org/lkml/2011/5/17/523

Changes since v2: enable SMEP for spt mode.

 Signed-off-by: Yang Wei wei.y.y...@intel.com
 Signed-off-by: Shan Haitao haitao.s...@intel.com

---
 arch/x86/include/asm/kvm_host.h |1 +
 arch/x86/kvm/paging_tmpl.h  |   15 +--
 arch/x86/kvm/vmx.c  |9 +
 arch/x86/kvm/x86.c  |7 +--
 4 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d2ac8e2..154287b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -306,6 +306,7 @@ struct kvm_vcpu_arch {
unsigned long cr3;
unsigned long cr4;
unsigned long cr4_guest_owned_bits;
+   unsigned long cr4_reserved_bits;
unsigned long cr8;
u32 hflags;
u64 efer;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6c4dc01..7e0b2f8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -120,7 +120,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker 
*walker,
struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gva_t addr, u32 access)
 {
-   pt_element_t pte;
+   pt_element_t pte, pte_smep;
pt_element_t __user *ptep_user;
gfn_t table_gfn;
unsigned index, pt_access, uninitialized_var(pte_access);
@@ -150,7 +150,11 @@ walk:
}
--walker-level;
}
+   pte_smep = ~0ULL;
+#else
+   pte_smep = ~0U;
 #endif
+
ASSERT((!is_long_mode(vcpu)  is_pae(vcpu)) ||
   (mmu-get_cr3(vcpu)  CR3_NONPAE_RESERVED_BITS) == 0);
 
@@ -234,6 +238,8 @@ walk:
 
walker-ptes[walker-level - 1] = pte;
 
+   pte_smep = pte;
+
if ((walker-level == PT_PAGE_TABLE_LEVEL) ||
((walker-level == PT_DIRECTORY_LEVEL) 
is_large_pte(pte) 
@@ -246,6 +252,11 @@ walk:
gfn_t gfn;
u32 ac;
 
+   if (unlikely(fetch_fault  !user_fault))
+   if ((vcpu-arch.cr4  X86_CR4_SMEP)
+(pte_smep  PT_USER_MASK))
+   eperm = true;
+
gfn = gpte_to_gfn_lvl(pte, lvl);
gfn += (addr  PT_LVL_OFFSET_MASK(lvl))  PAGE_SHIFT;
 
@@ -305,7 +316,7 @@ error:
 
walker-fault.error_code |= write_fault | user_fault;
 
-   if (fetch_fault  mmu-nx)
+   if (fetch_fault  (mmu-nx || (vcpu-arch.cr4  X86_CR4_SMEP)))
walker-fault.error_code |= PFERR_FETCH_MASK;
if (rsvd_fault)
walker-fault.error_code |= PFERR_RSVD_MASK;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4c3fa0f..7ad24fd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4507,6 +4507,15 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
}
}
}
+
+   best = kvm_find_cpuid_entry(vcpu, 7, 0);
+   if (best  (best-ebx  bit(X86_FEATURE_SMEP))) {
+   if (boot_cpu_has(X86_FEATURE_SMEP))
+   vcpu-arch.cr4_reserved_bits =
+   ~((unsigned long)X86_CR4_SMEP);
+   else
+   best-ebx = ~(bit(X86_FEATURE_SMEP));
+   }
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 77c9d86..6ead39e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -598,9 +598,10 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
unsigned long old_cr4 = kvm_read_cr4(vcpu);
-   unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
+   unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE
+| X86_CR4_PAE | X86_CR4_SMEP;
 
-   if (cr4  CR4_RESERVED_BITS)
+   if (cr4  vcpu-arch.cr4_reserved_bits)
return 1;
 
if (!guest_cpuid_has_xsave(vcpu)  (cr4  X86_CR4_OSXSAVE))
@@ -6222,6 +6223,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
kvm_async_pf_hash_reset(vcpu);
 
+   vcpu-arch.cr4_reserved_bits = CR4_RESERVED_BITS;
+
return 0;
 fail_free_mce_banks:
kfree(vcpu-arch.mce_banks);
-- 
1.7.4.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3] Enable CPU SMEP feature support for QEMU-KVM

2011-05-26 Thread Yang, Wei Y

This patchset enables a new CPU feature, SMEP (Supervisor Mode Execution
Protection), in QEMU-KVM. SMEP prevents the kernel from executing code that
resides in application (user-mode) pages. The updated Intel SDM describes this
CPU feature. The document will be published soon.

SMEP is identified by CPUID leaf 7 EBX[7], which was previously always
reported as 0. Get the right value by querying the KVM kernel module, so that
the guest can detect SMEP through CPUID.

Changes since v2: no changes.

 Signed-off-by: Yang, Wei wei.y.y...@intel.com
 Signed-off-by: Shan, Haitao haitao.s...@intel.com

---
 target-i386/cpuid.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 091d812..cd20dbf 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -1115,6 +1115,14 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *ecx = 0;
 *edx = 0;
 break;
+case 7:
+if (kvm_enabled()) {
+*eax = kvm_arch_get_supported_cpuid(env, 0x7, count, R_EAX);
+*ebx = kvm_arch_get_supported_cpuid(env, 0x7, count, R_EBX);
+*ecx = kvm_arch_get_supported_cpuid(env, 0x7, count, R_ECX);
+*edx = kvm_arch_get_supported_cpuid(env, 0x7, count, R_EDX);
+}
+break;
 case 9:
 /* Direct Cache Access Information Leaf */
 *eax = 0; /* Bits 0-31 in DCA_CAP MSR */
-- 
1.7.4.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Nikola Ciprich
 According to this:
 http://social.msdn.microsoft.com/Forums/en-US/embeddedwindowscomponents/thread/09aae527-ff6d-4003-9e59-962d73d409ed
 such bsod happens because Windows can't access boot device. Your boot
 device is IDE. Nothing changed in this area from 13 to 14. Are you sure
 your image was copied correctly and is not corrupted?
Definitely. I just tried downgrading qemu-kvm to 0.13.0 and it works again...
n.


 
 --
   Gleb.
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-


pgppxrkeAkXEy.pgp
Description: PGP signature


[PATCH 1/6] kvm tools: Prevent double assignment of guest memory info

2011-05-26 Thread Sasha Levin
Use values calculated and assigned to local variables instead
of ignoring them.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/kvm.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index 7284211..1d756e0 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -192,7 +192,7 @@ void kvm__init_ram(struct kvm *kvm)
phys_size  = kvm-ram_size;
host_mem   = kvm-ram_start;
 
-   kvm_register_mem_slot(kvm, 0, 0, kvm-ram_size, kvm-ram_start);
+   kvm_register_mem_slot(kvm, 0, phys_start, phys_size, host_mem);
} else {
/* First RAM range from zero to the PCI gap: */
 
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/6] kvm tools: Exit VCPU thread only when SIGKVMEXIT is received

2011-05-26 Thread Sasha Levin
Currently the VCPU loop would exit when the thread received any signal.

Change the behaviour to exit only when SIGKVMEXIT is received. This change
prevents the guest from terminating when unrelated signals are processed
by the thread (for example, when attaching a debugger).

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/kvm-cpu.h |2 ++
 tools/kvm/kvm-cpu.c |   15 ++-
 tools/kvm/kvm-run.c |2 +-
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h
index f241e86..b2b6fce 100644
--- a/tools/kvm/include/kvm/kvm-cpu.h
+++ b/tools/kvm/include/kvm/kvm-cpu.h
@@ -21,6 +21,8 @@ struct kvm_cpu {
struct kvm_fpu  fpu;
 
struct kvm_msrs *msrs;  /* dynamically allocated */
+
+   u8  is_running;
 };
 
 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id);
diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c
index 331e025..de0591f 100644
--- a/tools/kvm/kvm-cpu.c
+++ b/tools/kvm/kvm-cpu.c
@@ -14,6 +14,8 @@
 #include errno.h
 #include stdio.h
 
+extern __thread struct kvm_cpu *current_kvm_cpu;
+
 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
 {
return vcpu-sregs.cr0  0x01;
@@ -87,6 +89,8 @@ struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long 
cpu_id)
if (vcpu-kvm_run == MAP_FAILED)
die(unable to mmap vcpu fd);
 
+   vcpu-is_running = true;
+
return vcpu;
 }
 
@@ -381,7 +385,10 @@ void kvm_cpu__run(struct kvm_cpu *vcpu)
 
 static void kvm_cpu_exit_handler(int signum)
 {
-   /* Don't do anything here */
+   if (current_kvm_cpu-is_running) {
+   current_kvm_cpu-is_running = false;
+   pthread_kill(pthread_self(), SIGKVMEXIT);
+   }
 }
 
 int kvm_cpu__start(struct kvm_cpu *cpu)
@@ -437,10 +444,8 @@ int kvm_cpu__start(struct kvm_cpu *cpu)
break;
}
case KVM_EXIT_INTR:
-   /*
-* Currently we only handle exit signal, which means
-* we just exit if KVM_RUN exited due to a signal.
-*/
+   if (cpu-is_running)
+   break;
goto exit_kvm;
case KVM_EXIT_SHUTDOWN:
goto exit_kvm;
diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c
index adbb25b..d757761 100644
--- a/tools/kvm/kvm-run.c
+++ b/tools/kvm/kvm-run.c
@@ -48,7 +48,7 @@
 
 static struct kvm *kvm;
 static struct kvm_cpu *kvm_cpus[KVM_NR_CPUS];
-static __thread struct kvm_cpu *current_kvm_cpu;
+__thread struct kvm_cpu *current_kvm_cpu;
 
 static u64 ram_size;
 static u8  image_count;
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/6] kvm tools: Protect IRQ allocations by a mutex

2011-05-26 Thread Sasha Levin
Makes IRQ allocation for new devices thread-safe.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/irq.c |   20 +---
 1 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/tools/kvm/irq.c b/tools/kvm/irq.c
index 15f4702..f92123d 100644
--- a/tools/kvm/irq.c
+++ b/tools/kvm/irq.c
@@ -1,4 +1,5 @@
 #include kvm/irq.h
+#include kvm/mutex.h
 
 #include linux/types.h
 #include linux/rbtree.h
@@ -10,6 +11,7 @@
 static u8  next_line   = 3;
 static u8  next_dev= 1;
 static struct rb_root  pci_tree= RB_ROOT;
+static DEFINE_MUTEX(irq_lock);
 
 static struct pci_dev *search(struct rb_root *root, u32 id)
 {
@@ -58,7 +60,9 @@ static int insert(struct rb_root *root, struct pci_dev *data)
 
 int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
 {
-   struct pci_dev *node;
+   struct pci_dev *node = NULL;
+
+   mutex_lock(irq_lock);
 
node = search(pci_tree, dev);
 
@@ -66,7 +70,7 @@ int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 *line)
/* We haven't found a node - First device of it's kind */
node = malloc(sizeof(*node));
if (node == NULL)
-   return -1;
+   goto exit_fail;
 
*node = (struct pci_dev) {
.id = dev,
@@ -81,17 +85,15 @@ int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 
*line)
 
INIT_LIST_HEAD(node-lines);
 
-   if (insert(pci_tree, node) != 1) {
-   free(node);
-   return -1;
-   }
+   if (insert(pci_tree, node) != 1)
+   goto exit_fail;
}
 
if (node) {
/* This device already has a pin assigned, give out a new line 
and device id */
struct irq_line *new = malloc(sizeof(*new));
if (new == NULL)
-   return -1;
+   goto exit_fail;
 
new-line   = next_line++;
*line   = new-line;
@@ -100,9 +102,13 @@ int irq__register_device(u32 dev, u8 *num, u8 *pin, u8 
*line)
 
list_add(new-node, node-lines);
 
+   mutex_unlock(irq_lock);
return 0;
}
 
+exit_fail:
+   free(node);
+   mutex_unlock(irq_lock);
return -1;
 }
 
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/6] kvm tools: Protect MMIO tree by rwsem

2011-05-26 Thread Sasha Levin
Make MMIO code thread-safe.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/mmio.c |   24 +---
 1 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/tools/kvm/mmio.c b/tools/kvm/mmio.c
index ef986bf..59512c3 100644
--- a/tools/kvm/mmio.c
+++ b/tools/kvm/mmio.c
@@ -1,5 +1,6 @@
 #include kvm/kvm.h
 #include kvm/rbtree-interval.h
+#include kvm/rwsem.h
 
 #include stdio.h
 #include stdlib.h
@@ -15,6 +16,7 @@ struct mmio_mapping {
 };
 
 static struct rb_root mmio_tree = RB_ROOT;
+static DECLARE_RWSEM(mmio_tree_sem);
 
 static struct mmio_mapping *mmio_search(struct rb_root *root, u64 addr, u64 
len)
 {
@@ -55,35 +57,51 @@ static const char *to_direction(u8 is_write)
 bool kvm__register_mmio(u64 phys_addr, u64 phys_addr_len, void 
(*kvm_mmio_callback_fn)(u64 addr, u8 *data, u32 len, u8 is_write))
 {
struct mmio_mapping *mmio;
+   int ret;
 
mmio = malloc(sizeof(*mmio));
if (mmio == NULL)
return false;
 
+   down_write(mmio_tree_sem);
+
*mmio = (struct mmio_mapping) {
.node = RB_INT_INIT(phys_addr, phys_addr + phys_addr_len),
.kvm_mmio_callback_fn = kvm_mmio_callback_fn,
};
 
-   return mmio_insert(mmio_tree, mmio);
+   ret = mmio_insert(mmio_tree, mmio);
+   
+   up_write(mmio_tree_sem);
+
+   return ret;
 }
 
 bool kvm__deregister_mmio(u64 phys_addr)
 {
struct mmio_mapping *mmio;
 
+   down_write(mmio_tree_sem);
mmio = mmio_search_single(mmio_tree, phys_addr);
-   if (mmio == NULL)
+   if (mmio == NULL) {
+   up_write(mmio_tree_sem);
return false;
+   }
 
rb_int_erase(mmio_tree, mmio-node);
free(mmio);
+   up_write(mmio_tree_sem);
+   
return true;
 }
 
 bool kvm__emulate_mmio(struct kvm *kvm, u64 phys_addr, u8 *data, u32 len, u8 
is_write)
 {
-   struct mmio_mapping *mmio = mmio_search(mmio_tree, phys_addr, len);
+   struct mmio_mapping *mmio;
+
+   down_read(mmio_tree_sem);
+   mmio = mmio_search(mmio_tree, phys_addr, len);
+   up_read(mmio_tree_sem);
 
if (mmio)
mmio-kvm_mmio_callback_fn(phys_addr, data, len, is_write);
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/6] kvm tools: Add rwlock wrapper

2011-05-26 Thread Sasha Levin
Adds a rwlock wrapper which, like the mutex wrapper, makes rwlock calls
similar to their kernel counterparts.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/include/kvm/rwsem.h |   39 +++
 1 files changed, 39 insertions(+), 0 deletions(-)
 create mode 100644 tools/kvm/include/kvm/rwsem.h

diff --git a/tools/kvm/include/kvm/rwsem.h b/tools/kvm/include/kvm/rwsem.h
new file mode 100644
index 000..75a22f8
--- /dev/null
+++ b/tools/kvm/include/kvm/rwsem.h
@@ -0,0 +1,39 @@
+#ifndef KVM__RWSEM_H
+#define KVM__RWSEM_H
+
+#include pthread.h
+
+#include kvm/util.h
+
+/*
+ * Kernel-alike rwsem API - to make it easier for kernel developers
+ * to write user-space code! :-)
+ */
+
+#define DECLARE_RWSEM(sem) pthread_rwlock_t sem = PTHREAD_RWLOCK_INITIALIZER
+
+static inline void down_read(pthread_rwlock_t *rwsem)
+{
+   if (pthread_rwlock_rdlock(rwsem) != 0)
+   die(unexpected pthread_rwlock_rdlock() failure!);
+}
+
+static inline void down_write(pthread_rwlock_t *rwsem)
+{
+   if (pthread_rwlock_wrlock(rwsem) != 0)
+   die(unexpected pthread_rwlock_wrlock() failure!);
+}
+
+static inline void up_read(pthread_rwlock_t *rwsem)
+{
+   if (pthread_rwlock_unlock(rwsem) != 0)
+   die(unexpected pthread_rwlock_unlock() failure!);
+}
+
+static inline void up_write(pthread_rwlock_t *rwsem)
+{
+   if (pthread_rwlock_unlock(rwsem) != 0)
+   die(unexpected pthread_rwlock_unlock() failure!);
+}
+
+#endif /* KVM__RWSEM_H */
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/6] kvm tools: Protect IOPORT tree by rwsem

2011-05-26 Thread Sasha Levin
Makes ioport thread-safe.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 tools/kvm/ioport.c |7 +++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c
index 1f13960..db9ff0f 100644
--- a/tools/kvm/ioport.c
+++ b/tools/kvm/ioport.c
@@ -3,6 +3,7 @@
 #include kvm/kvm.h
 #include kvm/util.h
 #include kvm/rbtree-interval.h
+#include kvm/rwsem.h
 
 #include linux/kvm.h /* for KVM_EXIT_* */
 #include linux/types.h
@@ -22,6 +23,7 @@ struct ioport_entry {
 
 static struct rb_root ioport_tree = RB_ROOT;
 bool ioport_debug;
+static DECLARE_RWSEM(ioport_tree_sem);
 
 static struct ioport_entry *ioport_search(struct rb_root *root, u64 addr)
 {
@@ -71,6 +73,7 @@ void ioport__register(u16 port, struct ioport_operations 
*ops, int count)
 {
struct ioport_entry *entry;
 
+   down_write(ioport_tree_sem);
entry = ioport_search(ioport_tree, port);
if (entry) {
pr_warning(ioport re-registered: %x, port);
@@ -87,6 +90,8 @@ void ioport__register(u16 port, struct ioport_operations 
*ops, int count)
};
 
ioport_insert(ioport_tree, entry);
+
+   up_write(ioport_tree_sem);
 }
 
 static const char *to_direction(int direction)
@@ -108,7 +113,9 @@ bool kvm__emulate_io(struct kvm *kvm, u16 port, void *data, 
int direction, int s
bool ret;
struct ioport_entry *entry;
 
+   down_read(ioport_tree_sem);
entry = ioport_search(ioport_tree, port);
+   up_read(ioport_tree_sem);
if (!entry)
goto error;
 
-- 
1.7.5.rc3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] kvm tools: Add virtio-9p

2011-05-26 Thread Venkateswararao Jujjuri

On 05/18/2011 02:05 AM, Sasha Levin wrote:

On Tue, 2011-05-17 at 20:18 -0500, Eric Van Hensbergen wrote:

On Tue, May 17, 2011 at 3:27 PM, Sasha Levinlevinsasha...@gmail.com  wrote:

On Tue, 2011-05-17 at 22:08 +0300, Sasha Levin wrote:

'kvm_9p' isn't created as a device under /dev, it's just a name used
internally by 9pnet_virtio (and located under sysfs).

I couldn't figure out which params the kernel would expect to boot using
9p over virtio (there's no device name to begin with).

I also couldn't find anything that suggested it's possible to boot
using virtio-9p as rootfs.

Ignore that.

Naming the virtio transport /dev/root and passing proper params to the
kernel makes it work:

[1.844983] VFS: Mounted root (9p filesystem) on device 0:11.

I'll make some changes to the virtio-9p patch to make it easier for the
user to do that.


Any progress on this?  May I get more detailed instructions on how
you did this trick? Basically booting on 9P/VirtIO.

Thanks,
JV


This is really sweet.  Thanks for beating me to the punch of porting
the 9p support to kvm tools.

Clear RFC and good source code to refer to within 9p modules made this
easy (and fun) :)


- Multiple virtio-9p devices.

This should be pretty straightforward.

Yes, Most of the work here is within the kvm tool.


- Ugly hack in virtio_p9_stat() (See desc in code).
   /*
+* HACK: For some reason the p9 virtio transport reads a u16 and 
discards
+* it before reading the p9_rstat struct. I couldn't find a logical 
reason for
+* that, so we just add an extra u16 before the struct.
+*/

This is part of the protocol spec (from
http://ericvh.github.com/9p-rfc/rfc9p2000.html#anchor32):
To make the contents of a directory, such as returned by read(5),
easy to parse, each directory entry begins with a size field. For
consistency, the entries in Twstat and Rstat messages also contain
their size, which means the size appears twice. For example, the Rstat
message is formatted as ``(4+1+2+2+n)[4] Rstat tag[2] n[2] (n-2)[2]
type[2] dev[4]...,'' where n is the value returned by convD2M.
It's appropriate to duplicate the size.  I think the Linux client
ignores it, but other implementations may complain.

Thanks for the explanation!
Yes, Linux implementation just throws it away - which was what confused
me initially.
Why not add a u16 to the beginning of 'struct p9_rstat'?


- Update atime/mtime in p9_wstat, not really needed.

The underlying storage may handle this for you, I think 9p avoids
updating atime by default, at least in caching scenarios -- too much
unnecessary protocol traffic.

My assumption was that the storage I read/write to will take care of it
for me, and unless it bothers anyone in the future I'll assume it's
doing a good job at it.


- Pass usernames in p9_stat, not really needed and not really sure how p9 
expects to handle them.

The username, group name issue is one of the principal reasons behind
the extended protocol operations (.u and .L) -- of course, if there
was a Plan 9 or Inferno guest they would be quite happy with the
usernames, but Linux (and other UNIX variants) will want the ids.  To
really keep things simple we could add a client option that would let
you pass the various ids as strings.  Although no doubt folks will
want the other extensions (symlinks, links, device nodes, etc.) before
long.  When we built the qemu server for .L, the team tried to keep
everything in a library, but there is some entanglement with the qemu
APIs -- it'd be nice if we could reuse that code here, maybe we need
an abstract glue layer so that the core code can be used by both the
kvm tool and qemu.  I'm copying the lead of that team on this message
just so he's aware how far you've come.

I'd prefer using a tested lib which also implements .L over what we have
now, assuming it's not tangled into qemu too hard.



  -eric




--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] kvm tools: Add virtio-9p

2011-05-26 Thread Sasha Levin
On Thu, 2011-05-26 at 07:28 -0700, Venkateswararao Jujjuri wrote:
 Any progress on this?  May I get more detailed instructions on how
 you did this trick? Basically booting on 9P/VirtIO.
 
 Thanks,
 JV
 

Of course. This change didn't go into tools/kvm/ since we only support
the legacy 9p2000 protocol at the moment, which means that even though
we can boot - it's quite unusable to work with.

The trick is pretty simple: You need to name your virtio transport
/dev/root (I think it's currently named local in qemu). Once it's
named this way, boot with the following kernel cmdline added:
root=/dev/root rootflags=rw,trans=virtio,version=9p2000 rootfstype=9p
rw (You should be able to change version to one of the 9p2000
extensions).

I've noticed that the transport *has* to be named /dev/root, naming it
something else (and adjusting the root= parameter) doesn't seem to
work.

Also, if it's named /dev/root I couldn't mount it as a simple
filesystem from within a guest - not as root.

-- 

Sasha.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Gleb Natapov
On Thu, May 26, 2011 at 03:32:43PM +0200, Nikola Ciprich wrote:
  According to this:
  http://social.msdn.microsoft.com/Forums/en-US/embeddedwindowscomponents/thread/09aae527-ff6d-4003-9e59-962d73d409ed
  such bsod happens because Windows can't access boot device. Your boot
  device is IDE. Nothing changed in this area from 13 to 14. Are you sure
  your image was copied correctly and is not corrupted?
 definitly. I just tried downgrading qemu-kvm to 0.13.0 and it works again...
In this case I am very puzzled :)

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Restoring saved guest causes guest to reboot

2011-05-26 Thread Markus Schade

On 05/26/2011 01:28 PM, Markus Schade wrote:

On 05/26/2011 08:44 AM, Avi Kivity wrote:

On 05/25/2011 09:49 AM, Markus Schade wrote:


Git bisect tells me that this is the first bad commit:
-%-
aff48baa34c033318ad322ecbf2e4bcd891b29ca is the first bad commit


Does your machine have ept?

(cat /sys/module/kvm_intel/parameters/ept)



Sure. (Assuming that the Y means yes). I am no C developer, but I was
wondering, if the issue is related to the difference in
ept_update_paging_mode_cr0 between original patch in the kvm git and the
linux-2.6. git tree.


I have re-added the missing 4 lines to ept_update_paging_mode_cr0 in 
vmx.c, which resolves this issue in kernels 2.6.37 and 2.6.39.

I haven't tested all guests, but neither Squeeze nor 2008 R2 reboot anymore.

Markus

static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1921,6 +1940,8 @@
unsigned long cr0,
struct kvm_vcpu *vcpu)
 {
+   ulong cr3;
+
vmx_decache_cr3(vcpu);
if (!(cr0  X86_CR0_PG)) {
/* From paging/starting to nonpaging */
@@ -1936,8 +1957,11 @@
 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) 
 ~(CPU_BASED_CR3_LOAD_EXITING |
   CPU_BASED_CR3_STORE_EXITING));
+   /* Must fetch cr3 before updating cr0 */
+   cr3 = kvm_read_cr3(vcpu);
vcpu-arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
+   vmx_set_cr3(vcpu, cr3);



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] kvm tools: Add virtio-9p

2011-05-26 Thread Venkateswararao Jujjuri

On 05/26/2011 07:36 AM, Sasha Levin wrote:

On Thu, 2011-05-26 at 07:28 -0700, Venkateswararao Jujjuri wrote:

Any progress on this?  May I get more detailed instructions on how
you did this trick? Basically booting on 9P/VirtIO.

Thanks,
JV


Ofcourse. This change didn't go into tools/kvm/ since we only support
the legacy 9p2000 protocol at the moment, which means that even though
we can boot - it's quite unusable to work with.

The trick is pretty simple: You need to name your virtio transport
/dev/root (I think it's currently named local in qemu). Once it's
named this way, boot with the following kernel cmdline added:
root=/dev/root rootflags=rw,trans=virtio,version=9p2000 rootfstype=9p
rw (You should be able to change version to one of the 9p2000
extensions).

Ah I guess you are making use of rootfstype.
So in this setup basically the virtio transport you create
is /dev/root instead of kvm_9p correct?
Also your dir will be / ?

i.e Start KVM with '--virtio-9p /'. ?

I've noticed that the transport *has* to be named /dev/root, naming it
something else (and adjusting the root= parameter) doesn't seem to
work.

Also, if it's named /dev/root I couldn't mount it as a simple
filesystem from within a guest - not as root.



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Restoring saved guest causes guest to reboot

2011-05-26 Thread Joerg Roedel
On Thu, May 26, 2011 at 05:20:32PM +0200, Markus Schade wrote:
 On 05/26/2011 01:28 PM, Markus Schade wrote:
 On 05/26/2011 08:44 AM, Avi Kivity wrote:
 On 05/25/2011 09:49 AM, Markus Schade wrote:

 Git bisect tells me that this is the first bad commit:
 -%-
 aff48baa34c033318ad322ecbf2e4bcd891b29ca is the first bad commit

 Does your machine have ept?

 (cat /sys/module/kvm_intel/parameters/ept)


 Sure. (Assuming that the Y means yes). I am no C developer, but I was
 wondering, if the issue is related to the difference in
 ept_update_paging_mode_cr0 between original patch in the kvm git and the
 linux-2.6. git tree.

 I have re-added the missing 4 lines to ept_update_paging_mode_cr0 in  
 vmx.c, which resolves this issue for in Kernel 2.6,37 and 2.6.39.
 I haven't tested all guests, but neither Squeeze nor 2008 R2 reboot anymore.

 Markus

 static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 @@ -1921,6 +1940,8 @@
 unsigned long cr0,
 struct kvm_vcpu *vcpu)
  {
 +   ulong cr3;
 +
 vmx_decache_cr3(vcpu);
 if (!(cr0  X86_CR0_PG)) {
 /* From paging/starting to nonpaging */
 @@ -1936,8 +1957,11 @@
  vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) 
  ~(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
 +   /* Must fetch cr3 before updating cr0 */
 +   cr3 = kvm_read_cr3(vcpu);
 vcpu-arch.cr0 = cr0;
 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
 +   vmx_set_cr3(vcpu, cr3);

Hmm, not 100% sure, but the issue might be that the arch.cr3 value does
not make it into the vmcs after cr3 was changed from user-space? This
would also be fixed with the change above.

Joerg

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V5 2/6 net-next] netdevice.h: Add zero-copy flag in netdevice

2011-05-26 Thread Shirley Ma
On Thu, 2011-05-26 at 11:49 +0300, Michael S. Tsirkin wrote:
 On Wed, May 25, 2011 at 03:49:40PM -0700, Shirley Ma wrote:
  On Fri, 2011-05-20 at 02:41 +0300, Michael S. Tsirkin wrote:
   So the requirements are
   - data must be released in a timely fashion (e.g. unlike
 virtio-net
 tun or bridge)
  The current patch doesn't enable tun zero-copy. tun will copy data
 It's
  not an issue now.
  We can disallow macvtap attach to bridge when
  zero-copy is enabled.
 
 Attach macvtap to a tun device though. Or e.g. veth device ...
 So there should be some generic way to disable zerocopy.
 It can either be a whitelist or a blacklist.
  
   - SG support
   - HIGHDMA support (on arches where this makes sense)
  
  This can be checked by device flags.
 
 OK, but pls note that SG can get turned off dynamically.
 
   - no filtering based on data (data is mapped in guest)
  
   - on fast path no calls to skb_copy, skb_clone, pskb_copy,
 pskb_expand_head as these are slow
  
  Any calls to skb_copy, skb_clone, pskb_copy, pskb_expand_head will
 do a
  copy. The performance should be the same as the non-zero-copy case
 before.
 
 I'm guessing a copy is cheaper than get_user_pages+copy+put_page.
 But maybe not by much. Care checking that?

That's what I have done already. The patch is going out for review.

  I have done/tested the patch V6, will send it out for review
 tomorrow.
  
  I am looking at where there are some cases, skb remains the same for
  filtering.
 
 To reliably filter on data I think we'll need to copy it first,
 otherwise
 guest can change it. Most filters only look at the header though.
 
   First 2 requirements are a must, all other requirements
   are just dependencies to make sure zero copy will be faster
   than non zero copy.
   Using a new feature bit is probably the simplest approach to
   this. macvtap on top of most physical NICs most likely works
   correctly so it seems a bit more work than it needs to be,
   but it's also the safest one I think ... 
  
  For macvtap/vhost zero-copy we can use SG  HIGHDMA to enable it,
 it
  looks safe to me once patching skb_copy, skb_clone, pskb_copy,
  pskb_expand_head.
  
  To extend zero-copy in other usages, we can have a new feature bit
  later.
  
  Is that reasonable?
  
  Thanks
  Shirley
 
 Is the problem the extra work needed to extend feature bits?

There is no problem using it; Mahesh is working on this patch. I just
want to remove the macvtap/vhost zero-copy patch dependency.

Thanks
Shirley

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PERF RESULTS] virtio and vhost-net performance enhancements

2011-05-26 Thread Krishna Kumar2
Michael S. Tsirkin m...@redhat.com wrote on 05/20/2011 04:40:07 AM:

 OK, here is the large patchset that implements the virtio spec update
 that I sent earlier (the spec itself needs a minor update, will send
 that out too next week, but I think we are on the same page here
 already). It supersedes the PUBLISH_USED_IDX patches I sent
 out earlier.

I was able to get this tested by applying the v2 patches
to the git-next tree (somehow MST's git tree hung on my guest,
which never got resolved). Testing was from Guest -> Remote
node, using an ixgbe 10g card. The test results are
*excellent* (table: #netperf sessions, BW% improvement,
SD% improvement, CPU% improvement):

___
   512 byte I/O
#     BW%     SD%     CPU%

1     151.6   -65.1   -10.7
2     180.6   -66.6   -6.4
4     15.5    -35.8   -26.1
8     1.8     -28.4   -26.7
16    3.1     -29.0   -26.5
32    1.1     -27.4   -27.5
64    3.8     -30.9   -26.7
96    5.4     -21.7   -24.2
128   5.7     -24.4   -25.5

BW: 16.6%   SD: -24.6%CPU: -25.5%



1K I/O
#     BW%     SD%     CPU%

1     233.9   -76.5   -18.0
2     112.2   -64.0   -23.2
4     9.2     -31.6   -26.1
8     -1.7    -26.8   -30.3
16    3.5     -31.5   -30.6
32    4.8     -25.2   -30.5
64    5.7     -31.0   -28.9
96    5.3     -32.2   -31.7
128   4.6     -38.2   -33.6

BW: 16.4%   SD: -35.%CPU: -31.5%



 16K I/O
#     BW%     SD%     CPU%

1     18.8    -27.2   -18.3
2     14.8    -36.7   -27.7
4     12.7    -45.2   -38.1
8     4.4     -56.4   -54.4
16    4.8     -38.3   -36.1
32    0       78.0    79.2
64    3.8     -38.1   -37.5
96    7.3     -35.2   -31.1
128   3.4     -31.1   -32.1

BW: 7.6%   SD: -30.1%   CPU: -23.7%


I plan to run some more tests tomorrow. Please let
me know if any other scenario will help.

Thanks,

- KK

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] kvm tools: Add virtio-9p

2011-05-26 Thread Sasha Levin
On Thu, 2011-05-26 at 08:22 -0700, Venkateswararao Jujjuri wrote:
 On 05/26/2011 07:36 AM, Sasha Levin wrote:
  On Thu, 2011-05-26 at 07:28 -0700, Venkateswararao Jujjuri wrote:
  Any progress on this?  May I get more detailed instructions on how
  you did this trick? Basically booting on 9P/VirtIO.
 
  Thanks,
  JV
 
  Ofcourse. This change didn't go into tools/kvm/ since we only support
  the legacy 9p2000 protocol at the moment, which means that even though
  we can boot - it's quite unusable to work with.
 
  The trick is pretty simple: You need to name your virtio transport
  /dev/root (I think it's currently named local in qemu). Once it's
  named this way, boot with the following kernel cmdline added:
  root=/dev/root rootflags=rw,trans=virtio,version=9p2000 rootfstype=9p
  rw (You should be able to change version to one of the 9p2000
  extensions).
 Ah I guess you are making use of rootfstype.
 So in this setup basically the virtio transport you create
 is /dev/root instead of kvm_9p correct?

Yes, exactly.

 Also your dir will be / ?
 
 i.e Start KVM with '--virtio-9p /'. ?

It can be '/', but I'd suggest against trying to boot your hosts root
device within a guest (unless in RO mode maybe).

I've mounted a disk image to some directory on the host and booted that
directory for testing.

  I've noticed that the transport *has* to be named /dev/root, naming it
  something else (and adjusting the root= parameter) doesn't seem to
  work.
 
  Also, if it's named /dev/root I couldn't mount it as a simple
  filesystem from within a guest - not as root.
 
 

-- 

Sasha.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/6] kvm tools: Protect IOPORT tree by rwsem

2011-05-26 Thread Pekka Enberg

On Thu, 26 May 2011, Sasha Levin wrote:

Makes ioport thread-safe.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
tools/kvm/ioport.c |7 +++
1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c
index 1f13960..db9ff0f 100644
--- a/tools/kvm/ioport.c
+++ b/tools/kvm/ioport.c
@@ -3,6 +3,7 @@
#include kvm/kvm.h
#include kvm/util.h
#include kvm/rbtree-interval.h
+#include kvm/rwsem.h

#include linux/kvm.h/* for KVM_EXIT_* */
#include linux/types.h
@@ -22,6 +23,7 @@ struct ioport_entry {

static struct rb_root ioport_tree = RB_ROOT;
bool ioport_debug;
+static DECLARE_RWSEM(ioport_tree_sem);


Why do we need a new lock here? Can't we reuse the new ioport_mutex?

Pekka
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] kvm tools: Add rwlock wrapper

2011-05-26 Thread Pekka Enberg

On Thu, 26 May 2011, Sasha Levin wrote:

Adds a rwlock wrapper which like the mutex wrapper makes rwlock calls
similar to their kernel counterparts.

Signed-off-by: Sasha Levin levinsasha...@gmail.com


There's no explanation why a mutex isn't sufficient. The pthread locking 
primitives aren't all that great in practice so unless you have some 
correctness issue that requires a rwlock or some numbers, I'd prefer you 
go for a mutex.


Pekka
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PERF RESULTS] virtio and vhost-net performance enhancements

2011-05-26 Thread Krishna Kumar2
Shirley Ma x...@us.ibm.com wrote on 05/26/2011 09:12:22 PM:

 Could you please try TCP_RRs as well?

Right. Here's the result for TCP_RR:

__
#   RR% SD% CPU%
__
1   4.5   -31.4-27.9
2   5.1   -9.7  -5.4
4   60.4 -13.4 38.8
8   67.8 -13.5 45.0
16 55.8 -8.0   43.2
32 66.9 -14.1 43.3
64 47.2 -23.7 12.2
96 29.7 -11.8 14.3
128 8.0   2.2   10.7
___
BW: 37.3%   SD: -6.7%   CPU: 15.7%
___

Thanks,

- KK

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] kvm tools: Add rwlock wrapper

2011-05-26 Thread Sasha Levin
On Thu, 2011-05-26 at 19:02 +0300, Pekka Enberg wrote:
 On Thu, 26 May 2011, Sasha Levin wrote:
  Adds a rwlock wrapper which like the mutex wrapper makes rwlock calls
  similar to their kernel counterparts.
 
  Signed-off-by: Sasha Levin levinsasha...@gmail.com
 
 There's no explanation why a mutex isn't sufficient. The pthread locking 
 primitives aren't all that great in practice so unless you have some 
 correctness issue that requires a rwlock or some numbers, I'd prefer you 
 go for a mutex.

I've added some rwlocks because of what Ingo said yesterday about
adding/removing devices after the first initialization phase.

Take MMIO lock for example: Since we can now run SMP guests, we may have
multiple MMIO exits (one from each VCPU thread). Each of those exits
leads to searching the MMIO rbtree.

We can use a mutex to lock it, but it just means that those threads will
be blocked there instead of concurrently searching the MMIO tree which
makes the search linear instead of parallel.

It's hard to bring 'real' numbers at this stage because the only 'real'
device we have which uses MMIO is the VESA driver, and we can't really
simulate many VCPUs writing to it :)

-- 

Sasha.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/6] kvm tools: Protect IOPORT tree by rwsem

2011-05-26 Thread Sasha Levin
On Thu, 2011-05-26 at 19:01 +0300, Pekka Enberg wrote:
 On Thu, 26 May 2011, Sasha Levin wrote:
  Makes ioport thread-safe.
 
  Signed-off-by: Sasha Levin levinsasha...@gmail.com
  ---
  tools/kvm/ioport.c |7 +++
  1 files changed, 7 insertions(+), 0 deletions(-)
 
  diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c
  index 1f13960..db9ff0f 100644
  --- a/tools/kvm/ioport.c
  +++ b/tools/kvm/ioport.c
  @@ -3,6 +3,7 @@
  #include kvm/kvm.h
  #include kvm/util.h
  #include kvm/rbtree-interval.h
  +#include kvm/rwsem.h
 
  #include linux/kvm.h  /* for KVM_EXIT_* */
  #include linux/types.h
  @@ -22,6 +23,7 @@ struct ioport_entry {
 
  static struct rb_root ioport_tree = RB_ROOT;
  bool ioport_debug;
  +static DECLARE_RWSEM(ioport_tree_sem);
 
 Why do we need a new lock here? Can't we reuse the new ioport_mutex?

ioport_mutex is used for allocations of ioports to devices, this lock is
intended to protect the ioport tree from being read while new devices
are added.

-- 

Sasha.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PERF RESULTS] virtio and vhost-net performance enhancements

2011-05-26 Thread Krishna Kumar2
Krishna Kumar2/India/IBM wrote on 05/26/2011 09:51:32 PM:

  Could you please try TCP_RRs as well?

 Right. Here's the result for TCP_RR:

The actual transaction rate/second numbers are:

_
# RR1  RR2 (%)  SD1SD2 (%)
_
1 9476 9903 (4.5)   28.9   19.8 (-31.4)
2   17337 18225 (5.1)  92.7     83.7 (-9.7)
4   17385 27902 (60.4) 364.8    315.8 (-13.4)
8   25560 42912 (67.8) 1428.1   1234.0 (-13.5)
16  35898 55934 (55.8) 4391.6   4038.1 (-8.0)
32  48048 80228 (66.9) 17391.4  14932.0 (-14.1)
64  60412 88929 (47.2) 71087.7  54230.1 (-23.7)
96  71263 92439 (29.7) 145434.1 128214.0 (-11.8)
128 84208 91014 (8.0)  233668.2 23.6 (2.2)
_
RR: 37.3% SD: -6.7%
_

Thanks,

- KK

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] kvm tools: Add rwlock wrapper

2011-05-26 Thread Ingo Molnar

* Sasha Levin levinsasha...@gmail.com wrote:

 On Thu, 2011-05-26 at 19:02 +0300, Pekka Enberg wrote:
  On Thu, 26 May 2011, Sasha Levin wrote:
   Adds a rwlock wrapper which like the mutex wrapper makes rwlock calls
   similar to their kernel counterparts.
  
   Signed-off-by: Sasha Levin levinsasha...@gmail.com
  
  There's no explanation why a mutex isn't sufficient. The pthread 
  locking primitives aren't all that great in practice so unless 
  you have some correctness issue that requires a rwlock or some 
  numbers, I'd prefer you go for a mutex.
 
 I've added some rwlocks because of what Ingo said yesterday about 
 adding/removing devices after the first initialization phase.
 
 Take MMIO lock for example: Since we can now run SMP guests, we may 
 have multiple MMIO exits (one from each VCPU thread). Each of those 
 exits leads to searching the MMIO rbtree.
 
 We can use a mutex to lock it, but it just means that those threads 
 will be blocked there instead of concurrently searching the MMIO 
 tree which makes the search linear instead of parallel.
 
 It's hard to bring 'real' numbers at this stage because the only 
 'real' device we have which uses MMIO is the VESA driver, and we 
 can't really simulate many VCPUs writing to it :)

I'd suggest keeping it simple first - rwlocks are nasty and will 
bounce a cacheline just as much.

If lookup scalability is an issue we can extend RCU to tools/kvm/.

Thanks,

Ingo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] kvm tools: Add rwlock wrapper

2011-05-26 Thread Avi Kivity

On 05/26/2011 09:05 PM, Ingo Molnar wrote:


  I've added some rwlocks because of what Ingo said yesterday about
  adding/removing devices after the first initialization phase.

  Take MMIO lock for example: Since we can now run SMP guests, we may
  have multiple MMIO exits (one from each VCPU thread). Each of those
  exits leads to searching the MMIO rbtree.

  We can use a mutex to lock it, but it just means that those threads
  will be blocked there instead of concurrently searching the MMIO
  tree which makes the search linear instead of parallel.

  It's hard to bring 'real' numbers at this stage because the only
  'real' device we have which uses MMIO is the VESA driver, and we
  can't really simulate many VCPUs writing to it :)

I'd suggest keeping it simple first - rwlocks are nasty and will
bounce a cacheline just as much.


Well, this is the first case where tools/kvm can do better than qemu 
with its global lock, so I think it's worth it.



If lookup scalability is an issue we can extend RCU to tools/kvm/.


Definitely rcu is a perfect patch for mmio dispatch.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6] kvm tools: Add rwlock wrapper

2011-05-26 Thread Pekka Enberg
On Thu, May 26, 2011 at 9:11 PM, Avi Kivity a...@redhat.com wrote:
 On 05/26/2011 09:05 PM, Ingo Molnar wrote:

 
   I've added some rwlocks because of what Ingo said yesterday about
   adding/removing devices after the first initialization phase.
 
   Take MMIO lock for example: Since we can now run SMP guests, we may
   have multiple MMIO exits (one from each VCPU thread). Each of those
   exits leads to searching the MMIO rbtree.
 
   We can use a mutex to lock it, but it just means that those threads
   will be blocked there instead of concurrently searching the MMIO
   tree which makes the search linear instead of parallel.
 
   It's hard to bring 'real' numbers at this stage because the only
   'real' device we have which uses MMIO is the VESA driver, and we
   can't really simulate many VCPUs writing to it :)

 I'd suggest keeping it simple first - rwlocks are nasty and will
 bounce a cacheline just as much.

 Well, this is the first case where tools/kvm can do better than qemu with
 its global lock, so I think it's worth it.

 If lookup scalability is an issue we can extend RCU to tools/kvm/.

 Definitely rcu is a perfect patch for mmio dispatch.

Userspace RCU code is here, Sasha, if you feel like tackling this:

http://lttng.org/urcu

:-)

I'm CC'ing Paul and Mathieu as well for urcu.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: qemu-kvm-0.14.x regression - windows 2K8 R2 stopped booting

2011-05-26 Thread Nikola Ciprich
 In this case I am very puzzled :)
well, so am I :)
but at least, I now know there seems to be a problem with disk access. I'll ask 
some of our windows guys to try some failsafe mode or something and find out 
what is going on...
I'll report when I know something.
thanks for Your help!
n.

 
 --
   Gleb.
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

-- 
-
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28. rijna 168, 709 01 Ostrava

tel.:   +420 596 603 142
fax:+420 596 621 273
mobil:  +420 777 093 799

www.linuxbox.cz

mobil servis: +420 737 238 656
email servis: ser...@linuxbox.cz
-


pgp9091LC9pNf.pgp
Description: PGP signature


Re: [PATCH 4/6] kvm tools: Add rwlock wrapper

2011-05-26 Thread Sasha Levin
On Thu, 2011-05-26 at 21:21 +0300, Pekka Enberg wrote:
 On Thu, May 26, 2011 at 9:11 PM, Avi Kivity a...@redhat.com wrote:
  On 05/26/2011 09:05 PM, Ingo Molnar wrote:
 
  
I've added some rwlocks because of what Ingo said yesterday about
adding/removing devices after the first initialization phase.
  
Take MMIO lock for example: Since we can now run SMP guests, we may
have multiple MMIO exits (one from each VCPU thread). Each of those
exits leads to searching the MMIO rbtree.
  
We can use a mutex to lock it, but it just means that those threads
will be blocked there instead of concurrently searching the MMIO
tree which makes the search linear instead of parallel.
  
It's hard to bring 'real' numbers at this stage because the only
'real' device we have which uses MMIO is the VESA driver, and we
can't really simulate many VCPUs writing to it :)
 
  I'd suggest keeping it simple first - rwlocks are nasty and will
  bounce a cacheline just as much.
 
  Well, this is the first case where tools/kvm can do better than qemu with
  its global lock, so I think it's worth it.
 
  If lookup scalability is an issue we can extend RCU to tools/kvm/.
 
  Definitely rcu is a perfect patch for mmio dispatch.
 
 Userspace RCU code is here, Sasha, if you feel like tackling this:
 
 http://lttng.org/urcu
 
 :-)
 
 I'm CC'ing Paul and Mathieu as well for urcu.

Sounds good!

Should be quite an addition and could be used in more places than just
the MMIO dispatcher.

-- 

Sasha.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


ioeventfd question

2011-05-26 Thread Sasha Levin
Hi Avi,

I'm working on adding ioeventfd support into tools/kvm/.
Currently the implementation creates ioeventfd entries at the
'VIRTIO_PCI_QUEUE_NOTIFY' of each device and waits on all of them using
epoll().
The basics are working - when IO is triggered I receive a notification
using the event instead of an exit.

I couldn't find a way to retrieve the value written to the PIO port -
the guest memory at that location doesn't change, 'reading' the event
just returns 1 (as expected) and I couldn't find anything else which
might suggest what value was written to the PIO port.

This is an issue with devices which have multiple virtio-rings, such as
virtio-net.

-- 

Sasha.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V5 2/6 net-next] netdevice.h: Add zero-copy flag in netdevice

2011-05-26 Thread Shirley Ma
On Thu, 2011-05-26 at 11:49 +0300, Michael S. Tsirkin wrote:
  
   - SG support
   - HIGHDMA support (on arches where this makes sense)
  
  This can be checked by device flags.
 
 OK, but pls note that SG can get turned off dynamically.

Tested the patch with SG dynamically turned on/off and with tcpdump suspended.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V6 0/4]

2011-05-26 Thread Shirley Ma
This patchset adds support for TX zero-copy between the guest and host
kernel through vhost. It significantly reduces CPU utilization on the
local host on which the guest is located (it reduced CPU usage by about 50%
for a single-stream test on the host, while BW for the 4K message size
increased by about 50%). The patchset is based on the previous submission and
comments from the community regarding when/how to release guest kernel
buffers. This is the simplest approach I can think of
after comparing it with several other solutions.

This patchset has integrated V3 review comments from community: 

1. Add more comments on how to use device ZEROCOPY flag;

2. Change device ZEROCOPY to available bit 31

3. Fix skb header linear allocation when virtio_net GSO is not enabled

It has integrated V4 review comments from MST and Sridhar:

1. In vhost, using socket poll wake up for outstanding DMAs

2. Add detailed comments for vhost_zerocopy_signal_used call

3. Add sleep in vhost shutting down instead of busy-wait for outstanding
   DMAs.

4. Copy small packets, don't do zero-copy callback in macvtap, mark its
   DMA done in vhost

5. change zerocopy to bool in macvtap.

It integrates V5 review comments from MST and Michał Mirosław mir...@gmail.com:

1. Prevent userspace apps from holding skb userspace buffers by copying
userspace buffers to kernel in skb_clone, skb_copy, pskb_copy,
pskb_expand_head.

2. It also uses the HIGHDMA and SG feature bits to enable ZEROCOPY, which removes
the dependency on a new feature bit; we can add one later when a new
feature bit is available.

This patchset includes:

1/4: Add a new sock zero-copy flag, SOCK_ZEROCOPY;

2/4: Add a new struct skb_ubuf_info in skb_shared_info for the userspace
buffer release callback, which is invoked when the lower device has finished DMA
for that skb, i.e. when the last reference is gone. Whenever skb_clone, skb_copy,
pskb_copy or pskb_expand_head gets called (e.g. from tcpdump or filtering), these
userspace buffers are copied into the kernel, because we don't want userspace
apps to hold userspace buffers for too long.

3/4: Add vhost zero-copy callback in vhost when skb last refcnt is gone;
add vhost_zerocopy_signal_used to notify guest to release TX skb
buffers.

4/4: Add macvtap zero-copy in lower device when sending packet is
greater than 256 bytes.

The patchset is built against most recent net-next linux 2.6.39-rc7. It
has passed netperf/netserver multiple streams stress test, tcpdump
suspended test, dynamically SG change test.

Single TCP_STREAM 120 secs test results over an ixgbe 10Gb NIC:

Message   BW(Gb/s)  qemu-kvm (NumCPU)  vhost-net (NumCPU)  PerfTop irq/s
4K        7408.57   92.1%              22.6%               1229
4K(Orig)  4913.17   118.1%             84.1%               2086
8K        9129.90   89.3%              23.3%               1141
8K(Orig)  7094.55   115.9%             84.7%               2157
16K       9178.81   89.1%              23.3%               1139
16K(Orig) 8927.1    118.7%             83.4%               2262
64K       9171.43   88.4%              24.9%               1253
64K(Orig) 9085.85   115.9%             82.4%               2229

For message sizes less than or equal to 2K, there is a known KVM guest TX
overrun issue. With this zero-copy patch the issue becomes more severe:
guest io_exits have tripled compared to before, so the performance is not good.
Once the TX overrun problem has been addressed, I will retest the small
message size performance.

 drivers/net/macvtap.c  |  132 ---
 drivers/vhost/net.c|   44 +-
 drivers/vhost/vhost.c  |   49 +++
 drivers/vhost/vhost.h  |   13 
 include/linux/netdevice.h  |   10 +++
 include/linux/skbuff.h |   26 
 include/net/sock.h |1 +
 net/core/skbuff.c  |   81 -
 8 files changed, 345 insertions(+), 17 deletions(-)


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V6 1/4 net-next] sock.h: Add a new sock zero-copy flag

2011-05-26 Thread Shirley Ma
Signed-off-by: Shirley Ma x...@us.ibm.com
---

 include/net/sock.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 01810a3..ab09097 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -562,6 +562,7 @@ enum sock_flags {
SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
SOCK_FASYNC, /* fasync() active */
SOCK_RXQ_OVFL,
+   SOCK_ZEROCOPY, /* buffers from userspace */
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V6 2/4 net-next] skbuff: Add userspace zero-copy buffers in skb

2011-05-26 Thread Shirley Ma
This patch adds userspace buffer support to the skb shared info. A new 
struct skb_ubuf_info is needed to maintain the userspace buffers'
argument and index; a callback is used to notify userspace to release
the buffers once the lower device has done DMA (the last reference to that skb
is gone).

If any userspace app references these userspace buffers,
the buffers will be copied into the kernel. This way we
can prevent userspace apps from holding these userspace buffers for too long.

Signed-off-by: Shirley Ma x...@us.ibm.com
---

 include/linux/skbuff.h |   26 +++
 net/core/skbuff.c  |   80 ++-
 2 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d0ae90a..025de5c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -189,6 +189,18 @@ enum {
SKBTX_DRV_NEEDS_SK_REF = 1  3,
 };
 
+/*
+ * The callback notifies userspace to release buffers when skb DMA is done in
+ * lower device, the skb last reference should be 0 when calling this.
+ * The desc is used to track userspace buffer index.
+ */
+struct skb_ubuf_info {
+   /* support buffers allocation from userspace */
+   void(*callback)(struct sk_buff *);
+   void*arg;
+   size_t  desc;
+};
+
 /* This data is invariant across clones and lives at
  * the end of the header data, ie. at skb-end.
  */
@@ -211,6 +223,10 @@ struct skb_shared_info {
/* Intermediate layers must ensure that destructor_arg
 * remains valid until skb destructor */
void *  destructor_arg;
+
+   /* DMA mapping from/to userspace buffers */
+   struct skb_ubuf_info ubuf;
+
/* must be last field, see pskb_expand_head() */
skb_frag_t  frags[MAX_SKB_FRAGS];
 };
@@ -2261,5 +2277,15 @@ static inline void skb_checksum_none_assert(struct 
sk_buff *skb)
 }
 
 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
+
+/*
+ * skb_ubuf - is the buffer from userspace
+ * @skb: buffer to check
+ */
+static inline int skb_ubuf(const struct sk_buff *skb)
+{
+   return (skb_shinfo(skb)-ubuf.callback != NULL);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SKBUFF_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7ebeed0..890447c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -210,6 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t 
gfp_mask,
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(shinfo-dataref, 1);
+   shinfo-ubuf.callback = NULL;
+   shinfo-ubuf.arg = NULL;
kmemcheck_annotate_variable(shinfo-destructor_arg);
 
if (fclone) {
@@ -328,6 +330,14 @@ static void skb_release_data(struct sk_buff *skb)
put_page(skb_shinfo(skb)-frags[i].page);
}
 
+   /*
+* if skb buf is from userspace, we need to notify the caller
+* the lower device DMA has done;
+*/
+   if (skb_ubuf(skb)) {
+   skb_shinfo(skb)-ubuf.callback(skb);
+   skb_shinfo(skb)-ubuf.callback = NULL;
+   }
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
 
@@ -480,6 +490,9 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
if (irqs_disabled())
return false;
 
+   if (skb_ubuf(skb))
+   return false;
+
if (skb_is_nonlinear(skb) || skb-fclone != SKB_FCLONE_UNAVAILABLE)
return false;
 
@@ -572,6 +585,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, 
struct sk_buff *skb)
atomic_set(n-users, 1);
 
atomic_inc((skb_shinfo(skb)-dataref));
+   skb_shinfo(skb)-ubuf.callback = NULL;
skb-cloned = 1;
 
return n;
@@ -595,6 +609,48 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct 
sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
+/* skb frags copy userspace buffers to kernel */
+static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
+{
+   int i;
+   int num_frags = skb_shinfo(skb)-nr_frags;
+   struct page *page, *head = NULL;
+
+   for (i = 0; i  num_frags; i++) {
+   u8 *vaddr;
+   skb_frag_t *f = skb_shinfo(skb)-frags[i];
+
+   page = alloc_page(GFP_ATOMIC);
+   if (!page) {
+   while (head) {
+   put_page(head);
+   head = (struct page *)head-private;
+   }
+   return -ENOMEM;
+   }
+   vaddr = kmap_skb_frag(skb_shinfo(skb)-frags[i]);
+   memcpy(page_address(page), vaddr + f-page_offset, f-size);
+   kunmap_skb_frag(vaddr);
+   page-private = (unsigned long)head;
+ 

[PATCH V6 3/4]macvtap: macvtap TX zero-copy support

2011-05-26 Thread Shirley Ma
Only when buffer size is greater than GOODCOPY_LEN (256), macvtap
enables zero-copy.

Signed-off-by: Shirley Ma x...@us.ibm.com
---

 drivers/net/macvtap.c |  132 
 1 files changed, 121 insertions(+), 11 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 6696e56..97ad224 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -60,6 +60,7 @@ static struct proto macvtap_proto = {
  */
 static dev_t macvtap_major;
 #define MACVTAP_NUM_DEVS 65536
+#define GOODCOPY_LEN 256
 static struct class *macvtap_class;
 static struct cdev macvtap_cdev;
 
@@ -340,6 +341,7 @@ static int macvtap_open(struct inode *inode, struct file 
*file)
 {
struct net *net = current-nsproxy-net_ns;
struct net_device *dev = dev_get_by_index(net, iminor(inode));
+   struct macvlan_dev *vlan = netdev_priv(dev);
struct macvtap_queue *q;
int err;
 
@@ -369,6 +371,16 @@ static int macvtap_open(struct inode *inode, struct file 
*file)
q-flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
q-vnet_hdr_sz = sizeof(struct virtio_net_hdr);
 
+   /*
+* so far only KVM virtio_net uses macvtap, enable zero copy between
+* guest kernel and host kernel when lower device supports highdma
+* and sg
+*/
+   if (vlan) {
+   if (vlan-lowerdev-features  (NETIF_F_HIGHDMA | NETIF_F_SG))
+   sock_set_flag(q-sk, SOCK_ZEROCOPY);
+   }
+
err = macvtap_set_queue(dev, file, q);
if (err)
sock_put(q-sk);
@@ -433,6 +445,80 @@ static inline struct sk_buff *macvtap_alloc_skb(struct 
sock *sk, size_t prepad,
return skb;
 }
 
+/* set skb frags from iovec, this can move to core network code for reuse */
+static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec 
*from,
+ int offset, size_t count)
+{
+   int len = iov_length(from, count) - offset;
+   int copy = skb_headlen(skb);
+   int size, offset1 = 0;
+   int i = 0;
+   skb_frag_t *f;
+
+   /* Skip over from offset */
+   while (count  (offset = from-iov_len)) {
+   offset -= from-iov_len;
+   ++from;
+   --count;
+   }
+
+   /* copy up to skb headlen */
+   while (count  (copy  0)) {
+   size = min_t(unsigned int, copy, from-iov_len - offset);
+   if (copy_from_user(skb-data + offset1, from-iov_base + offset,
+  size))
+   return -EFAULT;
+   if (copy  size) {
+   ++from;
+   --count;
+   }
+   copy -= size;
+   offset1 += size;
+   offset = 0;
+   }
+
+   if (len == offset1)
+   return 0;
+
+   while (count--) {
+   struct page *page[MAX_SKB_FRAGS];
+   int num_pages;
+   unsigned long base;
+
+   len = from-iov_len - offset1;
+   if (!len) {
+   offset1 = 0;
+   ++from;
+   continue;
+   }
+   base = (unsigned long)from-iov_base + offset1;
+   size = ((base  ~PAGE_MASK) + len + ~PAGE_MASK)  PAGE_SHIFT;
+   num_pages = get_user_pages_fast(base, size, 0, page[i]);
+   if ((num_pages != size) ||
+   (num_pages  MAX_SKB_FRAGS - skb_shinfo(skb)-nr_frags))
+   /* put_page is in skb free */
+   return -EFAULT;
+   skb-data_len += len;
+   skb-len += len;
+   skb-truesize += len;
+   atomic_add(len, skb-sk-sk_wmem_alloc);
+   while (len) {
+   f = skb_shinfo(skb)-frags[i];
+   f-page = page[i];
+   f-page_offset = base  ~PAGE_MASK;
+   f-size = min_t(int, len, PAGE_SIZE - f-page_offset);
+   skb_shinfo(skb)-nr_frags++;
+   /* increase sk_wmem_alloc */
+   base += f-size;
+   len -= f-size;
+   i++;
+   }
+   offset1 = 0;
+   ++from;
+   }
+   return 0;
+}
+
 /*
  * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should
  * be shared with the tun/tap driver.
@@ -515,16 +601,18 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff 
*skb,
 
 
 /* Get packet from user space buffer */
-static ssize_t macvtap_get_user(struct macvtap_queue *q,
-   const struct iovec *iv, size_t count,
-   int noblock)
+static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
+   const struct iovec *iv, unsigned long total_len,
+   size_t 

[PATCH V6 4/4 net-next] vhost: vhost TX zero-copy support

2011-05-26 Thread Shirley Ma
Hello Michael,

Let me know if I missed anything from your previous review.

Thanks
Shirley

---

This patch maintains the outstanding userspace buffers in the 
sequence in which they are delivered to vhost. The outstanding userspace buffers 
will be marked as done once the lower device's DMA of the buffers has finished. 
This is monitored through the callback invoked when kfree_skb drops the last
reference. Two buffer indices are used for this purpose.

The vhost passes the userspace buffer info to the lower device skb 
through message control. Some DMAs may already be done when
entering vhost handle_tx; in the worst case all buffers in the vq are
in pending/done status, so we need to notify the guest to release DMA-done 
buffers first before getting any new buffers from the vq.

Signed-off-by: Shirley x...@us.ibm.com
---

 drivers/vhost/net.c   |   44 +++-
 drivers/vhost/vhost.c |   49 +
 drivers/vhost/vhost.h |   13 +
 3 files changed, 105 insertions(+), 1 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 2f7c76a..b27ba64 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -32,6 +32,10 @@
  * Using this limit prevents one virtqueue from starving others. */
 #define VHOST_NET_WEIGHT 0x8
 
+/* MAX number of TX used buffers for outstanding zerocopy */
+#define VHOST_MAX_PEND 128
+#define VHOST_GOODCOPY_LEN 256
+
 enum {
VHOST_NET_VQ_RX = 0,
VHOST_NET_VQ_TX = 1,
@@ -129,6 +133,7 @@ static void handle_tx(struct vhost_net *net)
int err, wmem;
size_t hdr_size;
struct socket *sock;
+   struct skb_ubuf_info pend;
 
/* TODO: check that we are running from vhost_worker? */
sock = rcu_dereference_check(vq-private_data, 1);
@@ -151,6 +156,10 @@ static void handle_tx(struct vhost_net *net)
hdr_size = vq-vhost_hlen;
 
for (;;) {
+   /* Release DMAs done buffers first */
+   if (atomic_read(vq-refcnt)  VHOST_MAX_PEND)
+   vhost_zerocopy_signal_used(vq, false);
+
head = vhost_get_vq_desc(net-dev, vq, vq-iov,
 ARRAY_SIZE(vq-iov),
 out, in,
@@ -166,6 +175,12 @@ static void handle_tx(struct vhost_net *net)
set_bit(SOCK_ASYNC_NOSPACE, sock-flags);
break;
}
+   /* If more outstanding DMAs, queue the work */
+   if (atomic_read(vq-refcnt)  VHOST_MAX_PEND) {
+   tx_poll_start(net, sock);
+   set_bit(SOCK_ASYNC_NOSPACE, sock-flags);
+   break;
+   }
if (unlikely(vhost_enable_notify(vq))) {
vhost_disable_notify(vq);
continue;
@@ -188,6 +203,24 @@ static void handle_tx(struct vhost_net *net)
   iov_length(vq-hdr, s), hdr_size);
break;
}
+   /* use msg_control to pass vhost zerocopy ubuf info to skb */
+   if (sock_flag(sock-sk, SOCK_ZEROCOPY)) {
+   vq-heads[vq-upend_idx].id = head;
+   if (len  VHOST_GOODCOPY_LEN)
+   /* copy don't need to wait for DMA done */
+   vq-heads[vq-upend_idx].len =
+   VHOST_DMA_DONE_LEN;
+   else {
+   vq-heads[vq-upend_idx].len = len;
+   pend.callback = vhost_zerocopy_callback;
+   pend.arg = vq;
+   pend.desc = vq-upend_idx;
+   msg.msg_control = pend;
+   msg.msg_controllen = sizeof(pend);
+   }
+   atomic_inc(vq-refcnt);
+   vq-upend_idx = (vq-upend_idx + 1) % UIO_MAXIOV;
+   }
/* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock-ops-sendmsg(NULL, sock, msg, len);
if (unlikely(err  0)) {
@@ -198,12 +231,21 @@ static void handle_tx(struct vhost_net *net)
if (err != len)
pr_debug(Truncated TX packet: 
  len %d != %zd\n, err, len);
-   vhost_add_used_and_signal(net-dev, vq, head, 0);
+   if (!sock_flag(sock-sk, SOCK_ZEROCOPY))
+   vhost_add_used_and_signal(net-dev, vq, head, 0);
total_len += len;
if (unlikely(total_len = VHOST_NET_WEIGHT)) {
vhost_poll_queue(vq-poll);
break;
}
+   /* if upend_idx is full, 

  1   2   >