date:20130328

Re: [PATCH] tcm_vhost: Avoid VIRTIO_RING_F_EVENT_IDX feature bit

2013-03-28 Thread Michael S. Tsirkin

On Thu, Mar 28, 2013 at 12:27:10AM +, Nicholas A. Bellinger wrote:
 From: Nicholas Bellinger n...@linux-iscsi.org
 
 This patch adds a VHOST_TCM_FEATURES mask minus VIRTIO_RING_F_EVENT_IDX
 so that vhost-scsi-pci userspace will strip this feature bit once
 GET_FEATURES reports it as being unsupported on the host.
 
 This is to avoid a bug where -handle_kicks() are missed when EVENT_IDX
 is enabled by default in userspace code.
 
 Cc: Michael S. Tsirkin m...@redhat.com
 Cc: Asias He as...@redhat.com
 Cc: Paolo Bonzini pbonz...@redhat.com
 Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org

No chance we can debug this properly?

 ---
  drivers/vhost/tcm_vhost.c |   10 --
  1 files changed, 8 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 0524267..b127edc 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -60,6 +60,12 @@ enum {
   VHOST_SCSI_VQ_IO = 2,
  };
  
 +enum {
 + VHOST_TCM_FEATURES = (1ULL  VIRTIO_F_NOTIFY_ON_EMPTY) |
 +  (1ULL  VIRTIO_RING_F_INDIRECT_DESC) |
 +  (1ULL  VHOST_F_LOG_ALL)

All the rest of the code uses VHOST_SCSI so rename this too?

 +};
 +

Please do something like

+/*
+* VIRTIO_RING_F_EVENT_IDX seems broken. Not sure the bug is in
+* kernel but disabling it helps.
+* TODO: debug and remove the workaround.
+*/
+enum {
+ VHOST_SCSI_FEATURES = VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)
+}

  #define VHOST_SCSI_MAX_TARGET256
  #define VHOST_SCSI_MAX_VQ128
  
 @@ -981,7 +987,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
  
  static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
  {
 - if (features  ~VHOST_FEATURES)
 + if (features  ~VHOST_TCM_FEATURES)
   return -EOPNOTSUPP;
  
   mutex_lock(vs-dev.mutex);
 @@ -1027,7 +1033,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned 
 int ioctl,
   return -EFAULT;
   return 0;
   case VHOST_GET_FEATURES:
 - features = VHOST_FEATURES;
 + features = VHOST_TCM_FEATURES;
   if (copy_to_user(featurep, features, sizeof features))
   return -EFAULT;
   return 0;
 -- 
 1.7.2.5
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V2 2/2] tcm_vhost: Use vq-private_data to indicate if the endpoint is setup

2013-03-28 Thread Michael S. Tsirkin

On Thu, Mar 28, 2013 at 10:17:28AM +0800, Asias He wrote:
 Currently, vs-vs_endpoint is used indicate if the endpoint is setup or
 not. It is set or cleared in vhost_scsi_set_endpoint() or
 vhost_scsi_clear_endpoint() under the vs-dev.mutex lock. However, when
 we check it in vhost_scsi_handle_vq(), we ignored the lock.
 
 Instead of using the vs-vs_endpoint and the vs-dev.mutex lock to
 indicate the status of the endpoint, we use per virtqueue
 vq-private_data to indicate it. In this way, we can only take the
 vq-mutex lock which is per queue and make the concurrent multiqueue
 process having less lock contention. Further, in the read side of
 vq-private_data, we can even do not take only lock if it is accessed in
 the vhost worker thread, because it is protected by vhost rcu.
 
 Signed-off-by: Asias He as...@redhat.com
 ---
  drivers/vhost/tcm_vhost.c | 38 +-
  1 file changed, 33 insertions(+), 5 deletions(-)
 
 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 5e3d4487..0524267 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -67,7 +67,6 @@ struct vhost_scsi {
   /* Protected by vhost_scsi-dev.mutex */
   struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
   char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
 - bool vs_endpoint;
  
   struct vhost_dev dev;
   struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ];
 @@ -91,6 +90,24 @@ static int iov_num_pages(struct iovec *iov)
  ((unsigned long)iov-iov_base  PAGE_MASK))  PAGE_SHIFT;
  }
  
 +static bool tcm_vhost_check_endpoint(struct vhost_virtqueue *vq)
 +{
 + bool ret = false;
 +
 + /*
 +  * We can handle the vq only after the endpoint is setup by calling the
 +  * VHOST_SCSI_SET_ENDPOINT ioctl.
 +  *
 +  * TODO: Check that we are running from vhost_worker which acts
 +  * as read-side critical section for vhost kind of RCU.
 +  * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
 +  */
 + if (rcu_dereference_check(vq-private_data, 1))
 + ret = true;
 +
 + return ret;
 +}
 +
  static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
  {
   return 1;
 @@ -581,8 +598,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
   int head, ret;
   u8 target;
  
 - /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
 - if (unlikely(!vs-vs_endpoint))
 + if (!tcm_vhost_check_endpoint(vq))
   return;


I would just move the check to under vq mutex,
and avoid rcu completely. In vhost-net we are using
private data outside lock so we can't do this,
no such issue here.
  
   mutex_lock(vq-mutex);
 @@ -829,11 +845,12 @@ static int vhost_scsi_set_endpoint(
  sizeof(vs-vs_vhost_wwpn));
   for (i = 0; i  VHOST_SCSI_MAX_VQ; i++) {
   vq = vs-vqs[i];
 + /* Flushing the vhost_work acts as synchronize_rcu */
   mutex_lock(vq-mutex);
 + rcu_assign_pointer(vq-private_data, vs);
   vhost_init_used(vq);
   mutex_unlock(vq-mutex);
   }
 - vs-vs_endpoint = true;
   ret = 0;
   } else {
   ret = -EEXIST;


There's also some weird smp_mb__after_atomic_inc() with no
atomic in sight just above ... Nicholas what was the point there?


 @@ -849,6 +866,8 @@ static int vhost_scsi_clear_endpoint(
  {
   struct tcm_vhost_tport *tv_tport;
   struct tcm_vhost_tpg *tv_tpg;
 + struct vhost_virtqueue *vq;
 + bool match = false;
   int index, ret, i;
   u8 target;
  
 @@ -884,9 +903,18 @@ static int vhost_scsi_clear_endpoint(
   }
   tv_tpg-tv_tpg_vhost_count--;
   vs-vs_tpg[target] = NULL;
 - vs-vs_endpoint = false;
 + match = true;
   mutex_unlock(tv_tpg-tv_tpg_mutex);
   }
 + if (match) {
 + for (i = 0; i  VHOST_SCSI_MAX_VQ; i++) {
 + vq = vs-vqs[i];
 + /* Flushing the vhost_work acts as synchronize_rcu */
 + mutex_lock(vq-mutex);
 + rcu_assign_pointer(vq-private_data, NULL);
 + mutex_unlock(vq-mutex);
 + }
 + }

I'm trying to understand what's going on here.
Does vhost_scsi only have a single target?
Because the moment you clear one target you
also set private_data to NULL ...


   mutex_unlock(vs-dev.mutex);
   return 0;
  
 -- 
 1.8.1.4
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/3] vhost-scsi: new device supporting the tcm_vhost Linux kernel module

2013-03-28 Thread Michael S. Tsirkin

On Wed, Mar 27, 2013 at 04:24:22PM -0700, Nicholas A. Bellinger wrote:
 On Thu, 2013-03-28 at 00:53 +0200, Michael S. Tsirkin wrote:
  On Thu, Mar 28, 2013 at 12:50:21AM +0200, Michael S. Tsirkin wrote:
   On Wed, Mar 27, 2013 at 03:45:42PM -0700, Nicholas A. Bellinger wrote:
On Thu, 2013-03-28 at 00:28 +0200, Michael S. Tsirkin wrote:
 On Wed, Mar 27, 2013 at 09:59:45PM +, Nicholas A. Bellinger wrote:
  From: Paolo Bonzini pbonz...@redhat.com
  
  The WWPN specified in configfs is passed to -device 
  vhost-scsi-pci.
  The tgpt field of the SET_ENDPOINT ioctl is obsolete now, so it is 
  not
  available from the QEMU command-line.  Instead, I hardcode it to 
  zero.
  
  Changes in V4:
 - Set event_idx=off by default (nab, thanks asias)
 
 Why? What's going on here?
 

Not disabling event_idx by default, or disabling from the command line
ends up resulting in -handle_kick() not getting called for subsequent
commands..

I spent some time trying to track this down recently with no luck, and
AFAICT it's always been required in order for vhost-scsi to function.
   
   Hmm this is a bug in kernel then. A better work-around is
   to disable EVENT_IDX in kernel. Let's do it for 3.9?
  
  But before we do, can you check that SET_FEATURES is
  called with this bit set if you enable event_idx?
  If not that's your bug then ...
  
 
 Ok, SET_FEATURES is currently not setting any bits at all based upon
 vhost_dev-features, so it looks like a vhost-scsi-pci bug..
 
 

If you call SET_FEATURES, does it start working even without
disabling EVENT_IDX in kernel?

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] tcm_vhost: Avoid VIRTIO_RING_F_EVENT_IDX feature bit

2013-03-28 Thread Nicholas A. Bellinger

On Thu, 2013-03-28 at 08:04 +0200, Michael S. Tsirkin wrote:
 On Thu, Mar 28, 2013 at 12:27:10AM +, Nicholas A. Bellinger wrote:
  From: Nicholas Bellinger n...@linux-iscsi.org
  
  This patch adds a VHOST_TCM_FEATURES mask minus VIRTIO_RING_F_EVENT_IDX
  so that vhost-scsi-pci userspace will strip this feature bit once
  GET_FEATURES reports it as being unsupported on the host.
  
  This is to avoid a bug where -handle_kicks() are missed when EVENT_IDX
  is enabled by default in userspace code.
  
  Cc: Michael S. Tsirkin m...@redhat.com
  Cc: Asias He as...@redhat.com
  Cc: Paolo Bonzini pbonz...@redhat.com
  Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
 
 No chance we can debug this properly?
 

Well, I assumed this was a low priority item ahead of tracking down the
other blocking seabios related issue.

I'm fine with dropping this patch and requiring the event_idx=off option
for now for an initial QEMU merge, but thought this made things a little
easier for CLI users to avoid confusion.

  ---
   drivers/vhost/tcm_vhost.c |   10 --
   1 files changed, 8 insertions(+), 2 deletions(-)
  
  diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
  index 0524267..b127edc 100644
  --- a/drivers/vhost/tcm_vhost.c
  +++ b/drivers/vhost/tcm_vhost.c
  @@ -60,6 +60,12 @@ enum {
  VHOST_SCSI_VQ_IO = 2,
   };
   
  +enum {
  +   VHOST_TCM_FEATURES = (1ULL  VIRTIO_F_NOTIFY_ON_EMPTY) |
  +(1ULL  VIRTIO_RING_F_INDIRECT_DESC) |
  +(1ULL  VHOST_F_LOG_ALL)
 
 All the rest of the code uses VHOST_SCSI so rename this too?
 
  +};
  +
 
 Please do something like
 
 +/*
 +* VIRTIO_RING_F_EVENT_IDX seems broken. Not sure the bug is in
 +* kernel but disabling it helps.
 +* TODO: debug and remove the workaround.
 +*/
 +enum {
 + VHOST_SCSI_FEATURES = VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)
 +}
 

Patch updated.

Thanks,

--nab

   #define VHOST_SCSI_MAX_TARGET  256
   #define VHOST_SCSI_MAX_VQ  128
   
  @@ -981,7 +987,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
   
   static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
   {
  -   if (features  ~VHOST_FEATURES)
  +   if (features  ~VHOST_TCM_FEATURES)
  return -EOPNOTSUPP;
   
  mutex_lock(vs-dev.mutex);
  @@ -1027,7 +1033,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned 
  int ioctl,
  return -EFAULT;
  return 0;
  case VHOST_GET_FEATURES:
  -   features = VHOST_FEATURES;
  +   features = VHOST_TCM_FEATURES;
  if (copy_to_user(featurep, features, sizeof features))
  return -EFAULT;
  return 0;
  -- 
  1.7.2.5


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/3] vhost-scsi: new device supporting the tcm_vhost Linux kernel module

2013-03-28 Thread Nicholas A. Bellinger

On Thu, 2013-03-28 at 08:19 +0200, Michael S. Tsirkin wrote:
 On Wed, Mar 27, 2013 at 04:24:22PM -0700, Nicholas A. Bellinger wrote:
  On Thu, 2013-03-28 at 00:53 +0200, Michael S. Tsirkin wrote:
   On Thu, Mar 28, 2013 at 12:50:21AM +0200, Michael S. Tsirkin wrote:
On Wed, Mar 27, 2013 at 03:45:42PM -0700, Nicholas A. Bellinger wrote:
 On Thu, 2013-03-28 at 00:28 +0200, Michael S. Tsirkin wrote:
  On Wed, Mar 27, 2013 at 09:59:45PM +, Nicholas A. Bellinger 
  wrote:
   From: Paolo Bonzini pbonz...@redhat.com
   
   The WWPN specified in configfs is passed to -device 
   vhost-scsi-pci.
   The tgpt field of the SET_ENDPOINT ioctl is obsolete now, so it 
   is not
   available from the QEMU command-line.  Instead, I hardcode it to 
   zero.
   
   Changes in V4:
  - Set event_idx=off by default (nab, thanks asias)
  
  Why? What's going on here?
  
 
 Not disabling event_idx by default, or disabling from the command line
 ends up resulting in -handle_kick() not getting called for subsequent
 commands..
 
 I spent some time trying to track this down recently with no luck, and
 AFAICT it's always been required in order for vhost-scsi to function.

Hmm this is a bug in kernel then. A better work-around is
to disable EVENT_IDX in kernel. Let's do it for 3.9?
   
   But before we do, can you check that SET_FEATURES is
   called with this bit set if you enable event_idx?
   If not that's your bug then ...
   
  
  Ok, SET_FEATURES is currently not setting any bits at all based upon
  vhost_dev-features, so it looks like a vhost-scsi-pci bug..
  
  
 
 If you call SET_FEATURES, does it start working even without
 disabling EVENT_IDX in kernel?
 

Unfortunately, no.  The same strangeness persists unless event_idx=off
is passed, or EVENT_IDX is disabled in the kernel and stripped off with
the vhost_scsi_get_features() patch.



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V3 WIP 3/3] disable vhost_verify_ring_mappings check

2013-03-28 Thread Nicholas A. Bellinger

On Wed, 2013-03-27 at 15:33 -0700, Nicholas A. Bellinger wrote:
 On Wed, 2013-03-27 at 23:56 +0200, Michael S. Tsirkin wrote:
  On Wed, Mar 27, 2013 at 02:31:27PM -0700, Nicholas A. Bellinger wrote:
   On Wed, 2013-03-20 at 11:51 +0200, Michael S. Tsirkin wrote:
On Tue, Mar 19, 2013 at 06:57:08PM -0700, Nicholas A. Bellinger wrote:
 On Tue, 2013-03-19 at 09:40 +0100, Stefan Hajnoczi wrote:
  On Tue, Mar 19, 2013 at 08:34:45AM +0800, Asias He wrote:
   ---
hw/vhost.c | 2 ++
1 file changed, 2 insertions(+)
   
   diff --git a/hw/vhost.c b/hw/vhost.c
   index 4d6aee3..0c52ec4 100644
   --- a/hw/vhost.c
   +++ b/hw/vhost.c
   @@ -421,10 +421,12 @@ static void vhost_set_memory(MemoryListener 
   *listener,
return;
}

   +#if 0
if (dev-started) {
r = vhost_verify_ring_mappings(dev, start_addr, size);
assert(r = 0);
}
   +#endif
  
  Please add a comment to explain why.
  
 
 Btw, the output that Asias added in the failure case at the behest of
 MST is here:
 
 http://www.spinics.net/lists/target-devel/msg04077.html

Yes I suspected we could get l  ring_size, but this is
not the case here.

   
   Hi MST  Co,
   
   A quick update here..
   
   So this issue appears to be related to performing the
   vhost_verify_ring_mappings() call after vhost_dev_unassign_memory() has
   been invoked with vhost_set_memory(..., add=false).
   
   AFAICT from the logs below, things appear to work as expected when
   vhost_verify_ring_mappings() is called only for the
   vhost_set_memory(..., add=true) case.
   
   Calling vhost_verify_ring_mappings() when dev-started == true +
   vhost_set_memory(..., add=false) appears to be a bug caused by fallout
   from:
   
   commit 24f4fe345c1b80bab1ee18573914123d8028a9e6
   Author: Michael S. Tsirkin m...@redhat.com
   Date:   Tue Dec 25 17:41:07 2012 +0200
   
   vhost: set started flag while start is in progress
   
   I'm including the following patch in the forth-coming vhost-scsi series.
   Please let me know if you have any concerns.
   
   diff --git a/hw/vhost.c b/hw/vhost.c
   index 4d6aee3..687a689 100644
   --- a/hw/vhost.c
   +++ b/hw/vhost.c
   @@ -421,7 +421,7 @@ static void vhost_set_memory(MemoryListener *listener,
return;
}

   -if (dev-started) {
   +if (dev-started  add) {
r = vhost_verify_ring_mappings(dev, start_addr, size);
assert(r = 0);
}
   
   Thanks!
   
   --nab
  
  Sorry NAK,
  I think this will shut down too much stuff:
  the main reason to check is when we delete a region.
  
 

SNIP

  I still do not understand how this happened.  Somehow a memory region
  was deleted after vhost_dev_start but before vhost_virtqueue_start was
  called?
 
 Not sure..
 
 To clarify, this is only happening during seabios setup+scan of
 virtio-scsi, and not during normal virtio_scsi LLD operation.
 
  Can you set a breakpoint there and see please?
  
  
 

A bit more context here..

After debugging seabios this evening, I've isolated the spot where
things begin to go south for vhost_verify_ring_mappings check()

Below are logs from qemu + seabios serial output mixed to (attempt) to
demonstrate what's going on..  

root@tifa:/usr/src# qemu-system-x86_64 -enable-kvm -smp 4 -m 2048
-serial file:/tmp/vhost-serial.txt
-hda /usr/src/qemu-paolo.git/debian_squeeze_amd64_standard.qcow2 -device
vhost-scsi-pci,wwpn=naa.600140579ad21088
qemu-system-x86_64: pci_add_option_rom: failed to find romfile efi-e1000.rom
Calling -region_add: section.size: 655360
vhost_set_memory: section: 0x7fff962c2580 section-size: 655360 add: 1
Calling -region_add: section.size: 131072
Calling -region_add: section.size: 131072
vhost_set_memory: section: 0x7fff962c2580 section-size: 131072 add: 1
Calling -region_add: section.size: 131072
vhost_set_memory: section: 0x7fff962c2580 section-size: 131072 add: 1
Calling -region_add: section.size: 2146435072
vhost_set_memory: section: 0x7fff962c2580 section-size: 2146435072 add: 1
Calling -region_add: section.size: 4096
Calling -region_add: section.size: 1024
Calling -region_add: section.size: 1048576
Calling -region_add: section.size: 262144
vhost_set_memory: section: 0x7fff962c2580 section-size: 262144 add: 1
vhost_scsi_init_pci Before virtio_init_pci
virtio_init_pci: size: 60
virtio_init_pci: new size: 64
vhost_set_memory: section: 0x7fe2801f2b60 section-size: 131072 add: 0
vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 32768 add: 1
vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 98304 add: 1
vhost_set_memory: section: 0x7fe2801f2b60 section-size: 32768 add: 0
vhost_set_memory: section: 0x7fe2801f2b60 section-size: 98304 add: 0
vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 65536 add: 1
vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 65536 add: 1

Re: [PATCH V3 WIP 3/3] disable vhost_verify_ring_mappings check

2013-03-28 Thread Nicholas A. Bellinger

On Wed, 2013-03-27 at 23:45 -0700, Nicholas A. Bellinger wrote:
 On Wed, 2013-03-27 at 15:33 -0700, Nicholas A. Bellinger wrote:
  On Wed, 2013-03-27 at 23:56 +0200, Michael S. Tsirkin wrote:
   On Wed, Mar 27, 2013 at 02:31:27PM -0700, Nicholas A. Bellinger wrote:

SNIP

Adding a bit more detailed seabios PCI setup info, and
make_bios_readonly_intel() debug output for Kevin  Co to review.

 
   I still do not understand how this happened.  Somehow a memory region
   was deleted after vhost_dev_start but before vhost_virtqueue_start was
   called?
  
  Not sure..
  
  To clarify, this is only happening during seabios setup+scan of
  virtio-scsi, and not during normal virtio_scsi LLD operation.
  
   Can you set a breakpoint there and see please?
   
   
  
 
 A bit more context here..
 
 After debugging seabios this evening, I've isolated the spot where
 things begin to go south for vhost_verify_ring_mappings check()
 
 Below are logs from qemu + seabios serial output mixed to (attempt) to
 demonstrate what's going on..  
 
 root@tifa:/usr/src# qemu-system-x86_64 -enable-kvm -smp 4 -m 2048
 -serial file:/tmp/vhost-serial.txt
 -hda /usr/src/qemu-paolo.git/debian_squeeze_amd64_standard.qcow2 -device
 vhost-scsi-pci,wwpn=naa.600140579ad21088
 qemu-system-x86_64: pci_add_option_rom: failed to find romfile efi-e1000.rom
 Calling -region_add: section.size: 655360
 vhost_set_memory: section: 0x7fff962c2580 section-size: 655360 add: 1
 Calling -region_add: section.size: 131072
 Calling -region_add: section.size: 131072
 vhost_set_memory: section: 0x7fff962c2580 section-size: 131072 add: 1
 Calling -region_add: section.size: 131072
 vhost_set_memory: section: 0x7fff962c2580 section-size: 131072 add: 1
 Calling -region_add: section.size: 2146435072
 vhost_set_memory: section: 0x7fff962c2580 section-size: 2146435072 add: 1
 Calling -region_add: section.size: 4096
 Calling -region_add: section.size: 1024
 Calling -region_add: section.size: 1048576
 Calling -region_add: section.size: 262144
 vhost_set_memory: section: 0x7fff962c2580 section-size: 262144 add: 1
 vhost_scsi_init_pci Before virtio_init_pci
 virtio_init_pci: size: 60
 virtio_init_pci: new size: 64
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 131072 add: 0
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 32768 add: 1
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 98304 add: 1
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 32768 add: 0
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 98304 add: 0
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 65536 add: 1
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 65536 add: 1
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 65536 add: 0
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 65536 add: 0
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 98304 add: 1
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 32768 add: 1
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 98304 add: 0
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 32768 add: 0
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 131072 add: 1
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 131072 add: 0
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 131072 add: 0
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 163840 add: 1
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 98304 add: 1
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 163840 add: 0
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 98304 add: 0
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 196608 add: 1
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 65536 add: 1
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 196608 add: 0
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 65536 add: 0
 vhost_set_memory: section: 0x7fe2801f2b60 section-size: 2146435072 add: 0
 vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 2146697216 add: 1
 vhost_set_memory: section: 0x7fe2801f29f0 section-size: 65536 add: 1
 vhost_set_memory: section: 0x7fe2801f2ab0 section-size: 65536 add: 0
 vhost_set_memory: section: 0x7fe2801f2a70 section-size: 8388608 add: 1
 Entering vhost_dev_start .
 Before vhost_virtqueue_start .
 After vhost_virtqueue_start .
 
 Start seabios serial output from init virtio-scsi code
 

=== PCI device probing ===
PCI probe
PCI device 00:00.0 (vd=8086:1237 c=0600)
PCI device 00:01.0 (vd=8086:7000 c=0601)
PCI device 00:01.1 (vd=8086:7010 c=0101)
PCI device 00:01.3 (vd=8086:7113 c=0680)
PCI device 00:02.0 (vd=1013:00b8 c=0300)
PCI device 00:03.0 (vd=8086:100e c=0200)
PCI device 00:04.0 (vd=1af4:1004 c=0100)
Found 7 PCI devices (max PCI bus is 00)
=== PCI new allocation pass #1 ===
PCI: check devices
=== PCI new allocation pass #2 ===
PCI: map device bdf=00:03.0  bar 1, addr c000, size 0040 [io]
PCI: map device bdf=00:04.0  bar 0, addr c040, size 0040 [io]
PCI: map device

Re: [PATCH V2 2/2] tcm_vhost: Use vq-private_data to indicate if the endpoint is setup

2013-03-28 Thread Asias He

On Thu, Mar 28, 2013 at 08:16:59AM +0200, Michael S. Tsirkin wrote:
 On Thu, Mar 28, 2013 at 10:17:28AM +0800, Asias He wrote:
  Currently, vs-vs_endpoint is used indicate if the endpoint is setup or
  not. It is set or cleared in vhost_scsi_set_endpoint() or
  vhost_scsi_clear_endpoint() under the vs-dev.mutex lock. However, when
  we check it in vhost_scsi_handle_vq(), we ignored the lock.
  
  Instead of using the vs-vs_endpoint and the vs-dev.mutex lock to
  indicate the status of the endpoint, we use per virtqueue
  vq-private_data to indicate it. In this way, we can only take the
  vq-mutex lock which is per queue and make the concurrent multiqueue
  process having less lock contention. Further, in the read side of
  vq-private_data, we can even do not take only lock if it is accessed in
  the vhost worker thread, because it is protected by vhost rcu.
  
  Signed-off-by: Asias He as...@redhat.com
  ---
   drivers/vhost/tcm_vhost.c | 38 +-
   1 file changed, 33 insertions(+), 5 deletions(-)
  
  diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
  index 5e3d4487..0524267 100644
  --- a/drivers/vhost/tcm_vhost.c
  +++ b/drivers/vhost/tcm_vhost.c
  @@ -67,7 +67,6 @@ struct vhost_scsi {
  /* Protected by vhost_scsi-dev.mutex */
  struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
  char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
  -   bool vs_endpoint;
   
  struct vhost_dev dev;
  struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ];
  @@ -91,6 +90,24 @@ static int iov_num_pages(struct iovec *iov)
 ((unsigned long)iov-iov_base  PAGE_MASK))  PAGE_SHIFT;
   }
   
  +static bool tcm_vhost_check_endpoint(struct vhost_virtqueue *vq)
  +{
  +   bool ret = false;
  +
  +   /*
  +* We can handle the vq only after the endpoint is setup by calling the
  +* VHOST_SCSI_SET_ENDPOINT ioctl.
  +*
  +* TODO: Check that we are running from vhost_worker which acts
  +* as read-side critical section for vhost kind of RCU.
  +* See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
  +*/
  +   if (rcu_dereference_check(vq-private_data, 1))
  +   ret = true;
  +
  +   return ret;
  +}
  +
   static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
   {
  return 1;
  @@ -581,8 +598,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
  int head, ret;
  u8 target;
   
  -   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
  -   if (unlikely(!vs-vs_endpoint))
  +   if (!tcm_vhost_check_endpoint(vq))
  return;
 
 
 I would just move the check to under vq mutex,
 and avoid rcu completely. In vhost-net we are using
 private data outside lock so we can't do this,
 no such issue here.

Are you talking about:

   handle_tx:
   /* TODO: check that we are running from vhost_worker? */
   sock = rcu_dereference_check(vq-private_data, 1);
   if (!sock)
   return;
   
   wmem = atomic_read(sock-sk-sk_wmem_alloc);
   if (wmem = sock-sk-sk_sndbuf) {
   mutex_lock(vq-mutex);
   tx_poll_start(net, sock);
   mutex_unlock(vq-mutex);
   return;
   }
   mutex_lock(vq-mutex);

Why not do the atomic_read and tx_poll_start under the vq-mutex, and thus do
the check under the lock as well.
   
   handle_rx:
   mutex_lock(vq-mutex);
   
   /* TODO: check that we are running from vhost_worker? */
   struct socket *sock = rcu_dereference_check(vq-private_data, 1);
   
   if (!sock)
   return;
   
   mutex_lock(vq-mutex);

Can't we can do the check under the vq-mutex here?

The rcu is still there but it makes the code easier to read. IMO, If we want to
use rcu, use it explicitly and avoid the vhost rcu completely. 

  mutex_lock(vq-mutex);
  @@ -829,11 +845,12 @@ static int vhost_scsi_set_endpoint(
 sizeof(vs-vs_vhost_wwpn));
  for (i = 0; i  VHOST_SCSI_MAX_VQ; i++) {
  vq = vs-vqs[i];
  +   /* Flushing the vhost_work acts as synchronize_rcu */
  mutex_lock(vq-mutex);
  +   rcu_assign_pointer(vq-private_data, vs);
  vhost_init_used(vq);
  mutex_unlock(vq-mutex);
  }
  -   vs-vs_endpoint = true;
  ret = 0;
  } else {
  ret = -EEXIST;
 
 
 There's also some weird smp_mb__after_atomic_inc() with no
 atomic in sight just above ... Nicholas what was the point there?
 
 
  @@ -849,6 +866,8 @@ static int vhost_scsi_clear_endpoint(
   {
  struct tcm_vhost_tport *tv_tport;
  struct tcm_vhost_tpg *tv_tpg;
  +   struct vhost_virtqueue *vq;
  +   bool match = false;
  int index, ret, i;
  u8 target;
   
  @@ -884,9 +903,18 @@ static int vhost_scsi_clear_endpoint(
  }

Re: [PATCH V2 2/2] tcm_vhost: Use vq-private_data to indicate if the endpoint is setup

2013-03-28 Thread Michael S. Tsirkin

On Thu, Mar 28, 2013 at 04:10:02PM +0800, Asias He wrote:
 On Thu, Mar 28, 2013 at 08:16:59AM +0200, Michael S. Tsirkin wrote:
  On Thu, Mar 28, 2013 at 10:17:28AM +0800, Asias He wrote:
   Currently, vs-vs_endpoint is used indicate if the endpoint is setup or
   not. It is set or cleared in vhost_scsi_set_endpoint() or
   vhost_scsi_clear_endpoint() under the vs-dev.mutex lock. However, when
   we check it in vhost_scsi_handle_vq(), we ignored the lock.
   
   Instead of using the vs-vs_endpoint and the vs-dev.mutex lock to
   indicate the status of the endpoint, we use per virtqueue
   vq-private_data to indicate it. In this way, we can only take the
   vq-mutex lock which is per queue and make the concurrent multiqueue
   process having less lock contention. Further, in the read side of
   vq-private_data, we can even do not take only lock if it is accessed in
   the vhost worker thread, because it is protected by vhost rcu.
   
   Signed-off-by: Asias He as...@redhat.com
   ---
drivers/vhost/tcm_vhost.c | 38 +-
1 file changed, 33 insertions(+), 5 deletions(-)
   
   diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
   index 5e3d4487..0524267 100644
   --- a/drivers/vhost/tcm_vhost.c
   +++ b/drivers/vhost/tcm_vhost.c
   @@ -67,7 +67,6 @@ struct vhost_scsi {
 /* Protected by vhost_scsi-dev.mutex */
 struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
 char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
   - bool vs_endpoint;

 struct vhost_dev dev;
 struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ];
   @@ -91,6 +90,24 @@ static int iov_num_pages(struct iovec *iov)
((unsigned long)iov-iov_base  PAGE_MASK))  PAGE_SHIFT;
}

   +static bool tcm_vhost_check_endpoint(struct vhost_virtqueue *vq)
   +{
   + bool ret = false;
   +
   + /*
   +  * We can handle the vq only after the endpoint is setup by calling the
   +  * VHOST_SCSI_SET_ENDPOINT ioctl.
   +  *
   +  * TODO: Check that we are running from vhost_worker which acts
   +  * as read-side critical section for vhost kind of RCU.
   +  * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
   +  */
   + if (rcu_dereference_check(vq-private_data, 1))
   + ret = true;
   +
   + return ret;
   +}
   +
static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
{
 return 1;
   @@ -581,8 +598,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi 
   *vs,
 int head, ret;
 u8 target;

   - /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
   - if (unlikely(!vs-vs_endpoint))
   + if (!tcm_vhost_check_endpoint(vq))
 return;
  
  
  I would just move the check to under vq mutex,
  and avoid rcu completely. In vhost-net we are using
  private data outside lock so we can't do this,
  no such issue here.
 
 Are you talking about:
 
handle_tx:
/* TODO: check that we are running from vhost_worker? */
sock = rcu_dereference_check(vq-private_data, 1);
if (!sock)
return;

wmem = atomic_read(sock-sk-sk_wmem_alloc);
if (wmem = sock-sk-sk_sndbuf) {
mutex_lock(vq-mutex);
tx_poll_start(net, sock);
mutex_unlock(vq-mutex);
return;
}
mutex_lock(vq-mutex);
 
 Why not do the atomic_read and tx_poll_start under the vq-mutex, and thus do
 the check under the lock as well.

handle_rx:
mutex_lock(vq-mutex);

/* TODO: check that we are running from vhost_worker? */
struct socket *sock = rcu_dereference_check(vq-private_data, 1);

if (!sock)
return;

mutex_lock(vq-mutex);
 
 Can't we can do the check under the vq-mutex here?
 
 The rcu is still there but it makes the code easier to read. IMO, If we want 
 to
 use rcu, use it explicitly and avoid the vhost rcu completely. 

The point is to make spurios wakeups as lightweight as possible.
The seemed to happen a lot with -net.
Should not happen with -scsi at all.


 mutex_lock(vq-mutex);
   @@ -829,11 +845,12 @@ static int vhost_scsi_set_endpoint(
sizeof(vs-vs_vhost_wwpn));
 for (i = 0; i  VHOST_SCSI_MAX_VQ; i++) {
 vq = vs-vqs[i];
   + /* Flushing the vhost_work acts as synchronize_rcu */
 mutex_lock(vq-mutex);
   + rcu_assign_pointer(vq-private_data, vs);
 vhost_init_used(vq);
 mutex_unlock(vq-mutex);
 }
   - vs-vs_endpoint = true;
 ret = 0;
 } else {
 ret = -EEXIST;
  
  
  There's also some weird smp_mb__after_atomic_inc() with no
  atomic in sight just above ... Nicholas what was the point there?
  
  
   @@ -849,6 +866,8 @@ static int vhost_scsi_clear_endpoint(
{
 struct tcm_vhost_tport *tv_tport;

Re: [PATCH V2 2/2] tcm_vhost: Use vq-private_data to indicate if the endpoint is setup

2013-03-28 Thread Asias He

On Thu, Mar 28, 2013 at 10:33:30AM +0200, Michael S. Tsirkin wrote:
 On Thu, Mar 28, 2013 at 04:10:02PM +0800, Asias He wrote:
  On Thu, Mar 28, 2013 at 08:16:59AM +0200, Michael S. Tsirkin wrote:
   On Thu, Mar 28, 2013 at 10:17:28AM +0800, Asias He wrote:
Currently, vs-vs_endpoint is used indicate if the endpoint is setup or
not. It is set or cleared in vhost_scsi_set_endpoint() or
vhost_scsi_clear_endpoint() under the vs-dev.mutex lock. However, when
we check it in vhost_scsi_handle_vq(), we ignored the lock.

Instead of using the vs-vs_endpoint and the vs-dev.mutex lock to
indicate the status of the endpoint, we use per virtqueue
vq-private_data to indicate it. In this way, we can only take the
vq-mutex lock which is per queue and make the concurrent multiqueue
process having less lock contention. Further, in the read side of
vq-private_data, we can even do not take only lock if it is accessed in
the vhost worker thread, because it is protected by vhost rcu.

Signed-off-by: Asias He as...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 38 +-
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 5e3d4487..0524267 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -67,7 +67,6 @@ struct vhost_scsi {
/* Protected by vhost_scsi-dev.mutex */
struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
-   bool vs_endpoint;
 
struct vhost_dev dev;
struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ];
@@ -91,6 +90,24 @@ static int iov_num_pages(struct iovec *iov)
   ((unsigned long)iov-iov_base  PAGE_MASK))  
PAGE_SHIFT;
 }
 
+static bool tcm_vhost_check_endpoint(struct vhost_virtqueue *vq)
+{
+   bool ret = false;
+
+   /*
+* We can handle the vq only after the endpoint is setup by 
calling the
+* VHOST_SCSI_SET_ENDPOINT ioctl.
+*
+* TODO: Check that we are running from vhost_worker which acts
+* as read-side critical section for vhost kind of RCU.
+* See the comments in struct vhost_virtqueue in 
drivers/vhost/vhost.h
+*/
+   if (rcu_dereference_check(vq-private_data, 1))
+   ret = true;
+
+   return ret;
+}
+
 static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
 {
return 1;
@@ -581,8 +598,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi 
*vs,
int head, ret;
u8 target;
 
-   /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
-   if (unlikely(!vs-vs_endpoint))
+   if (!tcm_vhost_check_endpoint(vq))
return;
   
   
   I would just move the check to under vq mutex,
   and avoid rcu completely. In vhost-net we are using
   private data outside lock so we can't do this,
   no such issue here.
  
  Are you talking about:
  
 handle_tx:
 /* TODO: check that we are running from vhost_worker? */
 sock = rcu_dereference_check(vq-private_data, 1);
 if (!sock)
 return;
 
 wmem = atomic_read(sock-sk-sk_wmem_alloc);
 if (wmem = sock-sk-sk_sndbuf) {
 mutex_lock(vq-mutex);
 tx_poll_start(net, sock);
 mutex_unlock(vq-mutex);
 return;
 }
 mutex_lock(vq-mutex);
  
  Why not do the atomic_read and tx_poll_start under the vq-mutex, and thus 
  do
  the check under the lock as well.
 
 handle_rx:
 mutex_lock(vq-mutex);
 
 /* TODO: check that we are running from vhost_worker? */
 struct socket *sock = rcu_dereference_check(vq-private_data, 1);
 
 if (!sock)
 return;
 
 mutex_lock(vq-mutex);
  
  Can't we can do the check under the vq-mutex here?
  
  The rcu is still there but it makes the code easier to read. IMO, If we 
  want to
  use rcu, use it explicitly and avoid the vhost rcu completely. 
 
 The point is to make spurios wakeups as lightweight as possible.
 The seemed to happen a lot with -net.
 Should not happen with -scsi at all.

I am wondering:

1. Why there is a lot of spurios wakeups

2. What performance impact it would give if we take the lock to check
   vq-private_data. Sinc, at any time, either handle_tx or handle_rx
   can be running. So we can almost always take the vq-mutex mutex.
   Did you managed to measure real perf difference?

 
mutex_lock(vq-mutex);
@@ -829,11 +845,12 @@ static int vhost_scsi_set_endpoint(
   sizeof(vs-vs_vhost_wwpn));
for (i = 0;

Re: [PATCH V3 WIP 3/3] disable vhost_verify_ring_mappings check

2013-03-28 Thread Michael S. Tsirkin

On Thu, Mar 28, 2013 at 12:35:42AM -0700, Nicholas A. Bellinger wrote:
 On Wed, 2013-03-27 at 23:45 -0700, Nicholas A. Bellinger wrote:
  On Wed, 2013-03-27 at 15:33 -0700, Nicholas A. Bellinger wrote:
   On Wed, 2013-03-27 at 23:56 +0200, Michael S. Tsirkin wrote:
On Wed, Mar 27, 2013 at 02:31:27PM -0700, Nicholas A. Bellinger wrote:
 
 SNIP
 
 Adding a bit more detailed seabios PCI setup info, and
 make_bios_readonly_intel() debug output for Kevin  Co to review.
 
  
I still do not understand how this happened.  Somehow a memory region
was deleted after vhost_dev_start but before vhost_virtqueue_start was
called?
   
   Not sure..
   
   To clarify, this is only happening during seabios setup+scan of
   virtio-scsi, and not during normal virtio_scsi LLD operation.
   
Can you set a breakpoint there and see please?


   
  
  A bit more context here..
  
  After debugging seabios this evening, I've isolated the spot where
  things begin to go south for vhost_verify_ring_mappings check()
  
  Below are logs from qemu + seabios serial output mixed to (attempt) to
  demonstrate what's going on..  
  
  root@tifa:/usr/src# qemu-system-x86_64 -enable-kvm -smp 4 -m 2048
  -serial file:/tmp/vhost-serial.txt
  -hda /usr/src/qemu-paolo.git/debian_squeeze_amd64_standard.qcow2 -device
  vhost-scsi-pci,wwpn=naa.600140579ad21088
  qemu-system-x86_64: pci_add_option_rom: failed to find romfile 
  efi-e1000.rom
  Calling -region_add: section.size: 655360
  vhost_set_memory: section: 0x7fff962c2580 section-size: 655360 add: 1
  Calling -region_add: section.size: 131072
  Calling -region_add: section.size: 131072
  vhost_set_memory: section: 0x7fff962c2580 section-size: 131072 add: 1
  Calling -region_add: section.size: 131072
  vhost_set_memory: section: 0x7fff962c2580 section-size: 131072 add: 1
  Calling -region_add: section.size: 2146435072
  vhost_set_memory: section: 0x7fff962c2580 section-size: 2146435072 add: 1
  Calling -region_add: section.size: 4096
  Calling -region_add: section.size: 1024
  Calling -region_add: section.size: 1048576
  Calling -region_add: section.size: 262144
  vhost_set_memory: section: 0x7fff962c2580 section-size: 262144 add: 1
  vhost_scsi_init_pci Before virtio_init_pci
  virtio_init_pci: size: 60
  virtio_init_pci: new size: 64
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 131072 add: 0
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 32768 add: 1
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 98304 add: 1
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 32768 add: 0
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 98304 add: 0
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 65536 add: 1
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 65536 add: 1
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 65536 add: 0
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 65536 add: 0
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 98304 add: 1
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 32768 add: 1
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 98304 add: 0
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 32768 add: 0
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 131072 add: 1
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 131072 add: 0
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 131072 add: 0
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 163840 add: 1
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 98304 add: 1
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 163840 add: 0
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 98304 add: 0
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 196608 add: 1
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 65536 add: 1
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 196608 add: 0
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 65536 add: 0
  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 2146435072 add: 0
  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 2146697216 add: 1
  vhost_set_memory: section: 0x7fe2801f29f0 section-size: 65536 add: 1
  vhost_set_memory: section: 0x7fe2801f2ab0 section-size: 65536 add: 0
  vhost_set_memory: section: 0x7fe2801f2a70 section-size: 8388608 add: 1
  Entering vhost_dev_start .
  Before vhost_virtqueue_start .
  After vhost_virtqueue_start .
  
  Start seabios serial output from init virtio-scsi code
  
 
 === PCI device probing ===
 PCI probe
 PCI device 00:00.0 (vd=8086:1237 c=0600)
 PCI device 00:01.0 (vd=8086:7000 c=0601)
 PCI device 00:01.1 (vd=8086:7010 c=0101)
 PCI device 00:01.3 (vd=8086:7113 c=0680)
 PCI device 00:02.0 (vd=1013:00b8 c=0300)
 PCI device 00:03.0 (vd=8086:100e c=0200)
 PCI device 00:04.0 (vd=1af4:1004 c=0100)
 Found 7 PCI devices (max PCI bus is 00)
 === PCI new allocation pass #1 ===
 PCI: check devices
 === PCI new

Re: [PATCH V2 2/2] tcm_vhost: Use vq-private_data to indicate if the endpoint is setup

2013-03-28 Thread Michael S. Tsirkin

On Thu, Mar 28, 2013 at 04:47:15PM +0800, Asias He wrote:
 On Thu, Mar 28, 2013 at 10:33:30AM +0200, Michael S. Tsirkin wrote:
  On Thu, Mar 28, 2013 at 04:10:02PM +0800, Asias He wrote:
   On Thu, Mar 28, 2013 at 08:16:59AM +0200, Michael S. Tsirkin wrote:
On Thu, Mar 28, 2013 at 10:17:28AM +0800, Asias He wrote:
 Currently, vs-vs_endpoint is used indicate if the endpoint is setup 
 or
 not. It is set or cleared in vhost_scsi_set_endpoint() or
 vhost_scsi_clear_endpoint() under the vs-dev.mutex lock. However, 
 when
 we check it in vhost_scsi_handle_vq(), we ignored the lock.
 
 Instead of using the vs-vs_endpoint and the vs-dev.mutex lock to
 indicate the status of the endpoint, we use per virtqueue
 vq-private_data to indicate it. In this way, we can only take the
 vq-mutex lock which is per queue and make the concurrent multiqueue
 process having less lock contention. Further, in the read side of
 vq-private_data, we can even do not take only lock if it is accessed 
 in
 the vhost worker thread, because it is protected by vhost rcu.
 
 Signed-off-by: Asias He as...@redhat.com
 ---
  drivers/vhost/tcm_vhost.c | 38 +-
  1 file changed, 33 insertions(+), 5 deletions(-)
 
 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 5e3d4487..0524267 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -67,7 +67,6 @@ struct vhost_scsi {
   /* Protected by vhost_scsi-dev.mutex */
   struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
   char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
 - bool vs_endpoint;
  
   struct vhost_dev dev;
   struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ];
 @@ -91,6 +90,24 @@ static int iov_num_pages(struct iovec *iov)
  ((unsigned long)iov-iov_base  PAGE_MASK))  
 PAGE_SHIFT;
  }
  
 +static bool tcm_vhost_check_endpoint(struct vhost_virtqueue *vq)
 +{
 + bool ret = false;
 +
 + /*
 +  * We can handle the vq only after the endpoint is setup by 
 calling the
 +  * VHOST_SCSI_SET_ENDPOINT ioctl.
 +  *
 +  * TODO: Check that we are running from vhost_worker which acts
 +  * as read-side critical section for vhost kind of RCU.
 +  * See the comments in struct vhost_virtqueue in 
 drivers/vhost/vhost.h
 +  */
 + if (rcu_dereference_check(vq-private_data, 1))
 + ret = true;
 +
 + return ret;
 +}
 +
  static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
  {
   return 1;
 @@ -581,8 +598,7 @@ static void vhost_scsi_handle_vq(struct 
 vhost_scsi *vs,
   int head, ret;
   u8 target;
  
 - /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
 - if (unlikely(!vs-vs_endpoint))
 + if (!tcm_vhost_check_endpoint(vq))
   return;


I would just move the check to under vq mutex,
and avoid rcu completely. In vhost-net we are using
private data outside lock so we can't do this,
no such issue here.
   
   Are you talking about:
   
  handle_tx:
  /* TODO: check that we are running from vhost_worker? */
  sock = rcu_dereference_check(vq-private_data, 1);
  if (!sock)
  return;
  
  wmem = atomic_read(sock-sk-sk_wmem_alloc);
  if (wmem = sock-sk-sk_sndbuf) {
  mutex_lock(vq-mutex);
  tx_poll_start(net, sock);
  mutex_unlock(vq-mutex);
  return;
  }
  mutex_lock(vq-mutex);
   
   Why not do the atomic_read and tx_poll_start under the vq-mutex, and 
   thus do
   the check under the lock as well.
  
  handle_rx:
  mutex_lock(vq-mutex);
  
  /* TODO: check that we are running from vhost_worker? */
  struct socket *sock = rcu_dereference_check(vq-private_data, 
   1);
  
  if (!sock)
  return;
  
  mutex_lock(vq-mutex);
   
   Can't we can do the check under the vq-mutex here?
   
   The rcu is still there but it makes the code easier to read. IMO, If we 
   want to
   use rcu, use it explicitly and avoid the vhost rcu completely. 
  
  The point is to make spurios wakeups as lightweight as possible.
  The seemed to happen a lot with -net.
  Should not happen with -scsi at all.
 
 I am wondering:
 
 1. Why there is a lot of spurios wakeups
 
 2. What performance impact it would give if we take the lock to check
vq-private_data. Sinc, at any time, either handle_tx or handle_rx
can be running. So we can almost always take the vq-mutex mutex.
Did you managed to measure real perf difference?

At some point when this was

Re: [PATCH V2 2/2] tcm_vhost: Use vq-private_data to indicate if the endpoint is setup

2013-03-28 Thread Michael S. Tsirkin

On Thu, Mar 28, 2013 at 04:10:02PM +0800, Asias He wrote:
 On Thu, Mar 28, 2013 at 08:16:59AM +0200, Michael S. Tsirkin wrote:
  On Thu, Mar 28, 2013 at 10:17:28AM +0800, Asias He wrote:
   Currently, vs-vs_endpoint is used indicate if the endpoint is setup or
   not. It is set or cleared in vhost_scsi_set_endpoint() or
   vhost_scsi_clear_endpoint() under the vs-dev.mutex lock. However, when
   we check it in vhost_scsi_handle_vq(), we ignored the lock.
   
   Instead of using the vs-vs_endpoint and the vs-dev.mutex lock to
   indicate the status of the endpoint, we use per virtqueue
   vq-private_data to indicate it. In this way, we can only take the
   vq-mutex lock which is per queue and make the concurrent multiqueue
   process having less lock contention. Further, in the read side of
   vq-private_data, we can even do not take only lock if it is accessed in
   the vhost worker thread, because it is protected by vhost rcu.
   
   Signed-off-by: Asias He as...@redhat.com
   ---
drivers/vhost/tcm_vhost.c | 38 +-
1 file changed, 33 insertions(+), 5 deletions(-)
   
   diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
   index 5e3d4487..0524267 100644
   --- a/drivers/vhost/tcm_vhost.c
   +++ b/drivers/vhost/tcm_vhost.c
   @@ -67,7 +67,6 @@ struct vhost_scsi {
 /* Protected by vhost_scsi-dev.mutex */
 struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
 char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
   - bool vs_endpoint;

 struct vhost_dev dev;
 struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ];
   @@ -91,6 +90,24 @@ static int iov_num_pages(struct iovec *iov)
((unsigned long)iov-iov_base  PAGE_MASK))  PAGE_SHIFT;
}

   +static bool tcm_vhost_check_endpoint(struct vhost_virtqueue *vq)
   +{
   + bool ret = false;
   +
   + /*
   +  * We can handle the vq only after the endpoint is setup by calling the
   +  * VHOST_SCSI_SET_ENDPOINT ioctl.
   +  *
   +  * TODO: Check that we are running from vhost_worker which acts
   +  * as read-side critical section for vhost kind of RCU.
   +  * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
   +  */
   + if (rcu_dereference_check(vq-private_data, 1))
   + ret = true;
   +
   + return ret;
   +}
   +
static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
{
 return 1;
   @@ -581,8 +598,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi 
   *vs,
 int head, ret;
 u8 target;

   - /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
   - if (unlikely(!vs-vs_endpoint))
   + if (!tcm_vhost_check_endpoint(vq))
 return;
  
  
  I would just move the check to under vq mutex,
  and avoid rcu completely. In vhost-net we are using
  private data outside lock so we can't do this,
  no such issue here.
 
 Are you talking about:
 
handle_tx:
/* TODO: check that we are running from vhost_worker? */
sock = rcu_dereference_check(vq-private_data, 1);
if (!sock)
return;

wmem = atomic_read(sock-sk-sk_wmem_alloc);
if (wmem = sock-sk-sk_sndbuf) {
mutex_lock(vq-mutex);
tx_poll_start(net, sock);
mutex_unlock(vq-mutex);
return;
}
mutex_lock(vq-mutex);
 
 Why not do the atomic_read and tx_poll_start under the vq-mutex, and thus do
 the check under the lock as well.

handle_rx:
mutex_lock(vq-mutex);

/* TODO: check that we are running from vhost_worker? */
struct socket *sock = rcu_dereference_check(vq-private_data, 1);

if (!sock)
return;

mutex_lock(vq-mutex);
 
 Can't we can do the check under the vq-mutex here?
 
 The rcu is still there but it makes the code easier to read. IMO, If we want 
 to
 use rcu, use it explicitly and avoid the vhost rcu completely. 
 
 mutex_lock(vq-mutex);
   @@ -829,11 +845,12 @@ static int vhost_scsi_set_endpoint(
sizeof(vs-vs_vhost_wwpn));
 for (i = 0; i  VHOST_SCSI_MAX_VQ; i++) {
 vq = vs-vqs[i];
   + /* Flushing the vhost_work acts as synchronize_rcu */
 mutex_lock(vq-mutex);
   + rcu_assign_pointer(vq-private_data, vs);
 vhost_init_used(vq);
 mutex_unlock(vq-mutex);
 }
   - vs-vs_endpoint = true;
 ret = 0;
 } else {
 ret = -EEXIST;
  
  
  There's also some weird smp_mb__after_atomic_inc() with no
  atomic in sight just above ... Nicholas what was the point there?
  
  
   @@ -849,6 +866,8 @@ static int vhost_scsi_clear_endpoint(
{
 struct tcm_vhost_tport *tv_tport;
 struct tcm_vhost_tpg *tv_tpg;
   + struct vhost_virtqueue *vq;
   + bool match = false;
 int index, ret, i;
 u8 target;

   @@

Re: [PATCH V3 WIP 3/3] disable vhost_verify_ring_mappings check

2013-03-28 Thread Paolo Bonzini

Il 28/03/2013 10:04, Michael S. Tsirkin ha scritto:
   Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
   Checking vq: 1 ring_phys: 0 ring_size: 1028 .
   Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
   Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
   Calling l: 5124 for start_addr: c for vq 2
   Unable to map ring buffer for ring 2
   l: 4096 ring_size: 5124
 okay so the ring address is within ROM.
 Unsurprisingly it fails.
 bios should stop device before write protect.
 

The above log is very early, when everything is RAM:

  vhost_set_memory: section: 0x7fe2801f2b60 section-size: 2146697216 add: 0
  Before vhost_verify_ring_mappings: start_addr: c size: 2146697216

The rings are not within ROM.  ROM is at 0xc-0xcc000 according to the
PAM registers.

The way I followed the debug output, Got ranges_overlap means 
actually bailing out because ranges do not overlap.  In particular, 
here all three virtqueues fail the test, because this is the ROM area 
0xc..0xc7fff:

  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 32768 add: 1
  Before vhost_verify_ring_mappings: start_addr: c size: 32768
  Checking vq: 0 ring_phys: 0 ring_size: 1028 .
  Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
  Checking vq: 1 ring_phys: 0 ring_size: 1028 .
  Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
  Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
  Got ranges_overlap for vq: 2 ring_phys: ed000 ring_size: 5124

Just below, vhost looks at the large RAM area starting at 0xc8000
(it's large because 0xf..0xf is still RAM):

  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 2146664448 add: 1
  Before vhost_verify_ring_mappings: start_addr: c8000 size: 2146664448
  Checking vq: 0 ring_phys: 0 ring_size: 1028 .
  Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
  Checking vq: 1 ring_phys: 0 ring_size: 1028 .
  Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
  Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
  Calling l: 5124 for start_addr: c8000 for vq 2

Here vq 0 and 1 fail the test because they are in low RAM, vq 2 passes.

After 0xf..0xf is marked readonly, vhost looks at the RAM
between 0xc9000 and 0xf:

  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 159744 add: 1
  Before vhost_verify_ring_mappings: start_addr: c9000 size: 159744
  Checking vq: 0 ring_phys: 0 ring_size: 1028 .
  Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
  Checking vq: 1 ring_phys: 0 ring_size: 1028 .
  Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
  Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
  Calling l: 5124 for start_addr: c9000 for vq 2

and the ROM between 0xf and 0xf, which no ring overlaps with:

  vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 65536 add: 1
  Before vhost_verify_ring_mappings: start_addr: f size: 65536
  Checking vq: 0 ring_phys: 0 ring_size: 1028 .
  Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
  Checking vq: 1 ring_phys: 0 ring_size: 1028 .
  Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
  Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
  Got ranges_overlap for vq: 2 ring_phys: ed000 ring_size: 5124



SeaBIOS is indeed not initializing vqs 0/1 (the control and event 
queues), so their ring_phys is 0.  But the one that is failing is vq 2, 
the first request queue.

Your patch seems good, but shouldn't fix this problem.

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V3 WIP 3/3] disable vhost_verify_ring_mappings check

2013-03-28 Thread Paolo Bonzini

 I think it's the right thing to do, but maybe not the right place
 to do this, need to reset after all IO is done, before
 ring memory is write protected.

Our emails are crossing each other unfortunately, but I want to
reinforce this: ring memory is not write protected.  Remember that
SeaBIOS can even provide virtio-scsi access to DOS, so you must
not reset the device.  It must remain functional all the time,
and the OS's own driver will reset it when it's started.

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 3/4 v2] Rename EMULATE_DO_PAPR to EMULATE_EXIT_USER

2013-03-28 Thread Alexander Graf


On 21.03.2013, at 07:25, Bharat Bhushan wrote:

 From: Bharat Bhushan bharat.bhus...@freescale.com
 
 Instruction emulation return EMULATE_DO_PAPR when it requires
 exit to userspace on book3s. Similar return is required
 for booke. EMULATE_DO_PAPR reads out to be confusing so it is
 renamed to EMULATE_EXIT_USER.

Please update the patch description.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

KVM EPT implementation

2013-03-28 Thread Tony Roberts

Hello list,

(Apologies if this appears twice!)

I'm currently doing some research into guest memory allocation,
specifically trying to determine when guests write data into certain
memory locations, and I'm trying to get my head around how KVM updates
the extended page tables, and where within the KVM code the actual
updates occur.  I'm working on an Intel box with VT extensions, and
Debian 3.6.6 kernel.

After going through the code, I can see that a lot of the existing
shadow page table code is resued, however I'm a little confused over
how exactly that is.

As an example, I can see the function vmx_set_cr3 (vmx.c) being
called, which is setting the host CR3 to the base of the PML4 table.

Then from that address, the EPTP is created, essentially setting the
bottom 12 bits to various flags.

Then, handle_ept_violation is called which contains the GPA that
generated the page fault.  I've looked into the function
kvm_mmu_page_fault which contains the value in the CR2, I'm assuming
this to be the guest's CR2 value, which I think is the guest physical
address that caused the page fault.

However this is where I lose the chase slightly.  I know from studying
the Intel developers manuals that the top level of the 4 level
hierarchy for the EPTs is the PML4 table, which can contain a maximum
of 512 64-bit entries, with each entry in turn pointing to the base
address of a PDPT.

The first address that the function pte_list_add sees is the base
address of the PML4 table, so I was expecting to be able to read 512
64-bit entries from that base address and see at least one 64-bit
entry written into that page.  However, after a number of different
attempts, I'm unable to determine the function that is actually
responsible for updating the EPTs.

I was hoping somebody might be able to point me to the correct
location within the KVM source code to track when EPT entries are
actually written to the various tables in the 4 level hierarchy.  The
function pte_list_add seems to do nothing more than change the value
of a pointer, but only the first address passed to it is page aligned
(the PML4 base) and the rest of the addresses appear to be pointers
into existing pages, often seeming to be outside of the PML4 page
range.

I might be completely misunderstanding something, but any advice on
how to effectively monitor EPT entries within KVM would be greatly
appreciated.

Thanks muchly.

Tony
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC PATCH] pmu: prepare for migration support

2013-03-28 Thread Paolo Bonzini

In order to migrate the PMU state correctly, we need to restore the
values of MSR_CORE_PERF_GLOBAL_STATUS (a read-only register) and
MSR_CORE_PERF_GLOBAL_OVF_CTRL (which has side effects when written).
We also need to write the full 40-bit value of the performance counter,
which would only be possible with a v3 architectural PMU's full-width
counter MSRs.

To distinguish host-initiated writes from the guest's, pass the
full struct msr_data to kvm_pmu_set_msr.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/pmu.c  | 14 +++---
 arch/x86/kvm/x86.c  |  4 ++--
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 36fba01..e2e09f3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1029,7 +1029,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
 bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index cfc258a..c53e797 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 
*data)
return 1;
 }
 
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
struct kvm_pmu *pmu = vcpu-arch.pmu;
struct kvm_pmc *pmc;
+   u32 index = msr_info-index;
+   u64 data = msr_info-data;
 
switch (index) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 
data)
}
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
+   if (msr_info-host_initiated) {
+   pmu-global_status = data;
+   return 0;
+   }
break; /* RO MSR */
case MSR_CORE_PERF_GLOBAL_CTRL:
if (pmu-global_ctrl == data)
@@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 
data)
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
if (!(data  (pmu-global_ctrl_mask  ~(3ull62 {
-   pmu-global_status = ~data;
+   if (!msr_info-host_initiated)
+   pmu-global_status = ~data;
pmu-global_ovf_ctrl = data;
return 0;
}
@@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 
data)
default:
if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
(pmc = get_fixed_pmc(pmu, index))) {
-   data = (s64)(s32)data;
+   if (!msr_info-host_initiated)
+   data = (s64)(s32)data;
pmc-counter += data - read_pmc(pmc);
return 0;
} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3e0a8ba..1d928af 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2042,7 +2042,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
if (kvm_pmu_msr(vcpu, msr))
-   return kvm_pmu_set_msr(vcpu, msr, data);
+   return kvm_pmu_set_msr(vcpu, msr_info);
 
if (pr || data != 0)
vcpu_unimpl(vcpu, disabled perfctr wrmsr: 
@@ -2088,7 +2088,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
if (msr  (msr == vcpu-kvm-arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
if (kvm_pmu_msr(vcpu, msr))
-   return kvm_pmu_set_msr(vcpu, msr, data);
+   return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
vcpu_unimpl(vcpu, unhandled wrmsr: 0x%x data %llx\n,
msr, data);
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] pmu: prepare for migration support

2013-03-28 Thread Paolo Bonzini

In order to migrate the PMU state correctly, we need to restore the
values of MSR_CORE_PERF_GLOBAL_STATUS (a read-only register) and
MSR_CORE_PERF_GLOBAL_OVF_CTRL (which has side effects when written).
We also need to write the full 40-bit value of the performance counter,
 which would only be possible with a v3 architectural PMU's
 full-width counter MSRs.

 To distinguish host-initiated writes from the guest's, pass the
 full struct msr_data to kvm_pmu_set_msr.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/pmu.c  | 14 +++---
 arch/x86/kvm/x86.c  |  4 ++--
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 36fba01..e2e09f3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1029,7 +1029,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
 bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index cfc258a..c53e797 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 
*data)
return 1;
 }
 
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
struct kvm_pmu *pmu = vcpu-arch.pmu;
struct kvm_pmc *pmc;
+   u32 index = msr_info-index;
+   u64 data = msr_info-data;
 
switch (index) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 
data)
}
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
+   if (msr_info-host_initiated) {
+   pmu-global_status = data;
+   return 0;
+   }
break; /* RO MSR */
case MSR_CORE_PERF_GLOBAL_CTRL:
if (pmu-global_ctrl == data)
@@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 
data)
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
if (!(data  (pmu-global_ctrl_mask  ~(3ull62 {
-   pmu-global_status = ~data;
+   if (!msr_info-host_initiated)
+   pmu-global_status = ~data;
pmu-global_ovf_ctrl = data;
return 0;
}
@@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 
data)
default:
if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
(pmc = get_fixed_pmc(pmu, index))) {
-   data = (s64)(s32)data;
+   if (!msr_info-host_initiated)
+   data = (s64)(s32)data;
pmc-counter += data - read_pmc(pmc);
return 0;
} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3e0a8ba..1d928af 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2042,7 +2042,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
case MSR_P6_EVNTSEL0:
case MSR_P6_EVNTSEL1:
if (kvm_pmu_msr(vcpu, msr))
-   return kvm_pmu_set_msr(vcpu, msr, data);
+   return kvm_pmu_set_msr(vcpu, msr_info);
 
if (pr || data != 0)
vcpu_unimpl(vcpu, disabled perfctr wrmsr: 
@@ -2088,7 +2088,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
if (msr  (msr == vcpu-kvm-arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
if (kvm_pmu_msr(vcpu, msr))
-   return kvm_pmu_set_msr(vcpu, msr, data);
+   return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
vcpu_unimpl(vcpu, unhandled wrmsr: 0x%x data %llx\n,
msr, data);
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support

2013-03-28 Thread Alexander Graf


On 21.03.2013, at 07:25, Bharat Bhushan wrote:

 From: Bharat Bhushan bharat.bhus...@freescale.com
 
 This patch adds the debug stub support on booke/bookehv.
 Now QEMU debug stub can use hw breakpoint, watchpoint and
 software breakpoint to debug guest.
 
 Debug registers are saved/restored on vcpu_put()/vcpu_get().
 Also the debug registers are saved restored only if guest is using
 debug resources.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 v2:
 - save/restore in vcpu_get()/vcpu_put()
 - some more minor cleanup based on review comments.
 
 arch/powerpc/include/asm/kvm_host.h |   10 ++
 arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
 arch/powerpc/kvm/booke.c|  252 ---
 arch/powerpc/kvm/e500_emulate.c |   10 ++
 4 files changed, 272 insertions(+), 22 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h 
 b/arch/powerpc/include/asm/kvm_host.h
 index f4ba881..8571952 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -504,7 +504,17 @@ struct kvm_vcpu_arch {
   u32 mmucfg;
   u32 epr;
   u32 crit_save;
 + /* guest debug registers*/
   struct kvmppc_booke_debug_reg dbg_reg;
 + /* shadow debug registers */
 + struct kvmppc_booke_debug_reg shadow_dbg_reg;
 + /* host debug registers*/
 + struct kvmppc_booke_debug_reg host_dbg_reg;
 + /*
 +  * Flag indicating that debug registers are used by guest
 +  * and requires save restore.
 + */
 + bool debug_save_restore;
 #endif
   gpa_t paddr_accessed;
   gva_t vaddr_accessed;
 diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
 b/arch/powerpc/include/uapi/asm/kvm.h
 index 15f9a00..d7ce449 100644
 --- a/arch/powerpc/include/uapi/asm/kvm.h
 +++ b/arch/powerpc/include/uapi/asm/kvm.h
 @@ -25,6 +25,7 @@
 /* Select powerpc specific features in linux/kvm.h */
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
 +#define __KVM_HAVE_GUEST_DEBUG
 
 struct kvm_regs {
   __u64 pc;
 @@ -267,7 +268,24 @@ struct kvm_fpu {
   __u64 fpr[32];
 };
 
 +/*
 + * Defines for h/w breakpoint, watchpoint (read, write or both) and
 + * software breakpoint.
 + * These are used as type in KVM_SET_GUEST_DEBUG ioctl and status
 + * for KVM_DEBUG_EXIT.
 + */
 +#define KVMPPC_DEBUG_NONE0x0
 +#define KVMPPC_DEBUG_BREAKPOINT  (1UL  1)
 +#define KVMPPC_DEBUG_WATCH_WRITE (1UL  2)
 +#define KVMPPC_DEBUG_WATCH_READ  (1UL  3)
 struct kvm_debug_exit_arch {
 + __u64 address;
 + /*
 +  * exiting to userspace because of h/w breakpoint, watchpoint
 +  * (read, write or both) and software breakpoint.
 +  */
 + __u32 status;
 + __u32 reserved;
 };
 
 /* for KVM_SET_GUEST_DEBUG */
 @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
* Type denotes h/w breakpoint, read watchpoint, write
* watchpoint or watchpoint (both read and write).
*/
 -#define KVMPPC_DEBUG_NOTYPE  0x0
 -#define KVMPPC_DEBUG_BREAKPOINT  (1UL  1)
 -#define KVMPPC_DEBUG_WATCH_WRITE (1UL  2)
 -#define KVMPPC_DEBUG_WATCH_READ  (1UL  3)
   __u32 type;
   __u32 reserved;
   } bp[16];
 diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
 index 1de93a8..bf20056 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 @@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
 +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
 +{
 + /* Synchronize guest's desire to get debug interrupts into shadow MSR */
 +#ifndef CONFIG_KVM_BOOKE_HV
 + vcpu-arch.shadow_msr = ~MSR_DE;
 + vcpu-arch.shadow_msr |= vcpu-arch.shared-msr  MSR_DE;
 +#endif
 +
 + /* Force enable debug interrupts when user space wants to debug */
 + if (vcpu-guest_debug) {
 +#ifdef CONFIG_KVM_BOOKE_HV
 + /*
 +  * Since there is no shadow MSR, sync MSR_DE into the guest
 +  * visible MSR. Do not allow guest to change MSR[DE].
 +  */
 + vcpu-arch.shared-msr |= MSR_DE;
 + mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);

This mtspr should really just be a bit or in shadow_mspr when guest_debug gets 
enabled. It should automatically get synchronized as soon as the next 
vpcu_load() happens.

Also, what happens when user space disables guest_debug?

 +#else
 + vcpu-arch.shadow_msr |= MSR_DE;
 + vcpu-arch.shared-msr = ~MSR_DE;
 +#endif
 + }
 +}
 +
 /*
  * Helper function for full MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
 @@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
   kvmppc_mmu_msr_notify(vcpu, old_msr);
   kvmppc_vcpu_sync_spe(vcpu);
   kvmppc_vcpu_sync_fpu(vcpu);
 + kvmppc_vcpu_sync_debug(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu

Re: [PATCH] tcm_vhost: Avoid VIRTIO_RING_F_EVENT_IDX feature bit

2013-03-28 Thread Michael S. Tsirkin

On Wed, Mar 27, 2013 at 11:25:02PM -0700, Nicholas A. Bellinger wrote:
 On Thu, 2013-03-28 at 08:04 +0200, Michael S. Tsirkin wrote:
  On Thu, Mar 28, 2013 at 12:27:10AM +, Nicholas A. Bellinger wrote:
   From: Nicholas Bellinger n...@linux-iscsi.org
   
   This patch adds a VHOST_TCM_FEATURES mask minus VIRTIO_RING_F_EVENT_IDX
   so that vhost-scsi-pci userspace will strip this feature bit once
   GET_FEATURES reports it as being unsupported on the host.
   
   This is to avoid a bug where -handle_kicks() are missed when EVENT_IDX
   is enabled by default in userspace code.
   
   Cc: Michael S. Tsirkin m...@redhat.com
   Cc: Asias He as...@redhat.com
   Cc: Paolo Bonzini pbonz...@redhat.com
   Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
  
  No chance we can debug this properly?
  
 
 Well, I assumed this was a low priority item ahead of tracking down the
 other blocking seabios related issue.
 
 I'm fine with dropping this patch and requiring the event_idx=off option
 for now for an initial QEMU merge, but thought this made things a little
 easier for CLI users to avoid confusion.
 
   ---
drivers/vhost/tcm_vhost.c |   10 --
1 files changed, 8 insertions(+), 2 deletions(-)
   
   diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
   index 0524267..b127edc 100644
   --- a/drivers/vhost/tcm_vhost.c
   +++ b/drivers/vhost/tcm_vhost.c
   @@ -60,6 +60,12 @@ enum {
 VHOST_SCSI_VQ_IO = 2,
};

   +enum {
   + VHOST_TCM_FEATURES = (1ULL  VIRTIO_F_NOTIFY_ON_EMPTY) |
   +  (1ULL  VIRTIO_RING_F_INDIRECT_DESC) |
   +  (1ULL  VHOST_F_LOG_ALL)
  
  All the rest of the code uses VHOST_SCSI so rename this too?
  
   +};
   +
  
  Please do something like
  
  +/*
  +* VIRTIO_RING_F_EVENT_IDX seems broken. Not sure the bug is in
  +* kernel but disabling it helps.
  +* TODO: debug and remove the workaround.
  +*/
  +enum {
  + VHOST_SCSI_FEATURES = VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)
  +}
  
 
 Patch updated.
 
 Thanks,
 
 --nab


Still haven't seen it, anyway
Acked-by: Michael S. Tsirkin m...@redhat.com



#define VHOST_SCSI_MAX_TARGET256
#define VHOST_SCSI_MAX_VQ128

   @@ -981,7 +987,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)

static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
{
   - if (features  ~VHOST_FEATURES)
   + if (features  ~VHOST_TCM_FEATURES)
 return -EOPNOTSUPP;

 mutex_lock(vs-dev.mutex);
   @@ -1027,7 +1033,7 @@ static long vhost_scsi_ioctl(struct file *f, 
   unsigned int ioctl,
 return -EFAULT;
 return 0;
 case VHOST_GET_FEATURES:
   - features = VHOST_FEATURES;
   + features = VHOST_TCM_FEATURES;
 if (copy_to_user(featurep, features, sizeof features))
 return -EFAULT;
 return 0;
   -- 
   1.7.2.5
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] tcm_vhost: Avoid VIRTIO_RING_F_EVENT_IDX feature bit

2013-03-28 Thread Michael S. Tsirkin

On Wed, Mar 27, 2013 at 11:25:02PM -0700, Nicholas A. Bellinger wrote:
 On Thu, 2013-03-28 at 08:04 +0200, Michael S. Tsirkin wrote:
  On Thu, Mar 28, 2013 at 12:27:10AM +, Nicholas A. Bellinger wrote:
   From: Nicholas Bellinger n...@linux-iscsi.org
   
   This patch adds a VHOST_TCM_FEATURES mask minus VIRTIO_RING_F_EVENT_IDX
   so that vhost-scsi-pci userspace will strip this feature bit once
   GET_FEATURES reports it as being unsupported on the host.
   
   This is to avoid a bug where -handle_kicks() are missed when EVENT_IDX
   is enabled by default in userspace code.
   
   Cc: Michael S. Tsirkin m...@redhat.com
   Cc: Asias He as...@redhat.com
   Cc: Paolo Bonzini pbonz...@redhat.com
   Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
  
  No chance we can debug this properly?
  
 
 Well, I assumed this was a low priority item ahead of tracking down the
 other blocking seabios related issue.
 
 I'm fine with dropping this patch and requiring the event_idx=off option
 for now for an initial QEMU merge, but thought this made things a little
 easier for CLI users to avoid confusion.

Yea, we can't drink the ocean.

   ---
drivers/vhost/tcm_vhost.c |   10 --
1 files changed, 8 insertions(+), 2 deletions(-)
   
   diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
   index 0524267..b127edc 100644
   --- a/drivers/vhost/tcm_vhost.c
   +++ b/drivers/vhost/tcm_vhost.c
   @@ -60,6 +60,12 @@ enum {
 VHOST_SCSI_VQ_IO = 2,
};

   +enum {
   + VHOST_TCM_FEATURES = (1ULL  VIRTIO_F_NOTIFY_ON_EMPTY) |
   +  (1ULL  VIRTIO_RING_F_INDIRECT_DESC) |
   +  (1ULL  VHOST_F_LOG_ALL)
  
  All the rest of the code uses VHOST_SCSI so rename this too?
  
   +};
   +
  
  Please do something like
  
  +/*
  +* VIRTIO_RING_F_EVENT_IDX seems broken. Not sure the bug is in
  +* kernel but disabling it helps.
  +* TODO: debug and remove the workaround.
  +*/
  +enum {
  + VHOST_SCSI_FEATURES = VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)
  +}
  
 
 Patch updated.
 
 Thanks,
 
 --nab
 
#define VHOST_SCSI_MAX_TARGET256
#define VHOST_SCSI_MAX_VQ128

   @@ -981,7 +987,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)

static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
{
   - if (features  ~VHOST_FEATURES)
   + if (features  ~VHOST_TCM_FEATURES)
 return -EOPNOTSUPP;

 mutex_lock(vs-dev.mutex);
   @@ -1027,7 +1033,7 @@ static long vhost_scsi_ioctl(struct file *f, 
   unsigned int ioctl,
 return -EFAULT;
 return 0;
 case VHOST_GET_FEATURES:
   - features = VHOST_FEATURES;
   + features = VHOST_TCM_FEATURES;
 if (copy_to_user(featurep, features, sizeof features))
 return -EFAULT;
 return 0;
   -- 
   1.7.2.5
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/3] vhost-scsi: new device supporting the tcm_vhost Linux kernel module

2013-03-28 Thread Michael S. Tsirkin

On Wed, Mar 27, 2013 at 11:28:56PM -0700, Nicholas A. Bellinger wrote:
 On Thu, 2013-03-28 at 08:19 +0200, Michael S. Tsirkin wrote:
  On Wed, Mar 27, 2013 at 04:24:22PM -0700, Nicholas A. Bellinger wrote:
   On Thu, 2013-03-28 at 00:53 +0200, Michael S. Tsirkin wrote:
On Thu, Mar 28, 2013 at 12:50:21AM +0200, Michael S. Tsirkin wrote:
 On Wed, Mar 27, 2013 at 03:45:42PM -0700, Nicholas A. Bellinger wrote:
  On Thu, 2013-03-28 at 00:28 +0200, Michael S. Tsirkin wrote:
   On Wed, Mar 27, 2013 at 09:59:45PM +, Nicholas A. Bellinger 
   wrote:
From: Paolo Bonzini pbonz...@redhat.com

The WWPN specified in configfs is passed to -device 
vhost-scsi-pci.
The tgpt field of the SET_ENDPOINT ioctl is obsolete now, so it 
is not
available from the QEMU command-line.  Instead, I hardcode it 
to zero.

Changes in V4:
   - Set event_idx=off by default (nab, thanks asias)
   
   Why? What's going on here?
   
  
  Not disabling event_idx by default, or disabling from the command 
  line
  ends up resulting in -handle_kick() not getting called for 
  subsequent
  commands..
  
  I spent some time trying to track this down recently with no luck, 
  and
  AFAICT it's always been required in order for vhost-scsi to 
  function.
 
 Hmm this is a bug in kernel then. A better work-around is
 to disable EVENT_IDX in kernel. Let's do it for 3.9?

But before we do, can you check that SET_FEATURES is
called with this bit set if you enable event_idx?
If not that's your bug then ...

   
   Ok, SET_FEATURES is currently not setting any bits at all based upon
   vhost_dev-features, so it looks like a vhost-scsi-pci bug..
   
   
  
  If you call SET_FEATURES, does it start working even without
  disabling EVENT_IDX in kernel?
  
 
 Unfortunately, no.  The same strangeness persists unless event_idx=off
 is passed, or EVENT_IDX is disabled in the kernel and stripped off with
 the vhost_scsi_get_features() patch.
 


I think it's important to fix it (usually exposes other bugs)
but I agree, lets get it working without first.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH-v2] tcm_vhost: Avoid VIRTIO_RING_F_EVENT_IDX feature bit

2013-03-28 Thread Nicholas A. Bellinger

From: Nicholas Bellinger n...@linux-iscsi.org

This patch adds a VHOST_SCSI_FEATURES mask minus VIRTIO_RING_F_EVENT_IDX
so that vhost-scsi-pci userspace will strip this feature bit once
GET_FEATURES reports it as being unsupported on the host.

This is to avoid a bug where -handle_kicks() are missed when EVENT_IDX
is enabled by default in userspace code.

(mst: Rename to VHOST_SCSI_FEATURES add comment)

Cc: Michael S. Tsirkin m...@redhat.com
Cc: Asias He as...@redhat.com
Cc: Paolo Bonzini pbonz...@redhat.com
Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
---
 drivers/vhost/tcm_vhost.c |   13 +++--
 1 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 0524267..4a68c1f 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -60,6 +60,15 @@ enum {
VHOST_SCSI_VQ_IO = 2,
 };
 
+/*
+ * VIRTIO_RING_F_EVENT_IDX seems broken. Not sure the bug is in
+ * kernel but disabling it helps.
+ * TODO: debug and remove the workaround.
+ */
+enum {
+   VHOST_SCSI_FEATURES = VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)
+};
+
 #define VHOST_SCSI_MAX_TARGET  256
 #define VHOST_SCSI_MAX_VQ  128
 
@@ -981,7 +990,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
 
 static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
 {
-   if (features  ~VHOST_FEATURES)
+   if (features  ~VHOST_SCSI_FEATURES)
return -EOPNOTSUPP;
 
mutex_lock(vs-dev.mutex);
@@ -1027,7 +1036,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int 
ioctl,
return -EFAULT;
return 0;
case VHOST_GET_FEATURES:
-   features = VHOST_FEATURES;
+   features = VHOST_SCSI_FEATURES;
if (copy_to_user(featurep, features, sizeof features))
return -EFAULT;
return 0;
-- 
1.7.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] tcm_vhost: Avoid VIRTIO_RING_F_EVENT_IDX feature bit

2013-03-28 Thread Nicholas A. Bellinger

On Thu, 2013-03-28 at 18:44 +0200, Michael S. Tsirkin wrote:
 On Wed, Mar 27, 2013 at 11:25:02PM -0700, Nicholas A. Bellinger wrote:
  On Thu, 2013-03-28 at 08:04 +0200, Michael S. Tsirkin wrote:
   On Thu, Mar 28, 2013 at 12:27:10AM +, Nicholas A. Bellinger wrote:
From: Nicholas Bellinger n...@linux-iscsi.org

This patch adds a VHOST_TCM_FEATURES mask minus VIRTIO_RING_F_EVENT_IDX
so that vhost-scsi-pci userspace will strip this feature bit once
GET_FEATURES reports it as being unsupported on the host.

This is to avoid a bug where -handle_kicks() are missed when EVENT_IDX
is enabled by default in userspace code.

Cc: Michael S. Tsirkin m...@redhat.com
Cc: Asias He as...@redhat.com
Cc: Paolo Bonzini pbonz...@redhat.com
Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
   
   No chance we can debug this properly?
   
  
  Well, I assumed this was a low priority item ahead of tracking down the
  other blocking seabios related issue.
  
  I'm fine with dropping this patch and requiring the event_idx=off option
  for now for an initial QEMU merge, but thought this made things a little
  easier for CLI users to avoid confusion.
  
---
 drivers/vhost/tcm_vhost.c |   10 --
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 0524267..b127edc 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -60,6 +60,12 @@ enum {
VHOST_SCSI_VQ_IO = 2,
 };
 
+enum {
+   VHOST_TCM_FEATURES = (1ULL  VIRTIO_F_NOTIFY_ON_EMPTY) |
+(1ULL  VIRTIO_RING_F_INDIRECT_DESC) |
+(1ULL  VHOST_F_LOG_ALL)
   
   All the rest of the code uses VHOST_SCSI so rename this too?
   
+};
+
   
   Please do something like
   
   +/*
   +* VIRTIO_RING_F_EVENT_IDX seems broken. Not sure the bug is in
   +* kernel but disabling it helps.
   +* TODO: debug and remove the workaround.
   +*/
   +enum {
   + VHOST_SCSI_FEATURES = VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)
   +}
   
  
  Patch updated.
  
  Thanks,
  
  --nab
 
 
 Still haven't seen it, anyway

Apologies, I had to attend to some other items this afternoon..

Sent this updated patch out a moment ago.

 Acked-by: Michael S. Tsirkin m...@redhat.com
 
 

Thanks, including this now.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH-v2 0/2] Add support for vhost-scsi-pci

2013-03-28 Thread Nicholas A. Bellinger

From: Nicholas Bellinger n...@linux-iscsi.org

Hi QEMU folks,

The following is the patch-v2 series to support vhost-scsi-pci within
the upstream QEMU tree.

This includes the refactoring of existing virtio-scsi code from Paolo
to allow a VirtIOSCSICommon structure that is shared amoungst existing
virtio-scsi-pci device and new vhost-scsi-pci device code.

Currently this code requires Asias's seabios bugfixes (commit 5a7730db5
and b44a7be17), and two other tcm_vhost patches to reset vq-last_used_idx
after seabios handoff and disable EVENT_IDX from GET_FEATURES that's
currently in flight for v3.9.0 kernel code.

There is a seperate issue with seabios virtio-scsi that's causing a failure
in vhost_verify_ring_mappings() is still being tracked down, but this series
should now be getting close to a merge minus this last outstanding item.

Changes in Patch-v2:
   - Move -get_features() assignment to virtio_scsi_init() instead of
 virtio_scsi_init_common() (nab)
   - Add vhost_scsi_get_features() in order to determine feature bits
 supports by host kernel (mst + nab)
   - Re-enable usage of DEFINE_VIRTIO_COMMON_FEATURES, and allow
 EVENT_IDX to be disabled by host in vhost_scsi_get_features() (mst)
   - Drop unused hotplug bit in DEFINE_VHOST_SCSI_PROPERTIES (mst)
   - Drop vhost_verify_ring_mappings() enable-only hack (mst)

A big thanks to Paolo, Asias, MST, and Stefan for all of their efforts
on this series.

Thank you,

Paolo Bonzini (2):
  virtio-scsi: create VirtIOSCSICommon
  vhost-scsi: new device supporting the tcm_vhost Linux kernel module

 configure  |   15 +++-
 hw/Makefile.objs   |5 +-
 hw/s390x/s390-virtio-bus.c |   35 ++
 hw/vhost-scsi.c|  264 
 hw/vhost-scsi.h|   64 +++
 hw/virtio-pci.c|   62 ++
 hw/virtio-scsi.c   |  192 
 hw/virtio-scsi.h   |  132 +-
 include/qemu/osdep.h   |4 +
 9 files changed, 623 insertions(+), 150 deletions(-)
 create mode 100644 hw/vhost-scsi.c
 create mode 100644 hw/vhost-scsi.h

-- 
1.7.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH-v2 2/2] vhost-scsi: new device supporting the tcm_vhost Linux kernel module

2013-03-28 Thread Nicholas A. Bellinger

From: Paolo Bonzini pbonz...@redhat.com

The WWPN specified in configfs is passed to -device vhost-scsi-pci.
The tgpt field of the SET_ENDPOINT ioctl is obsolete now, so it is not
available from the QEMU command-line.  Instead, I hardcode it to zero.

Changes in Patch-v2:
   - Add vhost_scsi_get_features() in order to determine feature bits
 supports by host kernel (mst + nab)
   - Re-enable usage of DEFINE_VIRTIO_COMMON_FEATURES, and allow
 EVENT_IDX to be disabled by host in vhost_scsi_get_features()
   - Drop unused hotplug bit in DEFINE_VHOST_SCSI_PROPERTIES

Changes in Patch-v1:
   - Set event_idx=off by default (nab, thanks asias)
   - Disable hotplug feature bit for v3.9 tcm_vhost kernel code, need to
 re-enable in v3.10 (nab)
   - Update to latest qemu.git/master HEAD

Changes in WIP-V3:
   - Drop ioeventfd vhost_scsi_properties (asias, thanks stefanha)
   - Add CONFIG_VHOST_SCSI (asias, thanks stefanha)
   - Add hotplug feature bit

Changes in WIP-V2:
   - Add backend guest masking support (nab)
   - Bump ABI_VERSION to 1 (nab)
   - Set up set_guest_notifiers (asias)
   - Set up vs-dev.vq_index (asias)
   - Drop vs-vs.vdev.{set,clear}_vhost_endpoint (asias)
   - Drop VIRTIO_CONFIG_S_DRIVER check in vhost_scsi_set_status (asias)

Howto:
   Use the latest seabios, at least commit b44a7be17b
   git clone git://git.seabios.org/seabios.git
   make
   cp out/bios.bin /usr/share/qemu/bios.bin
   qemu -device vhost-scsi-pci,wwpn=naa.6001405bd4e8476d,event_idx=off
...

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
Signed-off-by: Asias He as...@redhat.com
Cc: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
---
 configure  |   15 +++-
 hw/Makefile.objs   |5 +-
 hw/s390x/s390-virtio-bus.c |   35 ++
 hw/vhost-scsi.c|  264 
 hw/vhost-scsi.h|   64 +++
 hw/virtio-pci.c|   62 ++
 hw/virtio-scsi.h   |2 +
 7 files changed, 445 insertions(+), 2 deletions(-)
 create mode 100644 hw/vhost-scsi.c
 create mode 100644 hw/vhost-scsi.h

diff --git a/configure b/configure
index f2af714..6b67e35 100755
--- a/configure
+++ b/configure
@@ -169,6 +169,7 @@ libattr=
 xfs=
 
 vhost_net=no
+vhost_scsi=no
 kvm=no
 gprof=no
 debug_tcg=no
@@ -532,6 +533,7 @@ Haiku)
   usb=linux
   kvm=yes
   vhost_net=yes
+  vhost_scsi=yes
   if [ $cpu = i386 -o $cpu = x86_64 ] ; then
 audio_possible_drivers=$audio_possible_drivers fmod
   fi
@@ -858,6 +860,10 @@ for opt do
   ;;
   --enable-vhost-net) vhost_net=yes
   ;;
+  --disable-vhost-scsi) vhost_scsi=no
+  ;;
+  --enable-vhost-scsi) vhost_scsi=yes
+  ;;
   --disable-glx) glx=no
   ;;
   --enable-glx) glx=yes
@@ -3078,7 +3084,7 @@ fi
 # __sync_fetch_and_and requires at least -march=i486. Many toolchains
 # use i686 as default anyway, but for those that don't, an explicit
 # specification is necessary
-if test $vhost_net = yes  test $cpu = i386; then
+if (test $vhost_net = yes -o $vhost_scsi = yes)  test $cpu = 
i386; then
   cat  $TMPC  EOF
 static int sfaa(int *ptr)
 {
@@ -3424,6 +3430,7 @@ echo sigev_thread_id   $sigev_thread_id
 echo uuid support  $uuid
 echo libcap-ng support $cap_ng
 echo vhost-net support $vhost_net
+echo vhost-scsi support $vhost_scsi
 echo Trace backend $trace_backend
 echo Trace output file $trace_file-pid
 echo spice support $spice ($spice_protocol_version/$spice_server_version)
@@ -3697,6 +3704,9 @@ fi
 if test $virtfs = yes ; then
   echo CONFIG_VIRTFS=y  $config_host_mak
 fi
+if test $vhost_scsi = yes ; then
+  echo CONFIG_VHOST_SCSI=y  $config_host_mak
+fi
 if test $blobs = yes ; then
   echo INSTALL_BLOBS=yes  $config_host_mak
 fi
@@ -4175,6 +4185,9 @@ case $target_arch2 in
   if test $vhost_net = yes ; then
 echo CONFIG_VHOST_NET=y  $config_target_mak
   fi
+  if test $vhost_scsi = yes ; then
+echo CONFIG_VHOST_SCSI=y  $config_target_mak
+  fi
 fi
 esac
 case $target_arch2 in
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index d0b2ecb..0a38eb5 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -205,8 +205,11 @@ common-obj-$(CONFIG_XEN_BACKEND) += xen_console.o xenfb.o 
xen_disk.o xen_nic.o
 obj-$(CONFIG_VIRTIO) += dataplane/
 obj-$(CONFIG_VIRTIO) += virtio.o virtio-blk.o virtio-balloon.o virtio-net.o
 obj-$(CONFIG_VIRTIO) += virtio-serial-bus.o virtio-scsi.o
+ifeq ($(CONFIG_VIRTIO), y)
+obj-$(CONFIG_LINUX) += vhost-scsi.o
+endif
 obj-$(CONFIG_SOFTMMU) += vhost_net.o
-obj-$(CONFIG_VHOST_NET) += vhost.o
+obj-$(CONFIG_LINUX) += vhost.o
 obj-$(CONFIG_REALLY_VIRTFS) += 9pfs/
 obj-$(CONFIG_VGA) += vga.o
 
diff --git a/hw/s390x/s390-virtio-bus.c b/hw/s390x/s390-virtio-bus.c
index c5d5456..73d93fd 100644
--- a/hw/s390x/s390-virtio-bus.c
+++ b/hw/s390x/s390-virtio-bus.c
@@ -28,6 +28,8 @@
 #include hw/virtio-rng.h
 #include hw/virtio-serial.h
 #include hw/virtio-net.h
+#include hw/virtio-scsi.h
+#include hw/vhost-scsi.h

[PATCH-v2 1/2] virtio-scsi: create VirtIOSCSICommon

2013-03-28 Thread Nicholas A. Bellinger

From: Paolo Bonzini pbonz...@redhat.com

This patch refactors existing virtio-scsi code into VirtIOSCSICommon
in order to allow virtio_scsi_init_common() to be used by both internal
virtio_scsi_init() and external vhost-scsi-pci code.

Changes in Patch-v2:
   - Move -get_features() assignment to virtio_scsi_init() instead of
 virtio_scsi_init_common()

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
Cc: Michael S. Tsirkin m...@redhat.com
Cc: Asias He as...@redhat.com
Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
---
 hw/virtio-scsi.c |  192 +-
 hw/virtio-scsi.h |  130 --
 include/qemu/osdep.h |4 +
 3 files changed, 178 insertions(+), 148 deletions(-)

diff --git a/hw/virtio-scsi.c b/hw/virtio-scsi.c
index 8620712..c59e9c6 100644
--- a/hw/virtio-scsi.c
+++ b/hw/virtio-scsi.c
@@ -18,118 +18,6 @@
 #include hw/scsi.h
 #include hw/scsi-defs.h
 
-#define VIRTIO_SCSI_VQ_SIZE 128
-#define VIRTIO_SCSI_CDB_SIZE32
-#define VIRTIO_SCSI_SENSE_SIZE  96
-#define VIRTIO_SCSI_MAX_CHANNEL 0
-#define VIRTIO_SCSI_MAX_TARGET  255
-#define VIRTIO_SCSI_MAX_LUN 16383
-
-/* Response codes */
-#define VIRTIO_SCSI_S_OK   0
-#define VIRTIO_SCSI_S_OVERRUN  1
-#define VIRTIO_SCSI_S_ABORTED  2
-#define VIRTIO_SCSI_S_BAD_TARGET   3
-#define VIRTIO_SCSI_S_RESET4
-#define VIRTIO_SCSI_S_BUSY 5
-#define VIRTIO_SCSI_S_TRANSPORT_FAILURE6
-#define VIRTIO_SCSI_S_TARGET_FAILURE   7
-#define VIRTIO_SCSI_S_NEXUS_FAILURE8
-#define VIRTIO_SCSI_S_FAILURE  9
-#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED   10
-#define VIRTIO_SCSI_S_FUNCTION_REJECTED11
-#define VIRTIO_SCSI_S_INCORRECT_LUN12
-
-/* Controlq type codes.  */
-#define VIRTIO_SCSI_T_TMF  0
-#define VIRTIO_SCSI_T_AN_QUERY 1
-#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2
-
-/* Valid TMF subtypes.  */
-#define VIRTIO_SCSI_T_TMF_ABORT_TASK   0
-#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET   1
-#define VIRTIO_SCSI_T_TMF_CLEAR_ACA2
-#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET   3
-#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET  4
-#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET   5
-#define VIRTIO_SCSI_T_TMF_QUERY_TASK   6
-#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET   7
-
-/* Events.  */
-#define VIRTIO_SCSI_T_EVENTS_MISSED0x8000
-#define VIRTIO_SCSI_T_NO_EVENT 0
-#define VIRTIO_SCSI_T_TRANSPORT_RESET  1
-#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2
-#define VIRTIO_SCSI_T_PARAM_CHANGE 3
-
-/* Reasons for transport reset event */
-#define VIRTIO_SCSI_EVT_RESET_HARD 0
-#define VIRTIO_SCSI_EVT_RESET_RESCAN   1
-#define VIRTIO_SCSI_EVT_RESET_REMOVED  2
-
-/* SCSI command request, followed by data-out */
-typedef struct {
-uint8_t lun[8];  /* Logical Unit Number */
-uint64_t tag;/* Command identifier */
-uint8_t task_attr;   /* Task attribute */
-uint8_t prio;
-uint8_t crn;
-uint8_t cdb[];
-} QEMU_PACKED VirtIOSCSICmdReq;
-
-/* Response, followed by sense data and data-in */
-typedef struct {
-uint32_t sense_len;  /* Sense data length */
-uint32_t resid;  /* Residual bytes in data buffer */
-uint16_t status_qualifier;   /* Status qualifier */
-uint8_t status;  /* Command completion status */
-uint8_t response;/* Response values */
-uint8_t sense[];
-} QEMU_PACKED VirtIOSCSICmdResp;
-
-/* Task Management Request */
-typedef struct {
-uint32_t type;
-uint32_t subtype;
-uint8_t lun[8];
-uint64_t tag;
-} QEMU_PACKED VirtIOSCSICtrlTMFReq;
-
-typedef struct {
-uint8_t response;
-} QEMU_PACKED VirtIOSCSICtrlTMFResp;
-
-/* Asynchronous notification query/subscription */
-typedef struct {
-uint32_t type;
-uint8_t lun[8];
-uint32_t event_requested;
-} QEMU_PACKED VirtIOSCSICtrlANReq;
-
-typedef struct {
-uint32_t event_actual;
-uint8_t response;
-} QEMU_PACKED VirtIOSCSICtrlANResp;
-
-typedef struct {
-uint32_t event;
-uint8_t lun[8];
-uint32_t reason;
-} QEMU_PACKED VirtIOSCSIEvent;
-
-typedef struct {
-uint32_t num_queues;
-uint32_t seg_max;
-uint32_t max_sectors;
-uint32_t cmd_per_lun;
-uint32_t event_info_size;
-uint32_t sense_size;
-uint32_t cdb_size;
-uint16_t max_channel;
-uint16_t max_target;
-uint32_t max_lun;
-} QEMU_PACKED VirtIOSCSIConfig;
-
 typedef struct VirtIOSCSIReq {
 VirtIOSCSI *dev;
 VirtQueue *vq;
@@ -178,7 +66,7 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
 scsi_req_unref(req-sreq);
 }
 g_free(req);
-virtio_notify(s-vdev, vq);
+virtio_notify(s-vs.vdev, vq);
 }
 
 static void virtio_scsi_bad_req(void)

Re: [PATCH 2/4 v2] KVM: PPC: debug stub interface parameter defined

2013-03-28 Thread Alexander Graf


On 21.03.2013, at 07:24, Bharat Bhushan wrote:

 From: Bharat Bhushan bharat.bhus...@freescale.com
 
 This patch defines the interface parameter for KVM_SET_GUEST_DEBUG
 ioctl support. Follow up patches will use this for setting up
 hardware breakpoints, watchpoints and software breakpoints.
 
 Also kvm_arch_vcpu_ioctl_set_guest_debug() is brought one level below.
 This is because I am not sure what is required for book3s. So this ioctl
 behaviour will not change for book3s.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 v2:
 - No Change
 
 arch/powerpc/include/uapi/asm/kvm.h |   23 +++
 arch/powerpc/kvm/book3s.c   |6 ++
 arch/powerpc/kvm/booke.c|6 ++
 arch/powerpc/kvm/powerpc.c  |6 --
 4 files changed, 35 insertions(+), 6 deletions(-)
 
 diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
 b/arch/powerpc/include/uapi/asm/kvm.h
 index c2ff99c..15f9a00 100644
 --- a/arch/powerpc/include/uapi/asm/kvm.h
 +++ b/arch/powerpc/include/uapi/asm/kvm.h
 @@ -272,8 +272,31 @@ struct kvm_debug_exit_arch {
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
 + struct {
 + /* H/W breakpoint/watchpoint address */
 + __u64 addr;
 + /*
 +  * Type denotes h/w breakpoint, read watchpoint, write
 +  * watchpoint or watchpoint (both read and write).
 +  */
 +#define KVMPPC_DEBUG_NOTYPE  0x0
 +#define KVMPPC_DEBUG_BREAKPOINT  (1UL  1)
 +#define KVMPPC_DEBUG_WATCH_WRITE (1UL  2)
 +#define KVMPPC_DEBUG_WATCH_READ  (1UL  3)

Are you sure you want to introduce these here, just to remove them again in a 
later patch?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/4 v2] Added ONE_REG interface for debug instruction

2013-03-28 Thread Alexander Graf


On 21.03.2013, at 07:24, Bharat Bhushan wrote:

 This patch adds the one_reg interface to get the special instruction
 to be used for setting software breakpoint from userspace.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com

Thanks, applied to kvm-ppc-queue.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: KVM EPT implementation

2013-03-28 Thread Zhang, Yang Z

Tony Roberts wrote on 2013-03-29:
 Hello list,
 
 (Apologies if this appears twice!)
 
 I'm currently doing some research into guest memory allocation,
 specifically trying to determine when guests write data into certain
 memory locations, and I'm trying to get my head around how KVM updates
 the extended page tables, and where within the KVM code the actual
 updates occur.  I'm working on an Intel box with VT extensions, and
 Debian 3.6.6 kernel.
 
 After going through the code, I can see that a lot of the existing
 shadow page table code is resued, however I'm a little confused over
 how exactly that is.
 
 As an example, I can see the function vmx_set_cr3 (vmx.c) being
 called, which is setting the host CR3 to the base of the PML4 table.
 
 Then from that address, the EPTP is created, essentially setting the
 bottom 12 bits to various flags.
 
 Then, handle_ept_violation is called which contains the GPA that
 generated the page fault.  I've looked into the function
 kvm_mmu_page_fault which contains the value in the CR2, I'm assuming
 this to be the guest's CR2 value, which I think is the guest physical
 address that caused the page fault.
 
 However this is where I lose the chase slightly.  I know from studying
 the Intel developers manuals that the top level of the 4 level
 hierarchy for the EPTs is the PML4 table, which can contain a maximum
 of 512 64-bit entries, with each entry in turn pointing to the base
 address of a PDPT.
 
 The first address that the function pte_list_add sees is the base
 address of the PML4 table, so I was expecting to be able to read 512
 64-bit entries from that base address and see at least one 64-bit
 entry written into that page.  However, after a number of different
 attempts, I'm unable to determine the function that is actually
 responsible for updating the EPTs.
Are you trying to dump guest PML4 table or EPT PML4? If for EPT, just look up 
EPTP(root_hpa in vcpu-arch.mmu.root_hpa). If for guest, you need to translate 
the gpa to hpa firstly.

 
 I was hoping somebody might be able to point me to the correct location
 within the KVM source code to track when EPT entries are actually
 written to the various tables in the 4 level hierarchy.  The function
 pte_list_add seems to do nothing more than change the value of a
 pointer, but only the first address passed to it is page aligned (the
 PML4 base) and the rest of the addresses appear to be pointers into
 existing pages, often seeming to be outside of the PML4 page range.
 
 I might be completely misunderstanding something, but any advice on how
 to effectively monitor EPT entries within KVM would be greatly
 appreciated.
You may start with mmu_alloc_direct_roots(). EPTP is assigned value in this 
function.

Best regards,
Yang

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V3 WIP 3/3] disable vhost_verify_ring_mappings check

2013-03-28 Thread Nicholas A. Bellinger

On Thu, 2013-03-28 at 11:03 +0100, Paolo Bonzini wrote:
 Il 28/03/2013 10:04, Michael S. Tsirkin ha scritto:
Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
Checking vq: 1 ring_phys: 0 ring_size: 1028 .
Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
Calling l: 5124 for start_addr: c for vq 2
Unable to map ring buffer for ring 2
l: 4096 ring_size: 5124
  okay so the ring address is within ROM.
  Unsurprisingly it fails.
  bios should stop device before write protect.
  
 
 The above log is very early, when everything is RAM:
 
   vhost_set_memory: section: 0x7fe2801f2b60 section-size: 2146697216 add: 0
   Before vhost_verify_ring_mappings: start_addr: c size: 2146697216
 
 The rings are not within ROM.  ROM is at 0xc-0xcc000 according to the
 PAM registers.
 
 The way I followed the debug output, Got ranges_overlap means 
 actually bailing out because ranges do not overlap.

Yes, this is when !ranges_overlap() is hit in
vhost_verify_ring_mappings(), so the offending cpu_physical_memory_map()
is skipped..

 In particular, 
 here all three virtqueues fail the test, because this is the ROM area 
 0xc..0xc7fff:
 
   vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 32768 add: 1
   Before vhost_verify_ring_mappings: start_addr: c size: 32768
   Checking vq: 0 ring_phys: 0 ring_size: 1028 .
   Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
   Checking vq: 1 ring_phys: 0 ring_size: 1028 .
   Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
   Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
   Got ranges_overlap for vq: 2 ring_phys: ed000 ring_size: 5124
 
 Just below, vhost looks at the large RAM area starting at 0xc8000
 (it's large because 0xf..0xf is still RAM):
 
   vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 2146664448 add: 1
   Before vhost_verify_ring_mappings: start_addr: c8000 size: 2146664448
   Checking vq: 0 ring_phys: 0 ring_size: 1028 .
   Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
   Checking vq: 1 ring_phys: 0 ring_size: 1028 .
   Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
   Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
   Calling l: 5124 for start_addr: c8000 for vq 2
 
 Here vq 0 and 1 fail the test because they are in low RAM, vq 2 passes.
 
 After 0xf..0xf is marked readonly,

Btw, the first vhost_set_memory() and failing
vhost_verify_ring_mappings() do not occur until the
pci_config_writeb(..., 0x31) code is executed in
src/shadow.c:make_bios_readonly_intel() below:

static void
make_bios_readonly_intel(u16 bdf, u32 pam0)
{
// Flush any pending writes before locking memory.
wbinvd();

// Write protect roms from 0xc-0xf
u32 romend = rom_get_last(), romtop = rom_get_max();
int i;
for (i=0; i6; i++) {
u32 mem = BUILD_ROM_START + i * 32*1024;
u32 pam = pam0 + 1 + i;
if (romend = mem + 16*1024 || romtop = mem + 32*1024) {
if (romend  mem  romtop  mem + 16*1024)
pci_config_writeb(bdf, pam, 0x31);
^

break;
}
pci_config_writeb(bdf, pam, 0x11);
}

// Write protect 0xf-0x10
pci_config_writeb(bdf, pam0, 0x10);
}

Up until this point, vhost_verify_ring_mappings() is not called by
vhost_set_memory() as vhost_dev_start() has not been invoked to set
vdev-started yet..

  vhost looks at the RAM
 between 0xc9000 and 0xf:
 
   vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 159744 add: 1
   Before vhost_verify_ring_mappings: start_addr: c9000 size: 159744
   Checking vq: 0 ring_phys: 0 ring_size: 1028 .
   Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
   Checking vq: 1 ring_phys: 0 ring_size: 1028 .
   Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
   Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
   Calling l: 5124 for start_addr: c9000 for vq 2
 
 and the ROM between 0xf and 0xf, which no ring overlaps with:
 
   vhost_set_memory: section: 0x7fe2801f2aa0 section-size: 65536 add: 1
   Before vhost_verify_ring_mappings: start_addr: f size: 65536
   Checking vq: 0 ring_phys: 0 ring_size: 1028 .
   Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
   Checking vq: 1 ring_phys: 0 ring_size: 1028 .
   Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
   Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
   Got ranges_overlap for vq: 2 ring_phys: ed000 ring_size: 5124
 
 
 
 SeaBIOS is indeed not initializing vqs 0/1 (the control and event 
 queues), so their ring_phys is 0.  But the one that is failing is vq 2, 
 the first request queue.
 
 Your patch seems good, but shouldn't fix this problem.
 
 Paolo
 --
 To unsubscribe from this list: send the line unsubscribe target-devel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at

Re: [PATCH V3 WIP 3/3] disable vhost_verify_ring_mappings check

2013-03-28 Thread Nicholas A. Bellinger

On Thu, 2013-03-28 at 06:13 -0400, Paolo Bonzini wrote:
  I think it's the right thing to do, but maybe not the right place
  to do this, need to reset after all IO is done, before
  ring memory is write protected.
 
 Our emails are crossing each other unfortunately, but I want to
 reinforce this: ring memory is not write protected.

Understood.  However, AFAICT the act of write protecting these ranges
for ROM generates the offending callbacks to vhost_set_memory().

The part that I'm missing is if ring memory is not being write protected
by make_bios_readonly_intel(), why are the vhost_set_memory() calls
being invoked..?

 Remember that
 SeaBIOS can even provide virtio-scsi access to DOS, so you must
 not reset the device.  It must remain functional all the time,
 and the OS's own driver will reset it when it's started.
 

Mmmm, so a vp_reset() is out of the question then..

--nab


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 2/4 v2] KVM: PPC: debug stub interface parameter defined

2013-03-28 Thread Bhushan Bharat-R65777



 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, March 29, 2013 7:26 AM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421; Bhushan
 Bharat-R65777
 Subject: Re: [PATCH 2/4 v2] KVM: PPC: debug stub interface parameter defined
 
 
 On 21.03.2013, at 07:24, Bharat Bhushan wrote:
 
  From: Bharat Bhushan bharat.bhus...@freescale.com
 
  This patch defines the interface parameter for KVM_SET_GUEST_DEBUG
  ioctl support. Follow up patches will use this for setting up hardware
  breakpoints, watchpoints and software breakpoints.
 
  Also kvm_arch_vcpu_ioctl_set_guest_debug() is brought one level below.
  This is because I am not sure what is required for book3s. So this
  ioctl behaviour will not change for book3s.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  v2:
  - No Change
 
  arch/powerpc/include/uapi/asm/kvm.h |   23 +++
  arch/powerpc/kvm/book3s.c   |6 ++
  arch/powerpc/kvm/booke.c|6 ++
  arch/powerpc/kvm/powerpc.c  |6 --
  4 files changed, 35 insertions(+), 6 deletions(-)
 
  diff --git a/arch/powerpc/include/uapi/asm/kvm.h
  b/arch/powerpc/include/uapi/asm/kvm.h
  index c2ff99c..15f9a00 100644
  --- a/arch/powerpc/include/uapi/asm/kvm.h
  +++ b/arch/powerpc/include/uapi/asm/kvm.h
  @@ -272,8 +272,31 @@ struct kvm_debug_exit_arch {
 
  /* for KVM_SET_GUEST_DEBUG */
  struct kvm_guest_debug_arch {
  +   struct {
  +   /* H/W breakpoint/watchpoint address */
  +   __u64 addr;
  +   /*
  +* Type denotes h/w breakpoint, read watchpoint, write
  +* watchpoint or watchpoint (both read and write).
  +*/
  +#define KVMPPC_DEBUG_NOTYPE0x0
  +#define KVMPPC_DEBUG_BREAKPOINT(1UL  1)
  +#define KVMPPC_DEBUG_WATCH_WRITE   (1UL  2)
  +#define KVMPPC_DEBUG_WATCH_READ(1UL  3)
 
 Are you sure you want to introduce these here, just to remove them again in a
 later patch?

Up to this patch the scope was limited to this structure. So for clarity I 
defined here and later the scope expands so moved out of this structure. I do 
not think this really matters, let me know how you want to see ?

-Bharat

 
 
 Alex
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v6 4/6] KVM: Add reset/restore rtc_status support

2013-03-28 Thread Zhang, Yang Z

Paolo Bonzini wrote on 2013-03-26:
 Il 22/03/2013 06:24, Yang Zhang ha scritto:
 +static void rtc_irq_restore(struct kvm_ioapic *ioapic)
 +{
 +struct kvm_vcpu *vcpu;
 +int vector, i, pending_eoi = 0;
 +
 +if (RTC_GSI != 8)
 
 Please set it to -1U if not x86, and do
 
if (RTC_GSI = IOAPIC_NUM_PINS)
return;
 here.
Sure. It is more reasonable.

Best regards,
Yang


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v6 4/6] KVM: Add reset/restore rtc_status support

2013-03-28 Thread Zhang, Yang Z

Paolo Bonzini wrote on 2013-03-26:
 Il 22/03/2013 06:24, Yang Zhang ha scritto:
 +vector = ioapic-redirtbl[RTC_GSI].fields.vector;
 +kvm_for_each_vcpu(i, vcpu, ioapic-kvm) {
 +if (kvm_apic_pending_eoi(vcpu, vector)) {
 +pending_eoi++;
 +set_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map);
 
 Also, __set_bit.  If I understand correctly, dest_map is protected by
 the ioapic spinlock.
Yes, I already see it and changed it in version 7.

Best regards,
Yang


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH v6 6/6] KVM: Use eoi to track RTC interrupt delivery status

2013-03-28 Thread Zhang, Yang Z

Paolo Bonzini wrote on 2013-03-26:
 Il 22/03/2013 06:24, Yang Zhang ha scritto:
 +static void rtc_irq_ack_eoi(struct kvm_vcpu *vcpu,
 +struct rtc_status *rtc_status, int irq)
 +{
 +if (irq != RTC_GSI)
 +return;
 +
 +if (test_and_clear_bit(vcpu-vcpu_id, rtc_status-dest_map))
 +--rtc_status-pending_eoi;
 +
 +WARN_ON(rtc_status-pending_eoi  0);
 +}
 
 This is the only case where you're passing the struct rtc_status instead
 of the struct kvm_ioapic.  Please use the latter, and make it the first
 argument.

 @@ -244,7 +268,14 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int
 irq)
  irqe.level = 1;
  irqe.shorthand = 0;
 -return kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe, NULL);
 +if (irq == RTC_GSI) {
 +ret = kvm_irq_delivery_to_apic(ioapic-kvm, NULL, irqe,
 +ioapic-rtc_status.dest_map);
 +ioapic-rtc_status.pending_eoi = ret;
 
 I think you should either add a
 
 BUG_ON(ioapic-rtc_status.pending_eoi != 0);
 or use ioapic-rtc_status.pending_eoi += ret (or both).
 
There may malicious guest to write EOI more than once. And the pending_eoi will 
be negative. But it should not be a bug. Just WARN_ON is enough. And we already 
do it in ack_eoi. So don't need to do duplicated thing here.

Best regards,
Yang


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V3 WIP 3/3] disable vhost_verify_ring_mappings check

2013-03-28 Thread Nicholas A. Bellinger

On Thu, 2013-03-28 at 11:04 +0200, Michael S. Tsirkin wrote:
 On Thu, Mar 28, 2013 at 12:35:42AM -0700, Nicholas A. Bellinger wrote:
  On Wed, 2013-03-27 at 23:45 -0700, Nicholas A. Bellinger wrote:
   On Wed, 2013-03-27 at 15:33 -0700, Nicholas A. Bellinger wrote:
On Wed, 2013-03-27 at 23:56 +0200, Michael S. Tsirkin wrote:
 On Wed, Mar 27, 2013 at 02:31:27PM -0700, Nicholas A. Bellinger wrote:
  

SNIP

  locking shadow ram
  romend: 0x000cb800 romtop: 0x000ec000
  mem: 0x000c, pam: 0x005a
  Calling pci_config_writeb(0x11): bdf: 0x pam: 0x005a
  
   No QEMU output after pci_config_writeb(0x11) in make_bios_readonly..
   
   Calling pci_config_writeb(0x31): bdf: 0x pam: 0x005b
   
   
  
  mem: 0x000c8000, pam: 0x005b
  romend: 0x000cb800 mem + 16*1024: 0x000cc000
  romtop: 0x000ec000 mem + 32*1024: 0x000d
  
  romend: 0x000cb800, mem: 0x000c8000, romtop: 0x000ec000, mem + 16*1024: 
  0x000cc000
  Calling pci_config_writeb(0x31): bdf: 0x pam: 0x005b
  
  
   QEMU output after pci_config_writeb(0x31) in make_bios_readonly..
   
   vhost_set_memory: section: 0x7fe2801f2b60 section-size: 2146697216 add: 0
   Before vhost_verify_ring_mappings: start_addr: c size: 2146697216
   Checking vq: 0 ring_phys: 0 ring_size: 1028 .
 
 This is also a bug. -net always initializes VQs 0..N so this is what
 vhost assumed.  Please teach vhost that it should skip uninitialized
 VQs. There are more places to fix.
 Basically look for if (!virtio_queue_get_num(vdev, queue_no)),
 all of them need to be updated to skip uninitialized vqs.
 Probably switch to a new API checking PA too.
 See patch below.

nod

 
   Got ranges_overlap for vq: 0 ring_phys: 0 ring_size: 1028
   Checking vq: 1 ring_phys: 0 ring_size: 1028 .
   Got ranges_overlap for vq: 1 ring_phys: 0 ring_size: 1028
   Checking vq: 2 ring_phys: ed000 ring_size: 5124 .
   Calling l: 5124 for start_addr: c for vq 2
   Unable to map ring buffer for ring 2
   l: 4096 ring_size: 5124
 
 okay so the ring address is within ROM.
 Unsurprisingly it fails.
 bios should stop device before write protect.

SNIP

 ---
 
 virtio: add API to check that ring is setup
 
 virtio scsi makes it legal to only setup a subset of rings.  The only
 way to detect the ring is setup seems to be to check whether PA was
 written to.  Add API to do this, and teach code to use it instead of
 checking hardware queue size.
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 
 ---
 
 diff --git a/hw/virtio.c b/hw/virtio.c
 index 26fbc79..ac12c01 100644
 --- a/hw/virtio.c
 +++ b/hw/virtio.c
 @@ -651,6 +651,11 @@ int virtio_queue_get_num(VirtIODevice *vdev, int n)
  return vdev-vq[n].vring.num;
  }
  
 +bool virtio_queue_valid(VirtIODevice *vdev, int n)
 +{
 +return vdev-vq[n].vring.num  vdev-vq[n].vring.pa;
 +}

I assume you mean vring.desc here, right..?

Sending out these as a separate patch series shortly.

--nab

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] KVM: Allow cross page reads and writes from cached translations

2013-03-28 Thread Andrew Honig


This patch adds support for kvm_gfn_to_hva_cache_init functions for
reads and writes that will cross a page.  If the range falls within
the same memslot, then this will be a fast operation.  If the range
is split between two memslots, then the slower kvm_read_guest and
kvm_write_guest are used.

Tested: Test against kvm_clock unit tests.

Signed-off-by: Andrew Honig aho...@google.com
---
 arch/x86/kvm/lapic.c  |2 +-
 arch/x86/kvm/x86.c|   12 +---
 include/linux/kvm_host.h  |2 +-
 include/linux/kvm_types.h |2 ++
 virt/kvm/kvm_main.c   |   43 +--
 5 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 02b51dd..f77df1c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1857,7 +1857,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 
data)
if (!pv_eoi_enabled(vcpu))
return 0;
return kvm_gfn_to_hva_cache_init(vcpu-kvm, vcpu-arch.pv_eoi.data,
-addr);
+addr, sizeof(u8));
 }
 
 void kvm_lapic_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f19ac0a..0fd5ad8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1823,7 +1823,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, 
u64 data)
return 0;
}
 
-   if (kvm_gfn_to_hva_cache_init(vcpu-kvm, vcpu-arch.apf.data, gpa))
+   if (kvm_gfn_to_hva_cache_init(vcpu-kvm, vcpu-arch.apf.data, gpa, 4))
return 1;
 
vcpu-arch.apf.send_user_only = !(data  KVM_ASYNC_PF_SEND_ALWAYS);
@@ -1952,12 +1952,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
 
gpa_offset = data  ~(PAGE_MASK | 1);
 
-   /* Check that the address is 32-byte aligned. */
-   if (gpa_offset  (sizeof(struct pvclock_vcpu_time_info) - 1))
-   break;
-
if (kvm_gfn_to_hva_cache_init(vcpu-kvm,
-vcpu-arch.pv_time, data  ~1ULL))
+vcpu-arch.pv_time, data  ~1ULL,
+sizeof(struct pvclock_vcpu_time_info)))
vcpu-arch.pv_time_enabled = false;
else
vcpu-arch.pv_time_enabled = true;
@@ -1977,7 +1974,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
return 1;
 
if (kvm_gfn_to_hva_cache_init(vcpu-kvm, vcpu-arch.st.stime,
-   data  
KVM_STEAL_VALID_BITS))
+   data  KVM_STEAL_VALID_BITS,
+   sizeof(struct kvm_steal_time)))
return 1;
 
vcpu-arch.st.msr_val = data;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cad77fe..c139582 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -518,7 +518,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void 
*data,
 int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
   void *data, unsigned long len);
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- gpa_t gpa);
+ gpa_t gpa, unsigned long len);
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index fa7cc72..bd420d6 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -71,6 +71,8 @@ struct gfn_to_hva_cache {
u64 generation;
gpa_t gpa;
unsigned long hva;
+   unsigned long len;
+   int slow;
struct kvm_memory_slot *memslot;
 };
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index adc68fe..dcd3e57 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1541,21 +1541,38 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const 
void *data,
 }
 
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- gpa_t gpa)
+ gpa_t gpa, unsigned long len)
 {
struct kvm_memslots *slots = kvm_memslots(kvm);
int offset = offset_in_page(gpa);
-   gfn_t gfn = gpa  PAGE_SHIFT;
+   gfn_t start_gfn = gpa  PAGE_SHIFT;
+   gfn_t end_gfn = (gpa + len - 1)  PAGE_SHIFT;
+   gfn_t nr_pages_needed = end_gfn - start_gfn + 1;
+   gfn_t nr_pages_avail;
 
ghc-gpa = gpa;
+   ghc-slow = 0;
ghc-generation = slots-generation;
-   ghc-memslot = gfn_to_memslot(kvm, gfn);
-   ghc-hva = gfn_to_hva_many(ghc-memslot, gfn, NULL);
-   if

[PATCH uq/master v2 0/2] Add some tracepoints for clarification of the cause of troubles

2013-03-28 Thread Kazuya Saito

This series adds tracepoints for helping us clarify the cause of
troubles. Virtualization on Linux is composed of some components such
as qemu, kvm, libvirt, and so on. So it is very important to clarify
firstly and swiftly the cause of troubles is on what component of
them. Although qemu has useful information of this because it stands
among kvm, libvirt and guest, it doesn't output the information by
trace or log system.
These patches add tracepoints which lead to reduce the time of the
clarification. We'd like to add the tracepoints as the first set
because, based on our experience, we've found out they must be useful
for an investigation in the future. Without those tracepoints,
we had a really hard time investigating a problem since the problem's
reproducibility was quite low and there was no clue in the dump of
qemu.

Changes from v1:
Add arg to kvm_ioctl, kvm_vm_ioctl, kvm_vcpu_ioctl tracepoints.
Add cpu_index to kvm_vcpu_ioctl, kvm_run_exit tracepoints.

Kazuya Saito (2):
  kvm-all: add kvm_ioctl, kvm_vm_ioctl, kvm_vcpu_ioctl tracepoints
  kvm-all: add kvm_run_exit tracepoint

 kvm-all.c|5 +
 trace-events |7 +++
 2 files changed, 12 insertions(+), 0 deletions(-)


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH uq/master v2 1/2] kvm-all: add kvm_ioctl, kvm_vm_ioctl, kvm_vcpu_ioctl tracepoints

2013-03-28 Thread Kazuya Saito

This patch adds tracepoints at ioctl to kvm. Tracing these ioctl is
useful for clarification whether the cause of troubles is qemu or kvm.

Signed-off-by: Kazuya Saito saito.kaz...@jp.fujitsu.com
---
 kvm-all.c|4 
 trace-events |5 +
 2 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index 9b433d3..fdb099c 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -33,6 +33,7 @@
 #include exec/memory.h
 #include exec/address-spaces.h
 #include qemu/event_notifier.h
+#include trace.h

 /* This check must be after config-host.h is included */
 #ifdef CONFIG_EVENTFD
@@ -1634,6 +1635,7 @@ int kvm_ioctl(KVMState *s, int type, ...)
 arg = va_arg(ap, void *);
 va_end(ap);

+trace_kvm_ioctl(type, arg);
 ret = ioctl(s-fd, type, arg);
 if (ret == -1) {
 ret = -errno;
@@ -1651,6 +1653,7 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
 arg = va_arg(ap, void *);
 va_end(ap);

+trace_kvm_vm_ioctl(type, arg);
 ret = ioctl(s-vmfd, type, arg);
 if (ret == -1) {
 ret = -errno;
@@ -1668,6 +1671,7 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
 arg = va_arg(ap, void *);
 va_end(ap);

+trace_kvm_vcpu_ioctl(cpu-cpu_index, type, arg);
 ret = ioctl(cpu-kvm_fd, type, arg);
 if (ret == -1) {
 ret = -errno;
diff --git a/trace-events b/trace-events
index 7f34112..3023744 100644
--- a/trace-events
+++ b/trace-events
@@ -1101,3 +1101,8 @@ virtio_ccw_new_device(int cssid, int ssid, int schid, int 
devno, const char *dev

 # migration.c
 migrate_set_state(int new_state) new state %d
+
+# kvm-all.c
+kvm_ioctl(int type, void *arg) type %d, arg %p
+kvm_vm_ioctl(int type, void *arg) type %d, arg %p
+kvm_vcpu_ioctl(int cpu_index, int type, void *arg) cpu_index %d, type %d, arg 
%p
-- 
1.7.1



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH uq/master v2 2/2] kvm-all: add kvm_run_exit tracepoint

2013-03-28 Thread Kazuya Saito

This patch enable us to know exit reason of KVM_RUN. It will help us
know where the trouble is caused.

Signed-off-by: Kazuya Saito saito.kaz...@jp.fujitsu.com
---
 kvm-all.c|1 +
 trace-events |2 ++
 2 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index fdb099c..325f5e7 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1574,6 +1574,7 @@ int kvm_cpu_exec(CPUArchState *env)
 abort();
 }

+trace_kvm_run_exit(cpu-cpu_index, run-exit_reason);
 switch (run-exit_reason) {
 case KVM_EXIT_IO:
 DPRINTF(handle_io\n);
diff --git a/trace-events b/trace-events
index 3023744..8fd6e80 100644
--- a/trace-events
+++ b/trace-events
@@ -1106,3 +1106,5 @@ migrate_set_state(int new_state) new state %d
 kvm_ioctl(int type, void *arg) type %d, arg %p
 kvm_vm_ioctl(int type, void *arg) type %d, arg %p
 kvm_vcpu_ioctl(int cpu_index, int type, void *arg) cpu_index %d, type %d, arg 
%p
+kvm_run_exit(int cpu_index, uint32_t reason) cpu_index %d, reason %d
+
-- 
1.7.1



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/3] virtio/vhost: Add checks for uninitialized VQs

2013-03-28 Thread Nicholas A. Bellinger

From: Nicholas Bellinger n...@linux-iscsi.org

Hi folks,

This series adds a virtio_queue_valid() for use by virtio-pci code in
order to prevent opreations upon uninitialized VQs, that is currently
expected to occur during seabios setup of virtio-scsi.

This also includes a vhost specific check for uninitialized VQs in
vhost_verify_ring_mappings() to avoid this same case.

Please review.

--nab

Michael S. Tsirkin (1):
  virtio: add API to check that ring is setup

Nicholas Bellinger (2):
  virtio-pci: Add virtio_queue_valid checks ahead of
virtio_queue_get_num
  vhost: Check+skip uninitialized VQs in vhost_verify_ring_mappings

 hw/vhost.c  |3 +++
 hw/virtio-pci.c |   27 +++
 hw/virtio.c |5 +
 hw/virtio.h |1 +
 4 files changed, 36 insertions(+), 0 deletions(-)

-- 
1.7.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/3] virtio: add API to check that ring is setup

2013-03-28 Thread Nicholas A. Bellinger

From: Michael S. Tsirkin m...@redhat.com

virtio scsi makes it legal to only setup a subset of rings.  The only
way to detect the ring is setup seems to be to check whether PA was
written to.  Add API to do this, and teach code to use it instead of
checking hardware queue size.

(nab: use .vring.desc instead of .vring.pa)

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Cc: Asias He as...@redhat.com
Cc: Paolo Bonzini pbonz...@redhat.com
Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
---
 hw/virtio.c |5 +
 hw/virtio.h |1 +
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/hw/virtio.c b/hw/virtio.c
index 26fbc79..65ba253 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -651,6 +651,11 @@ int virtio_queue_get_num(VirtIODevice *vdev, int n)
 return vdev-vq[n].vring.num;
 }
 
+bool virtio_queue_valid(VirtIODevice *vdev, int n)
+{
+return vdev-vq[n].vring.num  vdev-vq[n].vring.desc;
+}
+
 int virtio_queue_get_id(VirtQueue *vq)
 {
 VirtIODevice *vdev = vq-vdev;
diff --git a/hw/virtio.h b/hw/virtio.h
index fdbe931..3086798 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -227,6 +227,7 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t 
addr, uint32_t data);
 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr);
 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n);
 int virtio_queue_get_num(VirtIODevice *vdev, int n);
+bool virtio_queue_valid(VirtIODevice *vdev, int n);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
-- 
1.7.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/3] vhost: Check+skip uninitialized VQs in vhost_verify_ring_mappings

2013-03-28 Thread Nicholas A. Bellinger

From: Nicholas Bellinger n...@linux-iscsi.org

With the virtio_queue_valid() checks in place to skip uninitialized VQs
within virtio-pci code, go ahead and skip the same uninitialized VQs
during vhost_verify_ring_mappings().

Note this patch does not prevent vhost_virtqueue_start() from executing
by checking virtio_queue_valid(), as other logic during seabios -
virtio-scsi LLD guest hand-off appears to depend upon this execution.

Cc: Michael S. Tsirkin m...@redhat.com
Cc: Asias He as...@redhat.com
Cc: Paolo Bonzini pbonz...@redhat.com
Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
---
 hw/vhost.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/hw/vhost.c b/hw/vhost.c
index 4d6aee3..3a71aee 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -314,6 +314,9 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
 hwaddr l;
 void *p;
 
+if (!vq-ring_phys || !vq-ring_size) {
+continue;
+}
 if (!ranges_overlap(start_addr, size, vq-ring_phys, vq-ring_size)) {
 continue;
 }
-- 
1.7.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/3] virtio-pci: Add virtio_queue_valid checks ahead of virtio_queue_get_num

2013-03-28 Thread Nicholas A. Bellinger

From: Nicholas Bellinger n...@linux-iscsi.org

This patch adds a number of virtio_queue_valid() checks to virtio-pci
ahead of virtio_queue_get_num() usage in order to skip operation upon
the detection of an uninitialized VQ.

There is one exception in virtio_ioport_read():VIRTIO_PCI_QUEUE_NUM,
where virtio_queue_get_num() may still be called without a valid
vdev-vq[n].vring.desc physical address.

Cc: Michael S. Tsirkin m...@redhat.com
Cc: Asias He as...@redhat.com
Cc: Paolo Bonzini pbonz...@redhat.com
Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org
---
 hw/virtio-pci.c |   27 +++
 1 files changed, 27 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 0d67b84..231ca0c 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -211,6 +211,9 @@ static void virtio_pci_start_ioeventfd(VirtIOPCIProxy 
*proxy)
 }
 
 for (n = 0; n  VIRTIO_PCI_QUEUE_MAX; n++) {
+if (!virtio_queue_valid(proxy-vdev, n)) {
+continue;
+}
 if (!virtio_queue_get_num(proxy-vdev, n)) {
 continue;
 }
@@ -225,6 +228,9 @@ static void virtio_pci_start_ioeventfd(VirtIOPCIProxy 
*proxy)
 
 assign_error:
 while (--n = 0) {
+if (!virtio_queue_valid(proxy-vdev, n)) {
+continue;
+}
 if (!virtio_queue_get_num(proxy-vdev, n)) {
 continue;
 }
@@ -246,6 +252,9 @@ static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
 }
 
 for (n = 0; n  VIRTIO_PCI_QUEUE_MAX; n++) {
+if (!virtio_queue_valid(proxy-vdev, n)) {
+continue;
+}
 if (!virtio_queue_get_num(proxy-vdev, n)) {
 continue;
 }
@@ -546,6 +555,9 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, 
int nvqs)
 MSIMessage msg;
 
 for (queue_no = 0; queue_no  nvqs; queue_no++) {
+if (!virtio_queue_valid(vdev, queue_no)) {
+continue;
+}
 if (!virtio_queue_get_num(vdev, queue_no)) {
 break;
 }
@@ -593,6 +605,9 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy 
*proxy, int nvqs)
 int queue_no;
 
 for (queue_no = 0; queue_no  nvqs; queue_no++) {
+if (!virtio_queue_valid(vdev, queue_no)) {
+continue;
+}
 if (!virtio_queue_get_num(vdev, queue_no)) {
 break;
 }
@@ -665,6 +680,9 @@ static int kvm_virtio_pci_vector_unmask(PCIDevice *dev, 
unsigned vector,
 int ret, queue_no;
 
 for (queue_no = 0; queue_no  proxy-nvqs_with_notifiers; queue_no++) {
+if (!virtio_queue_valid(vdev, queue_no)) {
+continue;
+}
 if (!virtio_queue_get_num(vdev, queue_no)) {
 break;
 }
@@ -695,6 +713,9 @@ static void kvm_virtio_pci_vector_mask(PCIDevice *dev, 
unsigned vector)
 int queue_no;
 
 for (queue_no = 0; queue_no  proxy-nvqs_with_notifiers; queue_no++) {
+if (!virtio_queue_valid(vdev, queue_no)) {
+continue;
+}
 if (!virtio_queue_get_num(vdev, queue_no)) {
 break;
 }
@@ -717,6 +738,9 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
 VirtQueue *vq;
 
 for (queue_no = 0; queue_no  proxy-nvqs_with_notifiers; queue_no++) {
+if (!virtio_queue_valid(vdev, queue_no)) {
+continue;
+}
 if (!virtio_queue_get_num(vdev, queue_no)) {
 break;
 }
@@ -790,6 +814,9 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, 
int nvqs, bool assign)
 }
 
 for (n = 0; n  nvqs; n++) {
+if (!virtio_queue_valid(vdev, n)) {
+continue;
+}
 if (!virtio_queue_get_num(vdev, n)) {
 break;
 }
-- 
1.7.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Revert KVM: MMU: Move kvm_mmu_free_some_pages() into kvm_mmu_alloc_page()

2013-03-28 Thread Takuya Yoshikawa

With the following commit, shadow pages can be zapped at random during
a shadow page talbe walk:
  KVM: MMU: Move kvm_mmu_free_some_pages() into kvm_mmu_alloc_page()
  7ddca7e43c8f28f9419da81a0e7730b66aa60fe9

This patch reverts it and fixes __direct_map() and FNAME(fetch)().

Signed-off-by: Takuya Yoshikawa yoshikawa_takuya...@lab.ntt.co.jp
---
 arch/x86/kvm/mmu.c |   11 +++
 arch/x86/kvm/paging_tmpl.h |1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 633e30c..004cc87 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1501,15 +1501,11 @@ static void drop_parent_pte(struct kvm_mmu_page *sp,
mmu_spte_clear_no_track(parent_pte);
 }
 
-static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
-
 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
   u64 *parent_pte, int direct)
 {
struct kvm_mmu_page *sp;
 
-   make_mmu_pages_available(vcpu);
-
sp = mmu_memory_cache_alloc(vcpu-arch.mmu_page_header_cache);
sp-spt = mmu_memory_cache_alloc(vcpu-arch.mmu_page_cache);
if (!direct)
@@ -2806,6 +2802,7 @@ exit:
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 gva_t gva, pfn_t *pfn, bool write, bool *writable);
+static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
 
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 gfn_t gfn, bool prefault)
@@ -2847,6 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, 
u32 error_code,
spin_lock(vcpu-kvm-mmu_lock);
if (mmu_notifier_retry(vcpu-kvm, mmu_seq))
goto out_unlock;
+   make_mmu_pages_available(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, pfn, level);
r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
@@ -2924,6 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
spin_lock(vcpu-kvm-mmu_lock);
+   make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL,
  1, ACC_ALL, NULL);
++sp-root_count;
@@ -2935,6 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 
ASSERT(!VALID_PAGE(root));
spin_lock(vcpu-kvm-mmu_lock);
+   make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, i  (30 - PAGE_SHIFT),
  i  30,
  PT32_ROOT_LEVEL, 1, ACC_ALL,
@@ -2973,6 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
 
spin_lock(vcpu-kvm-mmu_lock);
+   make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
  0, ACC_ALL, NULL);
root = __pa(sp-spt);
@@ -3006,6 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
return 1;
}
spin_lock(vcpu-kvm-mmu_lock);
+   make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, i  30,
  PT32_ROOT_LEVEL, 0,
  ACC_ALL, NULL);
@@ -3311,6 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t 
gpa, u32 error_code,
spin_lock(vcpu-kvm-mmu_lock);
if (mmu_notifier_retry(vcpu-kvm, mmu_seq))
goto out_unlock;
+   make_mmu_pages_available(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, pfn, level);
r = __direct_map(vcpu, gpa, write, map_writable,
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index af143f0..da20860 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -627,6 +627,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t 
addr, u32 error_code,
goto out_unlock;
 
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
+   make_mmu_pages_available(vcpu);
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, walker.gfn, pfn, level);
r = FNAME(fetch)(vcpu, addr, walker, write_fault,
-- 
1.7.5.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 3/4 v2] Rename EMULATE_DO_PAPR to EMULATE_EXIT_USER

2013-03-28 Thread Alexander Graf


On 21.03.2013, at 07:25, Bharat Bhushan wrote:

 From: Bharat Bhushan bharat.bhus...@freescale.com
 
 Instruction emulation return EMULATE_DO_PAPR when it requires
 exit to userspace on book3s. Similar return is required
 for booke. EMULATE_DO_PAPR reads out to be confusing so it is
 renamed to EMULATE_EXIT_USER.

Please update the patch description.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support

2013-03-28 Thread Alexander Graf


On 21.03.2013, at 07:25, Bharat Bhushan wrote:

 From: Bharat Bhushan bharat.bhus...@freescale.com
 
 This patch adds the debug stub support on booke/bookehv.
 Now QEMU debug stub can use hw breakpoint, watchpoint and
 software breakpoint to debug guest.
 
 Debug registers are saved/restored on vcpu_put()/vcpu_get().
 Also the debug registers are saved restored only if guest is using
 debug resources.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 v2:
 - save/restore in vcpu_get()/vcpu_put()
 - some more minor cleanup based on review comments.
 
 arch/powerpc/include/asm/kvm_host.h |   10 ++
 arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
 arch/powerpc/kvm/booke.c|  252 ---
 arch/powerpc/kvm/e500_emulate.c |   10 ++
 4 files changed, 272 insertions(+), 22 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h 
 b/arch/powerpc/include/asm/kvm_host.h
 index f4ba881..8571952 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -504,7 +504,17 @@ struct kvm_vcpu_arch {
   u32 mmucfg;
   u32 epr;
   u32 crit_save;
 + /* guest debug registers*/
   struct kvmppc_booke_debug_reg dbg_reg;
 + /* shadow debug registers */
 + struct kvmppc_booke_debug_reg shadow_dbg_reg;
 + /* host debug registers*/
 + struct kvmppc_booke_debug_reg host_dbg_reg;
 + /*
 +  * Flag indicating that debug registers are used by guest
 +  * and requires save restore.
 + */
 + bool debug_save_restore;
 #endif
   gpa_t paddr_accessed;
   gva_t vaddr_accessed;
 diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
 b/arch/powerpc/include/uapi/asm/kvm.h
 index 15f9a00..d7ce449 100644
 --- a/arch/powerpc/include/uapi/asm/kvm.h
 +++ b/arch/powerpc/include/uapi/asm/kvm.h
 @@ -25,6 +25,7 @@
 /* Select powerpc specific features in linux/kvm.h */
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
 +#define __KVM_HAVE_GUEST_DEBUG
 
 struct kvm_regs {
   __u64 pc;
 @@ -267,7 +268,24 @@ struct kvm_fpu {
   __u64 fpr[32];
 };
 
 +/*
 + * Defines for h/w breakpoint, watchpoint (read, write or both) and
 + * software breakpoint.
 + * These are used as type in KVM_SET_GUEST_DEBUG ioctl and status
 + * for KVM_DEBUG_EXIT.
 + */
 +#define KVMPPC_DEBUG_NONE0x0
 +#define KVMPPC_DEBUG_BREAKPOINT  (1UL  1)
 +#define KVMPPC_DEBUG_WATCH_WRITE (1UL  2)
 +#define KVMPPC_DEBUG_WATCH_READ  (1UL  3)
 struct kvm_debug_exit_arch {
 + __u64 address;
 + /*
 +  * exiting to userspace because of h/w breakpoint, watchpoint
 +  * (read, write or both) and software breakpoint.
 +  */
 + __u32 status;
 + __u32 reserved;
 };
 
 /* for KVM_SET_GUEST_DEBUG */
 @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
* Type denotes h/w breakpoint, read watchpoint, write
* watchpoint or watchpoint (both read and write).
*/
 -#define KVMPPC_DEBUG_NOTYPE  0x0
 -#define KVMPPC_DEBUG_BREAKPOINT  (1UL  1)
 -#define KVMPPC_DEBUG_WATCH_WRITE (1UL  2)
 -#define KVMPPC_DEBUG_WATCH_READ  (1UL  3)
   __u32 type;
   __u32 reserved;
   } bp[16];
 diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
 index 1de93a8..bf20056 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 @@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
 +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
 +{
 + /* Synchronize guest's desire to get debug interrupts into shadow MSR */
 +#ifndef CONFIG_KVM_BOOKE_HV
 + vcpu-arch.shadow_msr = ~MSR_DE;
 + vcpu-arch.shadow_msr |= vcpu-arch.shared-msr  MSR_DE;
 +#endif
 +
 + /* Force enable debug interrupts when user space wants to debug */
 + if (vcpu-guest_debug) {
 +#ifdef CONFIG_KVM_BOOKE_HV
 + /*
 +  * Since there is no shadow MSR, sync MSR_DE into the guest
 +  * visible MSR. Do not allow guest to change MSR[DE].
 +  */
 + vcpu-arch.shared-msr |= MSR_DE;
 + mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);

This mtspr should really just be a bit or in shadow_mspr when guest_debug gets 
enabled. It should automatically get synchronized as soon as the next 
vpcu_load() happens.

Also, what happens when user space disables guest_debug?

 +#else
 + vcpu-arch.shadow_msr |= MSR_DE;
 + vcpu-arch.shared-msr = ~MSR_DE;
 +#endif
 + }
 +}
 +
 /*
  * Helper function for full MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
 @@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
   kvmppc_mmu_msr_notify(vcpu, old_msr);
   kvmppc_vcpu_sync_spe(vcpu);
   kvmppc_vcpu_sync_fpu(vcpu);
 + kvmppc_vcpu_sync_debug(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu

Re: [PATCH 2/4 v2] KVM: PPC: debug stub interface parameter defined

2013-03-28 Thread Alexander Graf


On 21.03.2013, at 07:24, Bharat Bhushan wrote:

 From: Bharat Bhushan bharat.bhus...@freescale.com
 
 This patch defines the interface parameter for KVM_SET_GUEST_DEBUG
 ioctl support. Follow up patches will use this for setting up
 hardware breakpoints, watchpoints and software breakpoints.
 
 Also kvm_arch_vcpu_ioctl_set_guest_debug() is brought one level below.
 This is because I am not sure what is required for book3s. So this ioctl
 behaviour will not change for book3s.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 v2:
 - No Change
 
 arch/powerpc/include/uapi/asm/kvm.h |   23 +++
 arch/powerpc/kvm/book3s.c   |6 ++
 arch/powerpc/kvm/booke.c|6 ++
 arch/powerpc/kvm/powerpc.c  |6 --
 4 files changed, 35 insertions(+), 6 deletions(-)
 
 diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
 b/arch/powerpc/include/uapi/asm/kvm.h
 index c2ff99c..15f9a00 100644
 --- a/arch/powerpc/include/uapi/asm/kvm.h
 +++ b/arch/powerpc/include/uapi/asm/kvm.h
 @@ -272,8 +272,31 @@ struct kvm_debug_exit_arch {
 
 /* for KVM_SET_GUEST_DEBUG */
 struct kvm_guest_debug_arch {
 + struct {
 + /* H/W breakpoint/watchpoint address */
 + __u64 addr;
 + /*
 +  * Type denotes h/w breakpoint, read watchpoint, write
 +  * watchpoint or watchpoint (both read and write).
 +  */
 +#define KVMPPC_DEBUG_NOTYPE  0x0
 +#define KVMPPC_DEBUG_BREAKPOINT  (1UL  1)
 +#define KVMPPC_DEBUG_WATCH_WRITE (1UL  2)
 +#define KVMPPC_DEBUG_WATCH_READ  (1UL  3)

Are you sure you want to introduce these here, just to remove them again in a 
later patch?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/4 v2] Added ONE_REG interface for debug instruction

2013-03-28 Thread Alexander Graf


On 21.03.2013, at 07:24, Bharat Bhushan wrote:

 This patch adds the one_reg interface to get the special instruction
 to be used for setting software breakpoint from userspace.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com

Thanks, applied to kvm-ppc-queue.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 2/4 v2] KVM: PPC: debug stub interface parameter defined

2013-03-28 Thread Bhushan Bharat-R65777



 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, March 29, 2013 7:26 AM
 To: Bhushan Bharat-R65777
 Cc: kvm-ppc@vger.kernel.org; k...@vger.kernel.org; Wood Scott-B07421; Bhushan
 Bharat-R65777
 Subject: Re: [PATCH 2/4 v2] KVM: PPC: debug stub interface parameter defined
 
 
 On 21.03.2013, at 07:24, Bharat Bhushan wrote:
 
  From: Bharat Bhushan bharat.bhus...@freescale.com
 
  This patch defines the interface parameter for KVM_SET_GUEST_DEBUG
  ioctl support. Follow up patches will use this for setting up hardware
  breakpoints, watchpoints and software breakpoints.
 
  Also kvm_arch_vcpu_ioctl_set_guest_debug() is brought one level below.
  This is because I am not sure what is required for book3s. So this
  ioctl behaviour will not change for book3s.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  v2:
  - No Change
 
  arch/powerpc/include/uapi/asm/kvm.h |   23 +++
  arch/powerpc/kvm/book3s.c   |6 ++
  arch/powerpc/kvm/booke.c|6 ++
  arch/powerpc/kvm/powerpc.c  |6 --
  4 files changed, 35 insertions(+), 6 deletions(-)
 
  diff --git a/arch/powerpc/include/uapi/asm/kvm.h
  b/arch/powerpc/include/uapi/asm/kvm.h
  index c2ff99c..15f9a00 100644
  --- a/arch/powerpc/include/uapi/asm/kvm.h
  +++ b/arch/powerpc/include/uapi/asm/kvm.h
  @@ -272,8 +272,31 @@ struct kvm_debug_exit_arch {
 
  /* for KVM_SET_GUEST_DEBUG */
  struct kvm_guest_debug_arch {
  +   struct {
  +   /* H/W breakpoint/watchpoint address */
  +   __u64 addr;
  +   /*
  +* Type denotes h/w breakpoint, read watchpoint, write
  +* watchpoint or watchpoint (both read and write).
  +*/
  +#define KVMPPC_DEBUG_NOTYPE0x0
  +#define KVMPPC_DEBUG_BREAKPOINT(1UL  1)
  +#define KVMPPC_DEBUG_WATCH_WRITE   (1UL  2)
  +#define KVMPPC_DEBUG_WATCH_READ(1UL  3)
 
 Are you sure you want to introduce these here, just to remove them again in a
 later patch?

Up to this patch the scope was limited to this structure. So for clarity I 
defined here and later the scope expands so moved out of this structure. I do 
not think this really matters, let me know how you want to see ?

-Bharat

 
 
 Alex
 


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

52 matches

Mail list logo