[PATCH v10 0/4] tcm_vhost hotplug

2013-04-25 Thread Asias He
Asias He (4):
  tcm_vhost: Refactor the lock nesting rule
  tcm_vhost: Add hotplug/hotunplug support
  tcm_vhost: Add ioctl to get and set events missed flag
  tcm_vhost: Enable VIRTIO_SCSI_F_HOTPLUG

 drivers/vhost/tcm_vhost.c | 262 +++---
 drivers/vhost/tcm_vhost.h |  13 +++
 2 files changed, 259 insertions(+), 16 deletions(-)

-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v10 1/4] tcm_vhost: Refactor the lock nesting rule

2013-04-25 Thread Asias He
We want to use tcm_vhost_mutex to make sure hotplug/hotunplug will not
happen when set_endpoint/clear_endpoint is in process.

Signed-off-by: Asias He as...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 32 +++-
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 957a0b9..822cd1f 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -808,6 +808,9 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
 /*
  * Called from vhost_scsi_ioctl() context to walk the list of available
  * tcm_vhost_tpg with an active struct tcm_vhost_nexus
+ *
+ *  The lock nesting rule is:
+ *    tcm_vhost_mutex -> vs->dev.mutex -> tpg->tv_tpg_mutex -> vq->mutex
  */
 static int vhost_scsi_set_endpoint(
struct vhost_scsi *vs,
@@ -820,26 +823,27 @@ static int vhost_scsi_set_endpoint(
int index, ret, i, len;
bool match = false;
 
+   mutex_lock(tcm_vhost_mutex);
mutex_lock(vs-dev.mutex);
+
/* Verify that ring has been setup correctly. */
for (index = 0; index  vs-dev.nvqs; ++index) {
/* Verify that ring has been setup correctly. */
if (!vhost_vq_access_ok(vs-vqs[index])) {
-   mutex_unlock(vs-dev.mutex);
-   return -EFAULT;
+   ret = -EFAULT;
+   goto out;
}
}
 
len = sizeof(vs_tpg[0]) * VHOST_SCSI_MAX_TARGET;
vs_tpg = kzalloc(len, GFP_KERNEL);
if (!vs_tpg) {
-   mutex_unlock(vs-dev.mutex);
-   return -ENOMEM;
+   ret = -ENOMEM;
+   goto out;
}
if (vs-vs_tpg)
memcpy(vs_tpg, vs-vs_tpg, len);
 
-   mutex_lock(tcm_vhost_mutex);
list_for_each_entry(tv_tpg, tcm_vhost_list, tv_tpg_list) {
mutex_lock(tv_tpg-tv_tpg_mutex);
if (!tv_tpg-tpg_nexus) {
@@ -854,11 +858,10 @@ static int vhost_scsi_set_endpoint(
 
if (!strcmp(tv_tport-tport_name, t-vhost_wwpn)) {
if (vs-vs_tpg  vs-vs_tpg[tv_tpg-tport_tpgt]) {
-   mutex_unlock(tv_tpg-tv_tpg_mutex);
-   mutex_unlock(tcm_vhost_mutex);
-   mutex_unlock(vs-dev.mutex);
kfree(vs_tpg);
-   return -EEXIST;
+   mutex_unlock(tv_tpg-tv_tpg_mutex);
+   ret = -EEXIST;
+   goto out;
}
tv_tpg-tv_tpg_vhost_count++;
vs_tpg[tv_tpg-tport_tpgt] = tv_tpg;
@@ -867,7 +870,6 @@ static int vhost_scsi_set_endpoint(
}
mutex_unlock(tv_tpg-tv_tpg_mutex);
}
-   mutex_unlock(tcm_vhost_mutex);
 
if (match) {
memcpy(vs-vs_vhost_wwpn, t-vhost_wwpn,
@@ -893,7 +895,9 @@ static int vhost_scsi_set_endpoint(
kfree(vs-vs_tpg);
vs-vs_tpg = vs_tpg;
 
+out:
mutex_unlock(vs-dev.mutex);
+   mutex_unlock(tcm_vhost_mutex);
return ret;
 }
 
@@ -908,6 +912,7 @@ static int vhost_scsi_clear_endpoint(
int index, ret, i;
u8 target;
 
+   mutex_lock(tcm_vhost_mutex);
mutex_lock(vs-dev.mutex);
/* Verify that ring has been setup correctly. */
for (index = 0; index  vs-dev.nvqs; ++index) {
@@ -918,8 +923,8 @@ static int vhost_scsi_clear_endpoint(
}
 
if (!vs-vs_tpg) {
-   mutex_unlock(vs-dev.mutex);
-   return 0;
+   ret = 0;
+   goto err_dev;
}
 
for (i = 0; i  VHOST_SCSI_MAX_TARGET; i++) {
@@ -965,13 +970,14 @@ static int vhost_scsi_clear_endpoint(
kfree(vs-vs_tpg);
vs-vs_tpg = NULL;
mutex_unlock(vs-dev.mutex);
-
+   mutex_unlock(tcm_vhost_mutex);
return 0;
 
 err_tpg:
mutex_unlock(tv_tpg-tv_tpg_mutex);
 err_dev:
mutex_unlock(vs-dev.mutex);
+   mutex_unlock(tcm_vhost_mutex);
return ret;
 }
 
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v10 2/4] tcm_vhost: Add hotplug/hotunplug support

2013-04-25 Thread Asias He
In commit 365a7150094 ([SCSI] virtio-scsi: hotplug support for
virtio-scsi), hotplug support is added to virtio-scsi.

This patch adds hotplug and hotunplug support to tcm_vhost.

You can create or delete a LUN in targetcli to hotplug or hotunplug a
LUN in guest.

Changes in v8:
- Use vq-mutex for event
- Drop tcm_vhost: Add helper to check if endpoint is setup
- Rename vs_events_dropped to vs_events_missed
- Init lun[] explicitly

Changes in v7:
- Add vhost_work_flush for vs-vs_event_work to this series

Changes in v6:
- Pass tcm_vhost_evt to tcm_vhost_do_evt_work

Changes in v5:
- Switch to int from u64 to vs_events_nr
- Set s-vs_events_dropped flag in tcm_vhost_allocate_evt
- Do not nest dev mutex within vq mutex
- Use vs_events_lock to protect vs_events_dropped and vs_events_nr
- Rebase to target/master

Changes in v4:
- Drop tcm_vhost_check_endpoint in tcm_vhost_send_evt
- Add tcm_vhost_check_endpoint in vhost_scsi_evt_handle_kick

Changes in v3:
- Separate the bug fix to another thread

Changes in v2:
- Remove code duplication in tcm_vhost_{hotplug,hotunplug}
- Fix racing of vs_events_nr
- Add flush fix patch to this series

Signed-off-by: Asias He as...@redhat.com
Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 210 +-
 drivers/vhost/tcm_vhost.h |  10 +++
 2 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 822cd1f..5340fd7 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -71,6 +71,7 @@ enum {
 
 #define VHOST_SCSI_MAX_TARGET  256
 #define VHOST_SCSI_MAX_VQ  128
+#define VHOST_SCSI_MAX_EVENT   128
 
 struct vhost_scsi {
/* Protected by vhost_scsi-dev.mutex */
@@ -82,6 +83,12 @@ struct vhost_scsi {
 
struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */
+
+   struct vhost_work vs_event_work; /* evt injection work item */
+   struct llist_head vs_event_list; /* evt injection queue */
+
+   bool vs_events_missed; /* any missed events, protected by vq-mutex */
+   int vs_events_nr; /* num of pending events, protected by vq-mutex */
 };
 
 /* Local pointer to allocated TCM configfs fabric module */
@@ -349,6 +356,37 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
return 0;
 }
 
+static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt 
*evt)
+{
+   vs-vs_events_nr--;
+   kfree(evt);
+}
+
+static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
+   u32 event, u32 reason)
+{
+   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
+   struct tcm_vhost_evt *evt;
+
+   if (vs-vs_events_nr  VHOST_SCSI_MAX_EVENT) {
+   vs-vs_events_missed = true;
+   return NULL;
+   }
+
+   evt = kzalloc(sizeof(*evt), GFP_KERNEL);
+   if (!evt) {
+   vq_err(vq, Failed to allocate tcm_vhost_evt\n);
+   vs-vs_events_missed = true;
+   return NULL;
+   }
+
+   evt-event.event = event;
+   evt-event.reason = reason;
+   vs-vs_events_nr++;
+
+   return evt;
+}
+
 static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
 {
struct se_cmd *se_cmd = tv_cmd-tvc_se_cmd;
@@ -367,6 +405,75 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd 
*tv_cmd)
kfree(tv_cmd);
 }
 
+static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
+   struct tcm_vhost_evt *evt)
+{
+   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
+   struct virtio_scsi_event *event = evt-event;
+   struct virtio_scsi_event __user *eventp;
+   unsigned out, in;
+   int head, ret;
+
+   if (!vq-private_data) {
+   vs-vs_events_missed = true;
+   return;
+   }
+
+again:
+   vhost_disable_notify(vs-dev, vq);
+   head = vhost_get_vq_desc(vs-dev, vq, vq-iov,
+   ARRAY_SIZE(vq-iov), out, in,
+   NULL, NULL);
+   if (head  0) {
+   vs-vs_events_missed = true;
+   return;
+   }
+   if (head == vq-num) {
+   if (vhost_enable_notify(vs-dev, vq))
+   goto again;
+   vs-vs_events_missed = true;
+   return;
+   }
+
+   if ((vq-iov[out].iov_len != sizeof(struct virtio_scsi_event))) {
+   vq_err(vq, Expecting virtio_scsi_event, got %zu bytes\n,
+   vq-iov[out].iov_len);
+   vs-vs_events_missed = true;
+   return;
+   }
+
+   if (vs-vs_events_missed) {
+   event-event |= VIRTIO_SCSI_T_EVENTS_MISSED;
+   vs-vs_events_missed = false;
+   }
+
+   eventp = vq-iov[out].iov_base;
+   ret = __copy_to_user(eventp, event, sizeof(*event));
+   if (!ret)
+   

[PATCH v10 4/4] tcm_vhost: Enable VIRTIO_SCSI_F_HOTPLUG

2013-04-25 Thread Asias He
Everything for hotplug is ready. Let's enable the feature bit.

Signed-off-by: Asias He as...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 07217d8..1677238 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -66,7 +66,8 @@ enum {
  * TODO: debug and remove the workaround.
  */
 enum {
-   VHOST_SCSI_FEATURES = VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)
+   VHOST_SCSI_FEATURES = (VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)) |
+ (1ULL  VIRTIO_SCSI_F_HOTPLUG)
 };
 
 #define VHOST_SCSI_MAX_TARGET  256
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v10 3/4] tcm_vhost: Add ioctl to get and set events missed flag

2013-04-25 Thread Asias He
Signed-off-by: Asias He as...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 17 +
 drivers/vhost/tcm_vhost.h |  3 +++
 2 files changed, 20 insertions(+)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 5340fd7..07217d8 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -1200,8 +1200,11 @@ static long vhost_scsi_ioctl(struct file *f, unsigned 
int ioctl,
struct vhost_scsi_target backend;
void __user *argp = (void __user *)arg;
u64 __user *featurep = argp;
+   u32 __user *eventsp = argp;
+   u32 events_missed;
u64 features;
int r, abi_version = VHOST_SCSI_ABI_VERSION;
+   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
 
switch (ioctl) {
case VHOST_SCSI_SET_ENDPOINT:
@@ -1222,6 +1225,20 @@ static long vhost_scsi_ioctl(struct file *f, unsigned 
int ioctl,
if (copy_to_user(argp, abi_version, sizeof abi_version))
return -EFAULT;
return 0;
+   case VHOST_SCSI_SET_EVENTS_MISSED:
+   if (get_user(events_missed, eventsp))
+   return -EFAULT;
+   mutex_lock(vq-mutex);
+   vs-vs_events_missed = events_missed;
+   mutex_unlock(vq-mutex);
+   return 0;
+   case VHOST_SCSI_GET_EVENTS_MISSED:
+   mutex_lock(vq-mutex);
+   events_missed = vs-vs_events_missed;
+   mutex_unlock(vq-mutex);
+   if (put_user(events_missed, eventsp))
+   return -EFAULT;
+   return 0;
case VHOST_GET_FEATURES:
features = VHOST_SCSI_FEATURES;
if (copy_to_user(featurep, features, sizeof features))
diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h
index a545a5b..514b9fd 100644
--- a/drivers/vhost/tcm_vhost.h
+++ b/drivers/vhost/tcm_vhost.h
@@ -123,3 +123,6 @@ struct vhost_scsi_target {
 #define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct 
vhost_scsi_target)
 /* Changing this breaks userspace. */
 #define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
+/* Set and get the events missed flag */
+#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
+#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Para-Virtualized Clock Usage

2013-04-25 Thread Gleb Natapov
On Thu, Apr 25, 2013 at 12:28:35AM +, Joji Mekkattuparamban (joji) wrote:
 Thank you Gleb and Marcelo. I will migrate the API using gettimeofday.
 
 Is there any dependency on the QEMU or the Guest? If the host supports 
 pvclock and the guest invokes gettimeofday, would the pvclock be 
 automatically used? Or do I require a patch in either the Qemu or the guest 
 kernel?
 
Guest and host kernel should be at least 3.8. IIRC there is no QEMU
version dependency.

 Thanks!
 Joji.
 
 -Original Message-
 From: Marcelo Tosatti [mailto:mtosa...@redhat.com] 
 Sent: Wednesday, April 24, 2013 1:28 AM
 To: Gleb Natapov
 Cc: Joji Mekkattuparamban (joji); qemu-de...@nongnu.org; kvm@vger.kernel.org
 Subject: Re: [Qemu-devel] Para-Virtualized Clock Usage
 
 On Tue, Apr 23, 2013 at 08:52:16AM +0300, Gleb Natapov wrote:
  On Mon, Apr 22, 2013 at 04:58:01PM +, Joji Mekkattuparamban (joji) 
  wrote:
   Greetings,
   
   I have a SMP guest application, running on the 2.6.27 Linux kernel. The 
   application, originally written for bare metal, makes extensive use of 
   the TSC, by directly invoking rdtsc from the user space for timestamp 
   purposes. While running on KVM (RHEL version 6.3), we are running into 
   TSC issues on some hardware. As a solution, I am considering migrating to 
   the pvclock. I am wondering if there is an example for migrating from TSC 
   to the pvclock. Any pointers?
   
  Wrong list, you should ask KVM (copied). Recent kernels have pvclock 
  vdso support which means that gettimeofday() uses it without entering 
  the kernel. Marcelo?
  
  --
  Gleb.
 
 Converting application to make use of gettimeofday() should be the best way 
 to make use of pvclock, yes.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v10 2/4] tcm_vhost: Add hotplug/hotunplug support

2013-04-25 Thread Asias He
MST wants one without change history in commit log.

From 8996c9464fae1f28d0bd729677a3917d204990ec Mon Sep 17 00:00:00 2001
From: Asias He as...@redhat.com
Date: Thu, 25 Apr 2013 09:51:26 +0800
Subject: [PATCH v10 2/4] tcm_vhost: Add hotplug/hotunplug support

In commit 365a7150094 ([SCSI] virtio-scsi: hotplug support for
virtio-scsi), hotplug support is added to virtio-scsi.

This patch adds hotplug and hotunplug support to tcm_vhost.

You can create or delete a LUN in targetcli to hotplug or hotunplug a
LUN in guest.

Signed-off-by: Asias He as...@redhat.com
Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
---

Changes in v8:
- Use vq-mutex for event
- Drop tcm_vhost: Add helper to check if endpoint is setup
- Rename vs_events_dropped to vs_events_missed
- Init lun[] explicitly

Changes in v7:
- Add vhost_work_flush for vs-vs_event_work to this series

Changes in v6:
- Pass tcm_vhost_evt to tcm_vhost_do_evt_work

Changes in v5:
- Switch to int from u64 to vs_events_nr
- Set s-vs_events_dropped flag in tcm_vhost_allocate_evt
- Do not nest dev mutex within vq mutex
- Use vs_events_lock to protect vs_events_dropped and vs_events_nr
- Rebase to target/master

Changes in v4:
- Drop tcm_vhost_check_endpoint in tcm_vhost_send_evt
- Add tcm_vhost_check_endpoint in vhost_scsi_evt_handle_kick

Changes in v3:
- Separate the bug fix to another thread

Changes in v2:
- Remove code duplication in tcm_vhost_{hotplug,hotunplug}
- Fix racing of vs_events_nr
- Add flush fix patch to this series

 drivers/vhost/tcm_vhost.c | 210 +-
 drivers/vhost/tcm_vhost.h |  10 +++
 2 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 822cd1f..5340fd7 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -71,6 +71,7 @@ enum {
 
 #define VHOST_SCSI_MAX_TARGET  256
 #define VHOST_SCSI_MAX_VQ  128
+#define VHOST_SCSI_MAX_EVENT   128
 
 struct vhost_scsi {
/* Protected by vhost_scsi-dev.mutex */
@@ -82,6 +83,12 @@ struct vhost_scsi {
 
struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */
+
+   struct vhost_work vs_event_work; /* evt injection work item */
+   struct llist_head vs_event_list; /* evt injection queue */
+
+   bool vs_events_missed; /* any missed events, protected by vq-mutex */
+   int vs_events_nr; /* num of pending events, protected by vq-mutex */
 };
 
 /* Local pointer to allocated TCM configfs fabric module */
@@ -349,6 +356,37 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
return 0;
 }
 
+static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt 
*evt)
+{
+   vs-vs_events_nr--;
+   kfree(evt);
+}
+
+static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
+   u32 event, u32 reason)
+{
+   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
+   struct tcm_vhost_evt *evt;
+
+   if (vs-vs_events_nr  VHOST_SCSI_MAX_EVENT) {
+   vs-vs_events_missed = true;
+   return NULL;
+   }
+
+   evt = kzalloc(sizeof(*evt), GFP_KERNEL);
+   if (!evt) {
+   vq_err(vq, Failed to allocate tcm_vhost_evt\n);
+   vs-vs_events_missed = true;
+   return NULL;
+   }
+
+   evt-event.event = event;
+   evt-event.reason = reason;
+   vs-vs_events_nr++;
+
+   return evt;
+}
+
 static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
 {
struct se_cmd *se_cmd = tv_cmd-tvc_se_cmd;
@@ -367,6 +405,75 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd 
*tv_cmd)
kfree(tv_cmd);
 }
 
+static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
+   struct tcm_vhost_evt *evt)
+{
+   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
+   struct virtio_scsi_event *event = evt-event;
+   struct virtio_scsi_event __user *eventp;
+   unsigned out, in;
+   int head, ret;
+
+   if (!vq-private_data) {
+   vs-vs_events_missed = true;
+   return;
+   }
+
+again:
+   vhost_disable_notify(vs-dev, vq);
+   head = vhost_get_vq_desc(vs-dev, vq, vq-iov,
+   ARRAY_SIZE(vq-iov), out, in,
+   NULL, NULL);
+   if (head  0) {
+   vs-vs_events_missed = true;
+   return;
+   }
+   if (head == vq-num) {
+   if (vhost_enable_notify(vs-dev, vq))
+   goto again;
+   vs-vs_events_missed = true;
+   return;
+   }
+
+   if ((vq-iov[out].iov_len != sizeof(struct virtio_scsi_event))) {
+   vq_err(vq, Expecting virtio_scsi_event, got %zu bytes\n,
+   vq-iov[out].iov_len);
+   vs-vs_events_missed = true;
+   return;
+   }
+
+   if (vs-vs_events_missed) {
+   

Re: [PATCH v10 2/4] tcm_vhost: Add hotplug/hotunplug support

2013-04-25 Thread Michael S. Tsirkin
On Thu, Apr 25, 2013 at 03:14:11PM +0800, Asias He wrote:
 MST wants one without change history in commit log.

So post v11, and add changes since v9 and since v10
after ---.
Make life easy for maintainers please.

 From 8996c9464fae1f28d0bd729677a3917d204990ec Mon Sep 17 00:00:00 2001
 From: Asias He as...@redhat.com
 Date: Thu, 25 Apr 2013 09:51:26 +0800
 Subject: [PATCH v10 2/4] tcm_vhost: Add hotplug/hotunplug support
 
 In commit 365a7150094 ([SCSI] virtio-scsi: hotplug support for
 virtio-scsi), hotplug support is added to virtio-scsi.
 
 This patch adds hotplug and hotunplug support to tcm_vhost.
 
 You can create or delete a LUN in targetcli to hotplug or hotunplug a
 LUN in guest.
 
 Signed-off-by: Asias He as...@redhat.com
 Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
 ---
 
 Changes in v8:
 - Use vq-mutex for event
 - Drop tcm_vhost: Add helper to check if endpoint is setup
 - Rename vs_events_dropped to vs_events_missed
 - Init lun[] explicitly
 
 Changes in v7:
 - Add vhost_work_flush for vs-vs_event_work to this series
 
 Changes in v6:
 - Pass tcm_vhost_evt to tcm_vhost_do_evt_work
 
 Changes in v5:
 - Switch to int from u64 to vs_events_nr
 - Set s-vs_events_dropped flag in tcm_vhost_allocate_evt
 - Do not nest dev mutex within vq mutex
 - Use vs_events_lock to protect vs_events_dropped and vs_events_nr
 - Rebase to target/master
 
 Changes in v4:
 - Drop tcm_vhost_check_endpoint in tcm_vhost_send_evt
 - Add tcm_vhost_check_endpoint in vhost_scsi_evt_handle_kick
 
 Changes in v3:
 - Separate the bug fix to another thread
 
 Changes in v2:
 - Remove code duplication in tcm_vhost_{hotplug,hotunplug}
 - Fix racing of vs_events_nr
 - Add flush fix patch to this series
 
  drivers/vhost/tcm_vhost.c | 210 
 +-
  drivers/vhost/tcm_vhost.h |  10 +++
  2 files changed, 218 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 822cd1f..5340fd7 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -71,6 +71,7 @@ enum {
  
  #define VHOST_SCSI_MAX_TARGET256
  #define VHOST_SCSI_MAX_VQ128
 +#define VHOST_SCSI_MAX_EVENT 128
  
  struct vhost_scsi {
   /* Protected by vhost_scsi-dev.mutex */
 @@ -82,6 +83,12 @@ struct vhost_scsi {
  
   struct vhost_work vs_completion_work; /* cmd completion work item */
   struct llist_head vs_completion_list; /* cmd completion queue */
 +
 + struct vhost_work vs_event_work; /* evt injection work item */
 + struct llist_head vs_event_list; /* evt injection queue */
 +
 + bool vs_events_missed; /* any missed events, protected by vq-mutex */
 + int vs_events_nr; /* num of pending events, protected by vq-mutex */
  };
  
  /* Local pointer to allocated TCM configfs fabric module */
 @@ -349,6 +356,37 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
   return 0;
  }
  
 +static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt 
 *evt)
 +{
 + vs-vs_events_nr--;
 + kfree(evt);
 +}
 +
 +static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
 + u32 event, u32 reason)
 +{
 + struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
 + struct tcm_vhost_evt *evt;
 +
 + if (vs-vs_events_nr  VHOST_SCSI_MAX_EVENT) {
 + vs-vs_events_missed = true;
 + return NULL;
 + }
 +
 + evt = kzalloc(sizeof(*evt), GFP_KERNEL);
 + if (!evt) {
 + vq_err(vq, Failed to allocate tcm_vhost_evt\n);
 + vs-vs_events_missed = true;
 + return NULL;
 + }
 +
 + evt-event.event = event;
 + evt-event.reason = reason;
 + vs-vs_events_nr++;
 +
 + return evt;
 +}
 +
  static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
  {
   struct se_cmd *se_cmd = tv_cmd-tvc_se_cmd;
 @@ -367,6 +405,75 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd 
 *tv_cmd)
   kfree(tv_cmd);
  }
  
 +static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
 + struct tcm_vhost_evt *evt)
 +{
 + struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
 + struct virtio_scsi_event *event = evt-event;
 + struct virtio_scsi_event __user *eventp;
 + unsigned out, in;
 + int head, ret;
 +
 + if (!vq-private_data) {
 + vs-vs_events_missed = true;
 + return;
 + }
 +
 +again:
 + vhost_disable_notify(vs-dev, vq);
 + head = vhost_get_vq_desc(vs-dev, vq, vq-iov,
 + ARRAY_SIZE(vq-iov), out, in,
 + NULL, NULL);
 + if (head  0) {
 + vs-vs_events_missed = true;
 + return;
 + }
 + if (head == vq-num) {
 + if (vhost_enable_notify(vs-dev, vq))
 + goto again;
 + vs-vs_events_missed = true;
 + return;
 + }
 +
 + if ((vq-iov[out].iov_len != sizeof(struct virtio_scsi_event))) {
 + vq_err(vq, Expecting 

Re: [PATCH 0/7] KVM: irqfd generalization prepare patch set

2013-04-25 Thread Gleb Natapov
On Wed, Apr 24, 2013 at 01:20:31PM +0300, Gleb Natapov wrote:
 On Tue, Apr 16, 2013 at 07:26:08PM +0200, Alexander Graf wrote:
  The concept of an irqfd and interrupt routing are nothing particularly tied
  into the IOAPIC implementation. In fact, most of the code already is 
  perfectly
  generic.
  
  This patch set decouples most bits of the existing irqchip and irqfd
  implementation to make it reusable for non-IOAPIC platforms, like the PPC 
  MPIC.
  
  I also have a patch that implements working irqfd support on top of these,
  but that requires the in-kernel MPIC implementation to go upstream first, so
  I'm holding off on it until we settled everything there, so the concept
  certainly does work.
  
  Alex
  
 Nice cleanup, thanks! Should expect a new series with ifdef
 kvm_irqchip and ia64 compilation fixed. The fixes are minor enough for
 me to fix them while applying.
 
Actually the series does not apply any more and has to be rebased on top of the
current queue.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v11 0/4] tcm_vhost hotplug

2013-04-25 Thread Asias He
Changes in v11
- Drop change log history in commit log

Changes in v10
- Drop comments about lun
- Add Enable VIRTIO_SCSI_F_HOTPLUG to this series

Changes in v9
- Drop tcm_vhost_check_feature
- Add Refactor the lock nesting rule to this series

Asias He (4):
  tcm_vhost: Refactor the lock nesting rule
  tcm_vhost: Add hotplug/hotunplug support
  tcm_vhost: Add ioctl to get and set events missed flag
  tcm_vhost: Enable VIRTIO_SCSI_F_HOTPLUG

 drivers/vhost/tcm_vhost.c | 262 +++---
 drivers/vhost/tcm_vhost.h |  13 +++
 2 files changed, 259 insertions(+), 16 deletions(-)

-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v11 1/4] tcm_vhost: Refactor the lock nesting rule

2013-04-25 Thread Asias He
We want to use tcm_vhost_mutex to make sure hotplug/hotunplug will not
happen when set_endpoint/clear_endpoint is in process.

Signed-off-by: Asias He as...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 32 +++-
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 957a0b9..822cd1f 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -808,6 +808,9 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
 /*
  * Called from vhost_scsi_ioctl() context to walk the list of available
  * tcm_vhost_tpg with an active struct tcm_vhost_nexus
+ *
+ *  The lock nesting rule is:
+ *    tcm_vhost_mutex -> vs->dev.mutex -> tpg->tv_tpg_mutex -> vq->mutex
  */
 static int vhost_scsi_set_endpoint(
struct vhost_scsi *vs,
@@ -820,26 +823,27 @@ static int vhost_scsi_set_endpoint(
int index, ret, i, len;
bool match = false;
 
+   mutex_lock(tcm_vhost_mutex);
mutex_lock(vs-dev.mutex);
+
/* Verify that ring has been setup correctly. */
for (index = 0; index  vs-dev.nvqs; ++index) {
/* Verify that ring has been setup correctly. */
if (!vhost_vq_access_ok(vs-vqs[index])) {
-   mutex_unlock(vs-dev.mutex);
-   return -EFAULT;
+   ret = -EFAULT;
+   goto out;
}
}
 
len = sizeof(vs_tpg[0]) * VHOST_SCSI_MAX_TARGET;
vs_tpg = kzalloc(len, GFP_KERNEL);
if (!vs_tpg) {
-   mutex_unlock(vs-dev.mutex);
-   return -ENOMEM;
+   ret = -ENOMEM;
+   goto out;
}
if (vs-vs_tpg)
memcpy(vs_tpg, vs-vs_tpg, len);
 
-   mutex_lock(tcm_vhost_mutex);
list_for_each_entry(tv_tpg, tcm_vhost_list, tv_tpg_list) {
mutex_lock(tv_tpg-tv_tpg_mutex);
if (!tv_tpg-tpg_nexus) {
@@ -854,11 +858,10 @@ static int vhost_scsi_set_endpoint(
 
if (!strcmp(tv_tport-tport_name, t-vhost_wwpn)) {
if (vs-vs_tpg  vs-vs_tpg[tv_tpg-tport_tpgt]) {
-   mutex_unlock(tv_tpg-tv_tpg_mutex);
-   mutex_unlock(tcm_vhost_mutex);
-   mutex_unlock(vs-dev.mutex);
kfree(vs_tpg);
-   return -EEXIST;
+   mutex_unlock(tv_tpg-tv_tpg_mutex);
+   ret = -EEXIST;
+   goto out;
}
tv_tpg-tv_tpg_vhost_count++;
vs_tpg[tv_tpg-tport_tpgt] = tv_tpg;
@@ -867,7 +870,6 @@ static int vhost_scsi_set_endpoint(
}
mutex_unlock(tv_tpg-tv_tpg_mutex);
}
-   mutex_unlock(tcm_vhost_mutex);
 
if (match) {
memcpy(vs-vs_vhost_wwpn, t-vhost_wwpn,
@@ -893,7 +895,9 @@ static int vhost_scsi_set_endpoint(
kfree(vs-vs_tpg);
vs-vs_tpg = vs_tpg;
 
+out:
mutex_unlock(vs-dev.mutex);
+   mutex_unlock(tcm_vhost_mutex);
return ret;
 }
 
@@ -908,6 +912,7 @@ static int vhost_scsi_clear_endpoint(
int index, ret, i;
u8 target;
 
+   mutex_lock(tcm_vhost_mutex);
mutex_lock(vs-dev.mutex);
/* Verify that ring has been setup correctly. */
for (index = 0; index  vs-dev.nvqs; ++index) {
@@ -918,8 +923,8 @@ static int vhost_scsi_clear_endpoint(
}
 
if (!vs-vs_tpg) {
-   mutex_unlock(vs-dev.mutex);
-   return 0;
+   ret = 0;
+   goto err_dev;
}
 
for (i = 0; i  VHOST_SCSI_MAX_TARGET; i++) {
@@ -965,13 +970,14 @@ static int vhost_scsi_clear_endpoint(
kfree(vs-vs_tpg);
vs-vs_tpg = NULL;
mutex_unlock(vs-dev.mutex);
-
+   mutex_unlock(tcm_vhost_mutex);
return 0;
 
 err_tpg:
mutex_unlock(tv_tpg-tv_tpg_mutex);
 err_dev:
mutex_unlock(vs-dev.mutex);
+   mutex_unlock(tcm_vhost_mutex);
return ret;
 }
 
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v11 2/4] tcm_vhost: Add hotplug/hotunplug support

2013-04-25 Thread Asias He
In commit 365a7150094 ([SCSI] virtio-scsi: hotplug support for
virtio-scsi), hotplug support is added to virtio-scsi.

This patch adds hotplug and hotunplug support to tcm_vhost.

You can create or delete a LUN in targetcli to hotplug or hotunplug a
LUN in guest.

Signed-off-by: Asias He as...@redhat.com
Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 210 +-
 drivers/vhost/tcm_vhost.h |  10 +++
 2 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 822cd1f..5340fd7 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -71,6 +71,7 @@ enum {
 
 #define VHOST_SCSI_MAX_TARGET  256
 #define VHOST_SCSI_MAX_VQ  128
+#define VHOST_SCSI_MAX_EVENT   128
 
 struct vhost_scsi {
/* Protected by vhost_scsi-dev.mutex */
@@ -82,6 +83,12 @@ struct vhost_scsi {
 
struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */
+
+   struct vhost_work vs_event_work; /* evt injection work item */
+   struct llist_head vs_event_list; /* evt injection queue */
+
+   bool vs_events_missed; /* any missed events, protected by vq-mutex */
+   int vs_events_nr; /* num of pending events, protected by vq-mutex */
 };
 
 /* Local pointer to allocated TCM configfs fabric module */
@@ -349,6 +356,37 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
return 0;
 }
 
+static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt 
*evt)
+{
+   vs-vs_events_nr--;
+   kfree(evt);
+}
+
+static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
+   u32 event, u32 reason)
+{
+   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
+   struct tcm_vhost_evt *evt;
+
+   if (vs-vs_events_nr  VHOST_SCSI_MAX_EVENT) {
+   vs-vs_events_missed = true;
+   return NULL;
+   }
+
+   evt = kzalloc(sizeof(*evt), GFP_KERNEL);
+   if (!evt) {
+   vq_err(vq, Failed to allocate tcm_vhost_evt\n);
+   vs-vs_events_missed = true;
+   return NULL;
+   }
+
+   evt-event.event = event;
+   evt-event.reason = reason;
+   vs-vs_events_nr++;
+
+   return evt;
+}
+
 static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
 {
struct se_cmd *se_cmd = tv_cmd-tvc_se_cmd;
@@ -367,6 +405,75 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd 
*tv_cmd)
kfree(tv_cmd);
 }
 
+static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
+   struct tcm_vhost_evt *evt)
+{
+   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
+   struct virtio_scsi_event *event = evt-event;
+   struct virtio_scsi_event __user *eventp;
+   unsigned out, in;
+   int head, ret;
+
+   if (!vq-private_data) {
+   vs-vs_events_missed = true;
+   return;
+   }
+
+again:
+   vhost_disable_notify(vs-dev, vq);
+   head = vhost_get_vq_desc(vs-dev, vq, vq-iov,
+   ARRAY_SIZE(vq-iov), out, in,
+   NULL, NULL);
+   if (head  0) {
+   vs-vs_events_missed = true;
+   return;
+   }
+   if (head == vq-num) {
+   if (vhost_enable_notify(vs-dev, vq))
+   goto again;
+   vs-vs_events_missed = true;
+   return;
+   }
+
+   if ((vq-iov[out].iov_len != sizeof(struct virtio_scsi_event))) {
+   vq_err(vq, Expecting virtio_scsi_event, got %zu bytes\n,
+   vq-iov[out].iov_len);
+   vs-vs_events_missed = true;
+   return;
+   }
+
+   if (vs-vs_events_missed) {
+   event-event |= VIRTIO_SCSI_T_EVENTS_MISSED;
+   vs-vs_events_missed = false;
+   }
+
+   eventp = vq-iov[out].iov_base;
+   ret = __copy_to_user(eventp, event, sizeof(*event));
+   if (!ret)
+   vhost_add_used_and_signal(vs-dev, vq, head, 0);
+   else
+   vq_err(vq, Faulted on tcm_vhost_send_event\n);
+}
+
+static void tcm_vhost_evt_work(struct vhost_work *work)
+{
+   struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
+   vs_event_work);
+   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
+   struct tcm_vhost_evt *evt;
+   struct llist_node *llnode;
+
+   mutex_lock(vq-mutex);
+   llnode = llist_del_all(vs-vs_event_list);
+   while (llnode) {
+   evt = llist_entry(llnode, struct tcm_vhost_evt, list);
+   llnode = llist_next(llnode);
+   tcm_vhost_do_evt_work(vs, evt);
+   tcm_vhost_free_evt(vs, evt);
+   }
+   mutex_unlock(vq-mutex);
+}
+
 /* Fill in status and signal that we are done processing this command
  *
  * This is 

[PATCH v11 3/4] tcm_vhost: Add ioctl to get and set events missed flag

2013-04-25 Thread Asias He
Signed-off-by: Asias He as...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 17 +
 drivers/vhost/tcm_vhost.h |  3 +++
 2 files changed, 20 insertions(+)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 5340fd7..07217d8 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -1200,8 +1200,11 @@ static long vhost_scsi_ioctl(struct file *f, unsigned 
int ioctl,
struct vhost_scsi_target backend;
void __user *argp = (void __user *)arg;
u64 __user *featurep = argp;
+   u32 __user *eventsp = argp;
+   u32 events_missed;
u64 features;
int r, abi_version = VHOST_SCSI_ABI_VERSION;
+   struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
 
switch (ioctl) {
case VHOST_SCSI_SET_ENDPOINT:
@@ -1222,6 +1225,20 @@ static long vhost_scsi_ioctl(struct file *f, unsigned 
int ioctl,
if (copy_to_user(argp, abi_version, sizeof abi_version))
return -EFAULT;
return 0;
+   case VHOST_SCSI_SET_EVENTS_MISSED:
+   if (get_user(events_missed, eventsp))
+   return -EFAULT;
+   mutex_lock(vq-mutex);
+   vs-vs_events_missed = events_missed;
+   mutex_unlock(vq-mutex);
+   return 0;
+   case VHOST_SCSI_GET_EVENTS_MISSED:
+   mutex_lock(vq-mutex);
+   events_missed = vs-vs_events_missed;
+   mutex_unlock(vq-mutex);
+   if (put_user(events_missed, eventsp))
+   return -EFAULT;
+   return 0;
case VHOST_GET_FEATURES:
features = VHOST_SCSI_FEATURES;
if (copy_to_user(featurep, features, sizeof features))
diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h
index a545a5b..514b9fd 100644
--- a/drivers/vhost/tcm_vhost.h
+++ b/drivers/vhost/tcm_vhost.h
@@ -123,3 +123,6 @@ struct vhost_scsi_target {
 #define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct 
vhost_scsi_target)
 /* Changing this breaks userspace. */
 #define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
+/* Set and get the events missed flag */
+#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
+#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v11 4/4] tcm_vhost: Enable VIRTIO_SCSI_F_HOTPLUG

2013-04-25 Thread Asias He
Everything for hotplug is ready. Let's enable the feature bit.

Signed-off-by: Asias He as...@redhat.com
---
 drivers/vhost/tcm_vhost.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 07217d8..1677238 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -66,7 +66,8 @@ enum {
  * TODO: debug and remove the workaround.
  */
 enum {
-   VHOST_SCSI_FEATURES = VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)
+   VHOST_SCSI_FEATURES = (VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)) |
+ (1ULL  VIRTIO_SCSI_F_HOTPLUG)
 };
 
 #define VHOST_SCSI_MAX_TARGET  256
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v11 0/4] tcm_vhost hotplug

2013-04-25 Thread Michael S. Tsirkin
On Thu, Apr 25, 2013 at 03:35:19PM +0800, Asias He wrote:
 Changes in v11
 - Drop change log history in commit log
 
 Changes in v10
 - Drop comments about lun
 - Add Enable VIRTIO_SCSI_F_HOTPLUG to this series
 
 Changes in v9
 - Drop tcm_vhost_check_feature
 - Add Refactor the lock nesting rule to this series
 
 Asias He (4):
   tcm_vhost: Refactor the lock nesting rule
   tcm_vhost: Add hotplug/hotunplug support
   tcm_vhost: Add ioctl to get and set events missed flag
   tcm_vhost: Enable VIRTIO_SCSI_F_HOTPLUG
 
  drivers/vhost/tcm_vhost.c | 262 
 +++---
  drivers/vhost/tcm_vhost.h |  13 +++
  2 files changed, 259 insertions(+), 16 deletions(-)


Acked-by: Michael S. Tsirkin m...@redhat.com

 -- 
 1.8.1.4
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v11 1/4] tcm_vhost: Refactor the lock nesting rule

2013-04-25 Thread Michael S. Tsirkin
On Thu, Apr 25, 2013 at 03:35:20PM +0800, Asias He wrote:
 We want to use tcm_vhost_mutex to make sure hotplug/hotunplug will not
 happen when set_endpoint/clear_endpoint is in process.
 
 Signed-off-by: Asias He as...@redhat.com

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  drivers/vhost/tcm_vhost.c | 32 +++-
  1 file changed, 19 insertions(+), 13 deletions(-)
 
 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 957a0b9..822cd1f 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -808,6 +808,9 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
  /*
   * Called from vhost_scsi_ioctl() context to walk the list of available
   * tcm_vhost_tpg with an active struct tcm_vhost_nexus
 + *
 + *  The lock nesting rule is:
 + *tcm_vhost_mutex - vs-dev.mutex - tpg-tv_tpg_mutex - vq-mutex
   */
  static int vhost_scsi_set_endpoint(
   struct vhost_scsi *vs,
 @@ -820,26 +823,27 @@ static int vhost_scsi_set_endpoint(
   int index, ret, i, len;
   bool match = false;
  
 + mutex_lock(tcm_vhost_mutex);
   mutex_lock(vs-dev.mutex);
 +
   /* Verify that ring has been setup correctly. */
   for (index = 0; index  vs-dev.nvqs; ++index) {
   /* Verify that ring has been setup correctly. */
   if (!vhost_vq_access_ok(vs-vqs[index])) {
 - mutex_unlock(vs-dev.mutex);
 - return -EFAULT;
 + ret = -EFAULT;
 + goto out;
   }
   }
  
   len = sizeof(vs_tpg[0]) * VHOST_SCSI_MAX_TARGET;
   vs_tpg = kzalloc(len, GFP_KERNEL);
   if (!vs_tpg) {
 - mutex_unlock(vs-dev.mutex);
 - return -ENOMEM;
 + ret = -ENOMEM;
 + goto out;
   }
   if (vs-vs_tpg)
   memcpy(vs_tpg, vs-vs_tpg, len);
  
 - mutex_lock(tcm_vhost_mutex);
   list_for_each_entry(tv_tpg, tcm_vhost_list, tv_tpg_list) {
   mutex_lock(tv_tpg-tv_tpg_mutex);
   if (!tv_tpg-tpg_nexus) {
 @@ -854,11 +858,10 @@ static int vhost_scsi_set_endpoint(
  
   if (!strcmp(tv_tport-tport_name, t-vhost_wwpn)) {
   if (vs-vs_tpg  vs-vs_tpg[tv_tpg-tport_tpgt]) {
 - mutex_unlock(tv_tpg-tv_tpg_mutex);
 - mutex_unlock(tcm_vhost_mutex);
 - mutex_unlock(vs-dev.mutex);
   kfree(vs_tpg);
 - return -EEXIST;
 + mutex_unlock(tv_tpg-tv_tpg_mutex);
 + ret = -EEXIST;
 + goto out;
   }
   tv_tpg-tv_tpg_vhost_count++;
   vs_tpg[tv_tpg-tport_tpgt] = tv_tpg;
 @@ -867,7 +870,6 @@ static int vhost_scsi_set_endpoint(
   }
   mutex_unlock(tv_tpg-tv_tpg_mutex);
   }
 - mutex_unlock(tcm_vhost_mutex);
  
   if (match) {
   memcpy(vs-vs_vhost_wwpn, t-vhost_wwpn,
 @@ -893,7 +895,9 @@ static int vhost_scsi_set_endpoint(
   kfree(vs-vs_tpg);
   vs-vs_tpg = vs_tpg;
  
 +out:
   mutex_unlock(vs-dev.mutex);
 + mutex_unlock(tcm_vhost_mutex);
   return ret;
  }
  
 @@ -908,6 +912,7 @@ static int vhost_scsi_clear_endpoint(
   int index, ret, i;
   u8 target;
  
 + mutex_lock(tcm_vhost_mutex);
   mutex_lock(vs-dev.mutex);
   /* Verify that ring has been setup correctly. */
   for (index = 0; index  vs-dev.nvqs; ++index) {
 @@ -918,8 +923,8 @@ static int vhost_scsi_clear_endpoint(
   }
  
   if (!vs-vs_tpg) {
 - mutex_unlock(vs-dev.mutex);
 - return 0;
 + ret = 0;
 + goto err_dev;
   }
  
   for (i = 0; i  VHOST_SCSI_MAX_TARGET; i++) {
 @@ -965,13 +970,14 @@ static int vhost_scsi_clear_endpoint(
   kfree(vs-vs_tpg);
   vs-vs_tpg = NULL;
   mutex_unlock(vs-dev.mutex);
 -
 + mutex_unlock(tcm_vhost_mutex);
   return 0;
  
  err_tpg:
   mutex_unlock(tv_tpg-tv_tpg_mutex);
  err_dev:
   mutex_unlock(vs-dev.mutex);
 + mutex_unlock(tcm_vhost_mutex);
   return ret;
  }
  
 -- 
 1.8.1.4
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v11 2/4] tcm_vhost: Add hotplug/hotunplug support

2013-04-25 Thread Michael S. Tsirkin
On Thu, Apr 25, 2013 at 03:35:21PM +0800, Asias He wrote:
 In commit 365a7150094 ([SCSI] virtio-scsi: hotplug support for
 virtio-scsi), hotplug support is added to virtio-scsi.
 
 This patch adds hotplug and hotunplug support to tcm_vhost.
 
 You can create or delete a LUN in targetcli to hotplug or hotunplug a
 LUN in guest.
 
 Signed-off-by: Asias He as...@redhat.com
 Reviewed-by: Stefan Hajnoczi stefa...@redhat.com

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  drivers/vhost/tcm_vhost.c | 210 
 +-
  drivers/vhost/tcm_vhost.h |  10 +++
  2 files changed, 218 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 822cd1f..5340fd7 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -71,6 +71,7 @@ enum {
  
  #define VHOST_SCSI_MAX_TARGET256
  #define VHOST_SCSI_MAX_VQ128
 +#define VHOST_SCSI_MAX_EVENT 128
  
  struct vhost_scsi {
   /* Protected by vhost_scsi-dev.mutex */
 @@ -82,6 +83,12 @@ struct vhost_scsi {
  
   struct vhost_work vs_completion_work; /* cmd completion work item */
   struct llist_head vs_completion_list; /* cmd completion queue */
 +
 + struct vhost_work vs_event_work; /* evt injection work item */
 + struct llist_head vs_event_list; /* evt injection queue */
 +
 + bool vs_events_missed; /* any missed events, protected by vq-mutex */
 + int vs_events_nr; /* num of pending events, protected by vq-mutex */
  };
  
  /* Local pointer to allocated TCM configfs fabric module */
 @@ -349,6 +356,37 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
   return 0;
  }
  
 +static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt 
 *evt)
 +{
 + vs-vs_events_nr--;
 + kfree(evt);
 +}
 +
 +static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
 + u32 event, u32 reason)
 +{
 + struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
 + struct tcm_vhost_evt *evt;
 +
 + if (vs-vs_events_nr  VHOST_SCSI_MAX_EVENT) {
 + vs-vs_events_missed = true;
 + return NULL;
 + }
 +
 + evt = kzalloc(sizeof(*evt), GFP_KERNEL);
 + if (!evt) {
 + vq_err(vq, Failed to allocate tcm_vhost_evt\n);
 + vs-vs_events_missed = true;
 + return NULL;
 + }
 +
 + evt-event.event = event;
 + evt-event.reason = reason;
 + vs-vs_events_nr++;
 +
 + return evt;
 +}
 +
  static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
  {
   struct se_cmd *se_cmd = tv_cmd-tvc_se_cmd;
 @@ -367,6 +405,75 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd 
 *tv_cmd)
   kfree(tv_cmd);
  }
  
 +static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
 + struct tcm_vhost_evt *evt)
 +{
 + struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
 + struct virtio_scsi_event *event = evt-event;
 + struct virtio_scsi_event __user *eventp;
 + unsigned out, in;
 + int head, ret;
 +
 + if (!vq-private_data) {
 + vs-vs_events_missed = true;
 + return;
 + }
 +
 +again:
 + vhost_disable_notify(vs-dev, vq);
 + head = vhost_get_vq_desc(vs-dev, vq, vq-iov,
 + ARRAY_SIZE(vq-iov), out, in,
 + NULL, NULL);
 + if (head  0) {
 + vs-vs_events_missed = true;
 + return;
 + }
 + if (head == vq-num) {
 + if (vhost_enable_notify(vs-dev, vq))
 + goto again;
 + vs-vs_events_missed = true;
 + return;
 + }
 +
 + if ((vq-iov[out].iov_len != sizeof(struct virtio_scsi_event))) {
 + vq_err(vq, Expecting virtio_scsi_event, got %zu bytes\n,
 + vq-iov[out].iov_len);
 + vs-vs_events_missed = true;
 + return;
 + }
 +
 + if (vs-vs_events_missed) {
 + event-event |= VIRTIO_SCSI_T_EVENTS_MISSED;
 + vs-vs_events_missed = false;
 + }
 +
 + eventp = vq-iov[out].iov_base;
 + ret = __copy_to_user(eventp, event, sizeof(*event));
 + if (!ret)
 + vhost_add_used_and_signal(vs-dev, vq, head, 0);
 + else
 + vq_err(vq, Faulted on tcm_vhost_send_event\n);
 +}
 +
 +static void tcm_vhost_evt_work(struct vhost_work *work)
 +{
 + struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
 + vs_event_work);
 + struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
 + struct tcm_vhost_evt *evt;
 + struct llist_node *llnode;
 +
 + mutex_lock(vq-mutex);
 + llnode = llist_del_all(vs-vs_event_list);
 + while (llnode) {
 + evt = llist_entry(llnode, struct tcm_vhost_evt, list);
 + llnode = llist_next(llnode);
 + tcm_vhost_do_evt_work(vs, evt);
 + tcm_vhost_free_evt(vs, evt);
 + }
 + mutex_unlock(vq-mutex);
 +}
 

Re: [PATCH v11 3/4] tcm_vhost: Add ioctl to get and set events missed flag

2013-04-25 Thread Michael S. Tsirkin
On Thu, Apr 25, 2013 at 03:35:22PM +0800, Asias He wrote:
 Signed-off-by: Asias He as...@redhat.com

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  drivers/vhost/tcm_vhost.c | 17 +
  drivers/vhost/tcm_vhost.h |  3 +++
  2 files changed, 20 insertions(+)
 
 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 5340fd7..07217d8 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -1200,8 +1200,11 @@ static long vhost_scsi_ioctl(struct file *f, unsigned 
 int ioctl,
   struct vhost_scsi_target backend;
   void __user *argp = (void __user *)arg;
   u64 __user *featurep = argp;
 + u32 __user *eventsp = argp;
 + u32 events_missed;
   u64 features;
   int r, abi_version = VHOST_SCSI_ABI_VERSION;
 + struct vhost_virtqueue *vq = vs-vqs[VHOST_SCSI_VQ_EVT];
  
   switch (ioctl) {
   case VHOST_SCSI_SET_ENDPOINT:
 @@ -1222,6 +1225,20 @@ static long vhost_scsi_ioctl(struct file *f, unsigned 
 int ioctl,
   if (copy_to_user(argp, abi_version, sizeof abi_version))
   return -EFAULT;
   return 0;
 + case VHOST_SCSI_SET_EVENTS_MISSED:
 + if (get_user(events_missed, eventsp))
 + return -EFAULT;
 + mutex_lock(vq-mutex);
 + vs-vs_events_missed = events_missed;
 + mutex_unlock(vq-mutex);
 + return 0;
 + case VHOST_SCSI_GET_EVENTS_MISSED:
 + mutex_lock(vq-mutex);
 + events_missed = vs-vs_events_missed;
 + mutex_unlock(vq-mutex);
 + if (put_user(events_missed, eventsp))
 + return -EFAULT;
 + return 0;
   case VHOST_GET_FEATURES:
   features = VHOST_SCSI_FEATURES;
   if (copy_to_user(featurep, features, sizeof features))
 diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h
 index a545a5b..514b9fd 100644
 --- a/drivers/vhost/tcm_vhost.h
 +++ b/drivers/vhost/tcm_vhost.h
 @@ -123,3 +123,6 @@ struct vhost_scsi_target {
  #define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct 
 vhost_scsi_target)
  /* Changing this breaks userspace. */
  #define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
 +/* Set and get the events missed flag */
 +#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
 +#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
 -- 
 1.8.1.4
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v11 4/4] tcm_vhost: Enable VIRTIO_SCSI_F_HOTPLUG

2013-04-25 Thread Michael S. Tsirkin
On Thu, Apr 25, 2013 at 03:35:23PM +0800, Asias He wrote:
 Everything for hotplug is ready. Let's enable the feature bit.
 
 Signed-off-by: Asias He as...@redhat.com

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  drivers/vhost/tcm_vhost.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
 index 07217d8..1677238 100644
 --- a/drivers/vhost/tcm_vhost.c
 +++ b/drivers/vhost/tcm_vhost.c
 @@ -66,7 +66,8 @@ enum {
   * TODO: debug and remove the workaround.
   */
  enum {
 - VHOST_SCSI_FEATURES = VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)
 + VHOST_SCSI_FEATURES = (VHOST_FEATURES  (~VIRTIO_RING_F_EVENT_IDX)) |
 +   (1ULL  VIRTIO_SCSI_F_HOTPLUG)
  };
  
  #define VHOST_SCSI_MAX_TARGET256
 -- 
 1.8.1.4
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/12] Subject: [PATCH 01/10] nEPT: Support LOAD_IA32_EFER entry/exit controls for L1

2013-04-25 Thread Nakajima, Jun
Recent KVM, since http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
switches the EFER MSR when EPT is used and the host and guest have different
NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
and want to be able to run recent KVM as L1, we need to allow L1 to use this
EFER switching feature.

To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if available,
and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
support for the former (the latter is still unsupported).

Nested entry and exit emulation (prepare_vmcs_02 and load_vmcs12_host_state,
respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So all
that's left to do in this patch is to properly advertise this feature to L1.

Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by using
vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
support this feature, regardless of whether the host supports it.

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/kvm/vmx.c
---
 arch/x86/kvm/vmx.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6667042..9e0ec9d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2057,6 +2057,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #else
  nested_vmx_exit_ctls_high = 0;
 #endif
+ nested_vmx_exit_ctls_high |= VM_EXIT_LOAD_IA32_EFER;

  /* entry controls */
  rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2064,6 +2065,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
  nested_vmx_entry_ctls_low = 0;
  nested_vmx_entry_ctls_high =
  VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
+ nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_IA32_EFER;

  /* cpu-based controls */
  rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -7050,10 +7052,18 @@ static void prepare_vmcs02(struct kvm_vcpu
*vcpu, struct vmcs12 *vmcs12)
  vcpu-arch.cr0_guest_owned_bits = ~vmcs12-cr0_guest_host_mask;
  vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu-arch.cr0_guest_owned_bits);

- /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */
- vmcs_write32(VM_EXIT_CONTROLS,
- vmcs12-vm_exit_controls | vmcs_config.vmexit_ctrl);
- vmcs_write32(VM_ENTRY_CONTROLS, vmcs12-vm_entry_controls |
+ /* L2-L1 exit controls are emulated - the hardware exit is to L0 so
+ * we should use its exit controls. Note that IA32_MODE, LOAD_IA32_EFER
+ * bits are further modified by vmx_set_efer() below.
+ */
+ vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+
+ /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
+ * emulated by vmx_set_efer(), below.
+ */
+ vmcs_write32(VM_ENTRY_CONTROLS,
+ (vmcs12-vm_entry_controls  ~VM_ENTRY_LOAD_IA32_EFER 
+ ~VM_ENTRY_IA32E_MODE) |
  (vmcs_config.vmentry_ctrl  ~VM_ENTRY_IA32E_MODE));

  if (vmcs12-vm_entry_controls  VM_ENTRY_LOAD_IA32_PAT)
--
1.8.2.1.610.g562af5b
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/12] Subject: [PATCH 02/10] nEPT: Add EPT tables support to paging_tmpl.h

2013-04-25 Thread Nakajima, Jun
This is the first patch in a series which adds nested EPT support to KVM's
nested VMX. Nested EPT means emulating EPT for an L1 guest so that L1 can use
EPT when running a nested guest L2. When L1 uses EPT, it allows the L2 guest
to set its own cr3 and take its own page faults without either of L0 or L1
getting involved. This often significantly improves L2's performance over the
previous two alternatives (shadow page tables over EPT, and shadow page
tables over shadow page tables).

This patch adds EPT support to paging_tmpl.h.

paging_tmpl.h contains the code for reading and writing page tables. The code
for 32-bit and 64-bit tables is very similar, but not identical, so
paging_tmpl.h is #include'd twice in mmu.c, once with PTTYPE=32 and once
with PTTYPE=64, and this generates the two sets of similar functions.

There are subtle but important differences between the format of EPT tables
and that of ordinary x86 64-bit page tables, so for nested EPT we need a
third set of functions to read the guest EPT table and to write the shadow
EPT table.

So this patch adds a third PTTYPE, PTTYPE_EPT, which creates functions (prefixed
with EPT) which correctly read and write EPT tables.

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/kvm/mmu.c
modified:   arch/x86/kvm/paging_tmpl.h
---
 arch/x86/kvm/mmu.c |   5 ++
 arch/x86/kvm/paging_tmpl.h | 135 ++---
 2 files changed, 131 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 956ca35..91cac19 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3418,6 +3418,11 @@ static inline bool is_last_gpte(struct kvm_mmu
*mmu, unsigned level, unsigned gp
  return mmu-last_pte_bitmap  (1  index);
 }

+#define PTTYPE_EPT 18 /* arbitrary */
+#define PTTYPE PTTYPE_EPT
+#include paging_tmpl.h
+#undef PTTYPE
+
 #define PTTYPE 64
 #include paging_tmpl.h
 #undef PTTYPE
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 105dd5b..6226b51 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -50,6 +50,22 @@
  #define PT_LEVEL_BITS PT32_LEVEL_BITS
  #define PT_MAX_FULL_LEVELS 2
  #define CMPXCHG cmpxchg
+#elif PTTYPE == PTTYPE_EPT
+ #define pt_element_t u64
+ #define guest_walker guest_walkerEPT
+ #define FNAME(name) EPT_##name
+ #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+ #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
+ #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
+ #define PT_INDEX(addr, level) PT64_INDEX(addr, level)
+ #define PT_LEVEL_BITS PT64_LEVEL_BITS
+ #ifdef CONFIG_X86_64
+ #define PT_MAX_FULL_LEVELS 4
+ #define CMPXCHG cmpxchg
+ #else
+ #define CMPXCHG cmpxchg64
+ #define PT_MAX_FULL_LEVELS 2
+ #endif
 #else
  #error Invalid PTTYPE value
 #endif
@@ -80,6 +96,7 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
  return (gpte  PT_LVL_ADDR_MASK(lvl))  PAGE_SHIFT;
 }

+#if PTTYPE != PTTYPE_EPT
 static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
pt_element_t __user *ptep_user, unsigned index,
pt_element_t orig_pte, pt_element_t new_pte)
@@ -102,7 +119,52 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu
*vcpu, struct kvm_mmu *mmu,

  return (ret != orig_pte);
 }
+#endif
+
+static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
+{
+ unsigned access;
+
+#if PTTYPE == PTTYPE_EPT
+ /* We rely here that ACC_WRITE_MASK==VMX_EPT_WRITABLE_MASK */
+ access = (gpte  VMX_EPT_WRITABLE_MASK) | ACC_USER_MASK |
+ ((gpte  VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0);
+#else
+ access = (gpte  (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+ access = ~(gpte  PT64_NX_SHIFT);
+#endif
+
+ return access;
+}
+
+static inline int FNAME(is_present_gpte)(unsigned long pte)
+{
+#if PTTYPE == PTTYPE_EPT
+ return pte  (VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK |
+ VMX_EPT_EXECUTABLE_MASK);
+#else
+ return is_present_gpte(pte);
+#endif
+}
+
+static inline int FNAME(check_write_user_access)(struct kvm_vcpu *vcpu,
+   bool write_fault, bool user_fault,
+   unsigned long pte)
+{
+#if PTTYPE == PTTYPE_EPT
+ if (unlikely(write_fault  !(pte  VMX_EPT_WRITABLE_MASK)
+  (user_fault || is_write_protection(vcpu
+ return false;
+ return true;
+#else
+ u32 access = ((kvm_x86_ops-get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
+| (write_fault ? PFERR_WRITE_MASK : 0);
+
+ return !permission_fault(vcpu-arch.walk_mmu, vcpu-arch.access, access);
+#endif
+}

+#if PTTYPE != PTTYPE_EPT
 static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
  struct kvm_mmu *mmu,
  struct guest_walker *walker,
@@ -139,6 +201,7 @@ static int
FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
  }
  return 0;
 }
+#endif

 /*
  * Fetch a guest pte for a guest virtual address
@@ -147,7 +210,6 @@ static int FNAME(walk_addr_generic)(struct
guest_walker *walker,
 struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 

[PATCH 03/12] Subject: [PATCH 03/10] nEPT: MMU context for nested EPT

2013-04-25 Thread Nakajima, Jun
KVM's existing shadow MMU code already supports nested TDP. To use it, we
need to set up a new MMU context for nested EPT, and create a few callbacks
for it (nested_ept_*()). This context should also use the EPT versions of
the page table access functions (defined in the previous patch).
Then, we need to switch back and forth between this nested context and the
regular MMU context when switching between L1 and L2 (when L1 runs this L2
with EPT).

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/kvm/mmu.c
modified:   arch/x86/kvm/mmu.h
modified:   arch/x86/kvm/vmx.c
---
 arch/x86/kvm/mmu.c | 38 
 arch/x86/kvm/mmu.h |  1 +
 arch/x86/kvm/vmx.c | 56 +++---
 3 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 91cac19..34e406e2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3674,6 +3674,44 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu,
struct kvm_mmu *context)
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);

+int kvm_init_shadow_EPT_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+{
+ ASSERT(vcpu);
+ ASSERT(!VALID_PAGE(vcpu-arch.mmu.root_hpa));
+
+ context-shadow_root_level = kvm_x86_ops-get_tdp_level();
+
+ context-nx = is_nx(vcpu); /* TODO: ? */
+ context-new_cr3 = paging_new_cr3;
+ context-page_fault = EPT_page_fault;
+ context-gva_to_gpa = EPT_gva_to_gpa;
+ context-sync_page = EPT_sync_page;
+ context-invlpg = EPT_invlpg;
+ context-update_pte = EPT_update_pte;
+ context-free = paging_free;
+ context-root_level = context-shadow_root_level;
+ context-root_hpa = INVALID_PAGE;
+ context-direct_map = false;
+
+ /* TODO: reset_rsvds_bits_mask() is not built for EPT, we need
+   something different.
+ */
+ reset_rsvds_bits_mask(vcpu, context);
+
+
+ /* TODO: I copied these from kvm_init_shadow_mmu, I don't know why
+   they are done, or why they write to vcpu-arch.mmu and not context
+ */
+ vcpu-arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
+ vcpu-arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
+ vcpu-arch.mmu.base_role.smep_andnot_wp =
+ kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) 
+ !is_write_protection(vcpu);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_init_shadow_EPT_mmu);
+
 static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
  int r = kvm_init_shadow_mmu(vcpu, vcpu-arch.walk_mmu);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 6987108..19dd5ab 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -54,6 +54,7 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu
*vcpu, u64 addr, u64 sptes[4]);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr,
bool direct);
 int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+int kvm_init_shadow_EPT_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);

 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9e0ec9d..f2fd79d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -912,12 +912,16 @@ static inline bool nested_cpu_has2(struct vmcs12
*vmcs12, u32 bit)
  (vmcs12-secondary_vm_exec_control  bit);
 }

-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
- struct kvm_vcpu *vcpu)
+static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
 {
  return vmcs12-pin_based_vm_exec_control  PIN_BASED_VIRTUAL_NMIS;
 }

+static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
+}
+
 static inline bool is_exception(u32 intr_info)
 {
  return (intr_info  (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -6873,6 +6877,46 @@ static void vmx_set_supported_cpuid(u32 func,
struct kvm_cpuid_entry2 *entry)
  entry-ecx |= bit(X86_FEATURE_VMX);
 }

+/* Callbacks for nested_ept_init_mmu_context: */
+
+static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
+{
+ /* return the page table to be shadowed - in our case, EPT12 */
+ return get_vmcs12(vcpu)-ept_pointer;
+}
+
+static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault)
+{
+ struct vmcs12 *vmcs12;
+ nested_vmx_vmexit(vcpu);
+ vmcs12 = get_vmcs12(vcpu);
+ /*
+ * Note no need to set vmcs12-vm_exit_reason as it is already copied
+ * from vmcs02 in nested_vmx_vmexit() above, i.e., EPT_VIOLATION.
+ */
+ vmcs12-exit_qualification = fault-error_code;
+ vmcs12-guest_physical_address = fault-address;
+}
+
+static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+{
+ int r = kvm_init_shadow_EPT_mmu(vcpu, vcpu-arch.mmu);
+
+ vcpu-arch.mmu.set_cr3   = vmx_set_cr3;
+ vcpu-arch.mmu.get_cr3   = nested_ept_get_cr3;
+ vcpu-arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
+
+ vcpu-arch.walk_mmu  = vcpu-arch.nested_mmu;
+
+ return r;
+}
+
+static void 

[PATCH 04/12] Subject: [PATCH 04/10] nEPT: Fix cr3 handling in nested exit and entry

2013-04-25 Thread Nakajima, Jun
The existing code for handling cr3 and related VMCS fields during nested
exit and entry wasn't correct in all cases:

If L2 is allowed to control cr3 (and this is indeed the case in nested EPT),
during nested exit we must copy the modified cr3 from vmcs02 to vmcs12, and
we forgot to do so. This patch adds this copy.

If L0 isn't controlling cr3 when running L2 (i.e., L0 is using EPT), and
whoever does control cr3 (L1 or L2) is using PAE, the processor might have
saved PDPTEs and we should also save them in vmcs12 (and restore later).

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/kvm/vmx.c
---
 arch/x86/kvm/vmx.c | 37 -
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f2fd79d..d4bfd32 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7162,10 +7162,26 @@ static void prepare_vmcs02(struct kvm_vcpu
*vcpu, struct vmcs12 *vmcs12)
  vmx_set_cr4(vcpu, vmcs12-guest_cr4);
  vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));

- /* shadow page tables on either EPT or shadow page tables */
+ /*
+ * Note that kvm_set_cr3() and kvm_mmu_reset_context() will do the
+ * right thing, and set GUEST_CR3 and/or EPT_POINTER in all supported
+ * settings: 1. shadow page tables on shadow page tables, 2. shadow
+ * page tables on EPT, 3. EPT on EPT.
+ */
  kvm_set_cr3(vcpu, vmcs12-guest_cr3);
  kvm_mmu_reset_context(vcpu);

+ /*
+ * Additionally, except when L0 is using shadow page tables, L1 or
+ * L2 control guest_cr3 for L2, so they may also have saved PDPTEs
+ */
+ if (enable_ept) {
+ vmcs_write64(GUEST_PDPTR0, vmcs12-guest_pdptr0);
+ vmcs_write64(GUEST_PDPTR1, vmcs12-guest_pdptr1);
+ vmcs_write64(GUEST_PDPTR2, vmcs12-guest_pdptr2);
+ vmcs_write64(GUEST_PDPTR3, vmcs12-guest_pdptr3);
+ }
+
  kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12-guest_rsp);
  kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12-guest_rip);
 }
@@ -7397,6 +7413,25 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
  vmcs12-guest_pending_dbg_exceptions =
  vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);

+ /*
+ * In some cases (usually, nested EPT), L2 is allowed to change its
+ * own CR3 without exiting. If it has changed it, we must keep it.
+ * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
+ * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
+ */
+ if (enable_ept)
+ vmcs12-guest_cr3 = vmcs_read64(GUEST_CR3);
+ /*
+ * Additionally, except when L0 is using shadow page tables, L1 or
+ * L2 control guest_cr3 for L2, so save their PDPTEs
+ */
+ if (enable_ept) {
+ vmcs12-guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
+ vmcs12-guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
+ vmcs12-guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
+ vmcs12-guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+ }
+
  /* TODO: These cannot have changed unless we have MSR bitmaps and
  * the relevant bit asks not to trap the change */
  vmcs12-guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
--
1.8.2.1.610.g562af5b
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/12] Subject: [PATCH 05/10] nEPT: Fix wrong test in kvm_set_cr3

2013-04-25 Thread Nakajima, Jun
kvm_set_cr3() attempts to check if the new cr3 is a valid guest physical
address. The problem is that with nested EPT, cr3 is an *L2* physical
address, not an L1 physical address as this test expects.

As the comment above this test explains, it isn't necessary, and doesn't
correspond to anything a real processor would do. So this patch removes it.

Note that this wrong test could have also theoretically caused problems
in nested NPT, not just in nested EPT. However, in practice, the problem
was avoided: nested_svm_vmexit()/vmrun() do not call kvm_set_cr3 in the
nested NPT case, and instead set the vmcb (and arch.cr3) directly, thus
circumventing the problem. Additional potential calls to the buggy function
are avoided in that we don't trap cr3 modifications when nested NPT is
enabled. However, because in nested VMX we did want to use kvm_set_cr3()
(as requested in Avi Kivity's review of the original nested VMX patches),
we can't avoid this problem and need to fix it.

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/kvm/x86.c
---
 arch/x86/kvm/x86.c | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e172132..c34590d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -659,17 +659,6 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
  */
  }

- /*
- * Does the new cr3 value map to physical memory? (Note, we
- * catch an invalid cr3 even in real-mode, because it would
- * cause trouble later on when we turn on paging anyway.)
- *
- * A real CPU would silently accept an invalid cr3 and would
- * attempt to use it - with largely undefined (and often hard
- * to debug) behavior on the guest side.
- */
- if (unlikely(!gfn_to_memslot(vcpu-kvm, cr3  PAGE_SHIFT)))
- return 1;
  vcpu-arch.cr3 = cr3;
  __set_bit(VCPU_EXREG_CR3, (ulong *)vcpu-arch.regs_avail);
  vcpu-arch.mmu.new_cr3(vcpu);
--
1.8.2.1.610.g562af5b
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/12] Subject: [PATCH 06/10] nEPT: Some additional comments

2013-04-25 Thread Nakajima, Jun
Some additional comments to preexisting code:
Explain who (L0 or L1) handles EPT violation and misconfiguration exits.
Don't mention "shadow on either EPT or shadow" as the only two options.

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/kvm/vmx.c
---
 arch/x86/kvm/vmx.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d4bfd32..0e99b15 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6126,7 +6126,20 @@ static bool nested_vmx_exit_handled(struct
kvm_vcpu *vcpu)
  return nested_cpu_has2(vmcs12,
  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
  case EXIT_REASON_EPT_VIOLATION:
+ /*
+ * L0 always deals with the EPT violation. If nested EPT is
+ * used, and the nested mmu code discovers that the address is
+ * missing in the guest EPT table (EPT12), the EPT violation
+ * will be injected with nested_ept_inject_page_fault()
+ */
+ return 0;
  case EXIT_REASON_EPT_MISCONFIG:
+ /*
+ * L2 never uses directly L1's EPT, but rather L0's own EPT
+ * table (shadow on EPT) or a merged EPT table that L0 built
+ * (EPT on EPT). So any problems with the structure of the
+ * table is L0's fault.
+ */
  return 0;
  case EXIT_REASON_WBINVD:
  return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
--
1.8.2.1.610.g562af5b
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/12] Subject: [PATCH 07/10] nEPT: Advertise EPT to L1

2013-04-25 Thread Nakajima, Jun
Advertise the support of EPT to the L1 guest, through the appropriate MSR.

This is the last patch of the basic Nested EPT feature, so as to allow
bisection through this patch series: The guest will not see EPT support until
this last patch, and will not attempt to use the half-applied feature.

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/kvm/vmx.c
---
 arch/x86/kvm/vmx.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0e99b15..a5e14d1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2026,6 +2026,7 @@ static u32 nested_vmx_secondary_ctls_low,
nested_vmx_secondary_ctls_high;
 static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
 static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
 static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
+static u32 nested_vmx_ept_caps;
 static __init void nested_vmx_setup_ctls_msrs(void)
 {
  /*
@@ -2101,6 +2102,18 @@ static __init void nested_vmx_setup_ctls_msrs(void)
  nested_vmx_secondary_ctls_low = 0;
  nested_vmx_secondary_ctls_high =
  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ if (enable_ept) {
+ /* nested EPT: emulate EPT also to L1 */
+ nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+ nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT;
+ nested_vmx_ept_caps |=
+ VMX_EPT_INVEPT_BIT | VMX_EPT_EXTENT_GLOBAL_BIT |
+ VMX_EPT_EXTENT_CONTEXT_BIT |
+ VMX_EPT_EXTENT_INDIVIDUAL_BIT;
+ nested_vmx_ept_caps = vmx_capability.ept;
+ } else
+ nested_vmx_ept_caps = 0;
+
 }

 static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
@@ -2200,8 +2213,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu
*vcpu, u32 msr_index, u64 *pdata)
  nested_vmx_secondary_ctls_high);
  break;
  case MSR_IA32_VMX_EPT_VPID_CAP:
- /* Currently, no nested ept or nested vpid */
- *pdata = 0;
+ /* Currently, no nested vpid support */
+ *pdata = nested_vmx_ept_caps;
  break;
  default:
  return 0;
--
1.8.2.1.610.g562af5b
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/12] Subject: [PATCH 08/10] nEPT: Nested INVEPT

2013-04-25 Thread Nakajima, Jun
If we let L1 use EPT, we should probably also support the INVEPT instruction.

In our current nested EPT implementation, when L1 changes its EPT table for
L2 (i.e., EPT12), L0 modifies the shadow EPT table (EPT02), and in the course
of this modification already calls INVEPT. Therefore, when L1 calls INVEPT,
we don't really need to do anything. In particular we *don't* need to call
the real INVEPT again. All we do in our INVEPT is verify the validity of the
call, and its parameters, and then do nothing.

In KVM Forum 2010, Dong et al. presented "Nested Virtualization Friendly KVM"
and classified our current nested EPT implementation as "shadow-like" virtual
EPT. They recommended instead a different approach, which they called "VTLB-like"
virtual EPT. If we had taken that alternative approach, INVEPT would have had
a bigger role: L0 would only rebuild the shadow EPT table when L1 calls INVEPT.

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/include/asm/vmx.h
modified:   arch/x86/kvm/vmx.c
---
 arch/x86/include/asm/vmx.h |  4 ++-
 arch/x86/kvm/vmx.c | 83 ++
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index b6fbf86..0ce54f3 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -376,7 +376,9 @@ enum vmcs_field {
 #define VMX_EPTP_WB_BIT (1ull  14)
 #define VMX_EPT_2MB_PAGE_BIT (1ull  16)
 #define VMX_EPT_1GB_PAGE_BIT (1ull  17)
-#define VMX_EPT_AD_BIT(1ull  21)
+#define VMX_EPT_INVEPT_BIT (1ull  20)
+#define VMX_EPT_AD_BIT (1ull  21)
+#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull  24)
 #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull  25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull  26)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a5e14d1..10f2a69 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5878,6 +5878,87 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
  return 1;
 }

+/* Emulate the INVEPT instruction */
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+ u32 vmx_instruction_info;
+ unsigned long type;
+ gva_t gva;
+ struct x86_exception e;
+ struct {
+ u64 eptp, gpa;
+ } operand;
+
+ if (!(nested_vmx_secondary_ctls_high  SECONDARY_EXEC_ENABLE_EPT) ||
+!(nested_vmx_ept_caps  VMX_EPT_INVEPT_BIT)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ if (!nested_vmx_check_permission(vcpu))
+ return 1;
+
+ if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ /* According to the Intel VMX instruction reference, the memory
+ * operand is read even if it isn't needed (e.g., for type==global)
+ */
+ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+ vmx_instruction_info, gva))
+ return 1;
+ if (kvm_read_guest_virt(vcpu-arch.emulate_ctxt, gva, operand,
+ sizeof(operand), e)) {
+ kvm_inject_page_fault(vcpu, e);
+ return 1;
+ }
+
+ type = kvm_register_read(vcpu, (vmx_instruction_info  28)  0xf);
+
+ switch (type) {
+ case VMX_EPT_EXTENT_GLOBAL:
+ if (!(nested_vmx_ept_caps  VMX_EPT_EXTENT_GLOBAL_BIT))
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ else {
+ /*
+ * Do nothing: when L1 changes EPT12, we already
+ * update EPT02 (the shadow EPT table) and call INVEPT.
+ * So when L1 calls INVEPT, there's nothing left to do.
+ */
+ nested_vmx_succeed(vcpu);
+ }
+ break;
+ case VMX_EPT_EXTENT_CONTEXT:
+ if (!(nested_vmx_ept_caps  VMX_EPT_EXTENT_CONTEXT_BIT))
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ else {
+ /* Do nothing */
+ nested_vmx_succeed(vcpu);
+ }
+ break;
+ case VMX_EPT_EXTENT_INDIVIDUAL_ADDR:
+ if (!(nested_vmx_ept_caps  VMX_EPT_EXTENT_INDIVIDUAL_BIT))
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ else {
+ /* Do nothing */
+ nested_vmx_succeed(vcpu);
+ }
+ break;
+ default:
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ }
+
+ skip_emulated_instruction(vcpu);
+ return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -5922,6 +6003,7 @@ static int (*const
kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
  [EXIT_REASON_PAUSE_INSTRUCTION]   = handle_pause,
  [EXIT_REASON_MWAIT_INSTRUCTION]  = handle_invalid_op,
  [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
+ [EXIT_REASON_INVEPT]  = handle_invept,
 };

 static const int kvm_vmx_max_exit_handlers =
@@ -6106,6 +6188,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
  case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
  case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
  case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+ case EXIT_REASON_INVEPT:
  /*
  * VMX instructions trap unconditionally. This allows 

[PATCH 09/12] Subject: [PATCH 09/10] nEPT: Documentation

2013-04-25 Thread Nakajima, Jun
Update the documentation to no longer say that nested EPT is not supported.

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   Documentation/virtual/kvm/nested-vmx.txt
---
 Documentation/virtual/kvm/nested-vmx.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/virtual/kvm/nested-vmx.txt
b/Documentation/virtual/kvm/nested-vmx.txt
index 8ed937d..cdf7839 100644
--- a/Documentation/virtual/kvm/nested-vmx.txt
+++ b/Documentation/virtual/kvm/nested-vmx.txt
@@ -38,8 +38,8 @@ The current code supports running Linux guests under
KVM guests.
 Only 64-bit guest hypervisors are supported.

 Additional patches for running Windows under guest KVM, and Linux under
-guest VMware server, and support for nested EPT, are currently running in
-the lab, and will be sent as follow-on patchsets.
+guest VMware server, are currently running in the lab, and will be sent as
+follow-on patchsets.


 Running nested VMX
--
1.8.2.1.610.g562af5b
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/12] Subject: [PATCH 10/10] nEPT: Miscellaneous cleanups

2013-04-25 Thread Nakajima, Jun
Some trivial code cleanups not really related to nested EPT.

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/include/asm/vmx.h
modified:   arch/x86/kvm/vmx.c
---
 arch/x86/include/asm/vmx.h | 44 
 arch/x86/kvm/vmx.c |  3 +--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 0ce54f3..5838be1 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -254,6 +254,50 @@ enum vmcs_field {
  HOST_RIP= 0x6c16,
 };

+#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x8000
+
+#define EXIT_REASON_EXCEPTION_NMI   0
+#define EXIT_REASON_EXTERNAL_INTERRUPT  1
+#define EXIT_REASON_TRIPLE_FAULT2
+
+#define EXIT_REASON_PENDING_INTERRUPT   7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID   10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD13
+#define EXIT_REASON_INVLPG  14
+#define EXIT_REASON_RDPMC   15
+#define EXIT_REASON_RDTSC   16
+#define EXIT_REASON_VMCALL  18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD  23
+#define EXIT_REASON_VMRESUME24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF   26
+#define EXIT_REASON_VMON27
+#define EXIT_REASON_CR_ACCESS   28
+#define EXIT_REASON_DR_ACCESS   29
+#define EXIT_REASON_IO_INSTRUCTION  30
+#define EXIT_REASON_MSR_READ31
+#define EXIT_REASON_MSR_WRITE   32
+#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION   40
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EPT_VIOLATION   48
+#define EXIT_REASON_EPT_MISCONFIG   49
+#define EXIT_REASON_INVEPT 50
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_INVPCID 58
+
 /*
  * Interruption-information format
  */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 10f2a69..95304cc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -616,7 +616,6 @@ static void nested_release_page_clean(struct page *page)
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
-static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 struct kvm_segment *var, int seg);
@@ -6320,7 +6319,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)

  if (unlikely(!cpu_has_virtual_nmis()  vmx-soft_vnmi_blocked 
 !(is_guest_mode(vcpu)  nested_cpu_has_virtual_nmis(
-get_vmcs12(vcpu), vcpu {
+ get_vmcs12(vcpu) {
  if (vmx_interrupt_allowed(vcpu)) {
  vmx-soft_vnmi_blocked = 0;
  } else if (vmx-vnmi_blocked_time  10LL 
--
1.8.2.1.610.g562af5b
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/12] Provide the correct exit qualification upon EPT violation to L1 VMM.

2013-04-25 Thread Nakajima, Jun
Since vcpu_vmx is contained in vmx.c, use kvm_vcpu_arch so that we can
use the exit qualification in paging_tmpl.h.

Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/include/asm/kvm_host.h
modified:   arch/x86/kvm/paging_tmpl.h
modified:   arch/x86/kvm/vmx.c
---
 arch/x86/include/asm/kvm_host.h | 2 ++
 arch/x86/kvm/paging_tmpl.h  | 4 
 arch/x86/kvm/vmx.c  | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4979778..5d1fdf2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -504,6 +504,8 @@ struct kvm_vcpu_arch {
  * instruction.
  */
  bool write_fault_to_shadow_pgtable;
+
+ unsigned long exit_qualification;
 };

 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6226b51..0da6044 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -349,7 +349,11 @@ error:

  walker-fault.vector = PF_VECTOR;
  walker-fault.error_code_valid = true;
+#if PTTYPE != PTTYPE_EPT
  walker-fault.error_code = errcode;
+#else
+ walker-fault.error_code = vcpu-arch.exit_qualification  0x7; /*
exit_qualificaiton */
+#endif
  walker-fault.address = addr;
  walker-fault.nested_page_fault = mmu != vcpu-arch.walk_mmu;

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 95304cc..61e2853 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -425,6 +425,7 @@ struct vcpu_vmx {
  ktime_t entry_time;
  s64 vnmi_blocked_time;
  u32 exit_reason;
+ unsigned long exit_qualification;

  bool rdtscp_enabled;

@@ -5074,6 +5075,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
  /* ept page table is present? */
  error_code |= (exit_qualification  3)  0x1;

+vcpu-arch.exit_qualification = exit_qualification;
+
  return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }

--
1.8.2.1.610.g562af5b
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/12] Move the routines to paging_tmpl.h to make them different for virtual EPT.

2013-04-25 Thread Nakajima, Jun
Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com

modified:   arch/x86/kvm/mmu.c
---
 arch/x86/kvm/mmu.c | 30 --
 1 file changed, 30 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 34e406e2..99bfc5e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2480,26 +2480,6 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct
kvm_vcpu *vcpu, gfn_t gfn,
  return gfn_to_pfn_memslot_atomic(slot, gfn);
 }

-static bool prefetch_invalid_gpte(struct kvm_vcpu *vcpu,
-  struct kvm_mmu_page *sp, u64 *spte,
-  u64 gpte)
-{
- if (is_rsvd_bits_set(vcpu-arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
- goto no_present;
-
- if (!is_present_gpte(gpte))
- goto no_present;
-
- if (!(gpte  PT_ACCESSED_MASK))
- goto no_present;
-
- return false;
-
-no_present:
- drop_spte(vcpu-kvm, spte);
- return true;
-}
-
 static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
 struct kvm_mmu_page *sp,
 u64 *start, u64 *end)
@@ -3399,16 +3379,6 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t
gfn, unsigned access,
  return false;
 }

-static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte)
-{
- unsigned access;
-
- access = (gpte  (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
- access = ~(gpte  PT64_NX_SHIFT);
-
- return access;
-}
-
 static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level,
unsigned gpte)
 {
  unsigned index;
--
1.8.2.1.610.g562af5b
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Bug 53611] New: nVMX: Add nested EPT

2013-04-25 Thread Nakajima, Jun
On Wed, Apr 24, 2013 at 8:55 AM, Nakajima, Jun jun.nakaj...@intel.com wrote:
 Sorry about the slow progress. We've been distracted by some priority
 things. The patches are ready (i.e. working), but we are cleaning them
 up. I'll send what we have today.

So, I have sent them, and frankly we are still cleaning up.  Please
bear with us.
We are also sending one more patchset to deal with EPT
misconfiguration, but Linux should run in L2 on top of L1 KVM.

--
Jun
Intel Open Source Technology Center
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/12] Subject: [PATCH 01/10] nEPT: Support LOAD_IA32_EFER entry/exit controls for L1

2013-04-25 Thread Gleb Natapov
All the patches are mangled by your email client. Please use git
send-email --thread to send them.

On Thu, Apr 25, 2013 at 12:50:19AM -0700, Nakajima, Jun wrote:
 Recent KVM, since http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
 switch the EFER MSR when EPT is used and the host and guest have different
 NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
 and want to be able to run recent KVM as L1, we need to allow L1 to use this
 EFER switching feature.
 
 To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if available,
 and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
 support for the former (the latter is still unsupported).
 
 Nested entry and exit emulation (prepare_vmcs_02 and load_vmcs12_host_state,
 respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So all
 that's left to do in this patch is to properly advertise this feature to L1.
 
 Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by using
 vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
 support this feature, regardless of whether the host supports it.
 
 Signed-off-by: Nadav Har'El n...@il.ibm.com
 Signed-off-by: Jun Nakajima jun.nakaj...@intel.com
 
 modified:   arch/x86/kvm/vmx.c
 ---
  arch/x86/kvm/vmx.c | 18 ++
  1 file changed, 14 insertions(+), 4 deletions(-)
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 6667042..9e0ec9d 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2057,6 +2057,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
  #else
   nested_vmx_exit_ctls_high = 0;
  #endif
 + nested_vmx_exit_ctls_high |= VM_EXIT_LOAD_IA32_EFER;
 
   /* entry controls */
   rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
 @@ -2064,6 +2065,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
   nested_vmx_entry_ctls_low = 0;
   nested_vmx_entry_ctls_high =
   VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
 + nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_IA32_EFER;
 
   /* cpu-based controls */
   rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
 @@ -7050,10 +7052,18 @@ static void prepare_vmcs02(struct kvm_vcpu
 *vcpu, struct vmcs12 *vmcs12)
   vcpu-arch.cr0_guest_owned_bits = ~vmcs12-cr0_guest_host_mask;
   vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu-arch.cr0_guest_owned_bits);
 
 - /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */
 - vmcs_write32(VM_EXIT_CONTROLS,
 - vmcs12-vm_exit_controls | vmcs_config.vmexit_ctrl);
 - vmcs_write32(VM_ENTRY_CONTROLS, vmcs12-vm_entry_controls |
 + /* L2-L1 exit controls are emulated - the hardware exit is to L0 so
 + * we should use its exit controls. Note that IA32_MODE, LOAD_IA32_EFER
 + * bits are further modified by vmx_set_efer() below.
 + */
 + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
 +
 + /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
 + * emulated by vmx_set_efer(), below.
 + */
 + vmcs_write32(VM_ENTRY_CONTROLS,
 + (vmcs12-vm_entry_controls  ~VM_ENTRY_LOAD_IA32_EFER 
 + ~VM_ENTRY_IA32E_MODE) |
   (vmcs_config.vmentry_ctrl  ~VM_ENTRY_IA32E_MODE));
 
   if (vmcs12-vm_entry_controls  VM_ENTRY_LOAD_IA32_PAT)
 --
 1.8.2.1.610.g562af5b
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v11 0/4] tcm_vhost hotplug

2013-04-25 Thread Nicholas A. Bellinger
On Thu, 2013-04-25 at 10:39 +0300, Michael S. Tsirkin wrote:
 On Thu, Apr 25, 2013 at 03:35:19PM +0800, Asias He wrote:
  Changes in v11
  - Drop change log history in commit log
  
  Changes in v10
  - Drop comments about lun
  - Add Enable VIRTIO_SCSI_F_HOTPLUG to this series
  
  Changes in v9
  - Drop tcm_vhost_check_feature
  - Add "Refactor the lock nesting rule" to this series
  
  Asias He (4):
tcm_vhost: Refactor the lock nesting rule
tcm_vhost: Add hotplug/hotunplug support
tcm_vhost: Add ioctl to get and set events missed flag
tcm_vhost: Enable VIRTIO_SCSI_F_HOTPLUG
  
   drivers/vhost/tcm_vhost.c | 262 
  +++---
   drivers/vhost/tcm_vhost.h |  13 +++
   2 files changed, 259 insertions(+), 16 deletions(-)
 
 
 Acked-by: Michael S. Tsirkin m...@redhat.com
 

Applied to target-pending/for-next.

Nice work Asias & MST !

--nab


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] kvm/powerpc/e500mc: fix tlb invalidation on cpu migration

2013-04-25 Thread Caraman Mihai Claudiu-B02008
 On 08.03.2013, at 21:25, Scott Wood wrote:
 
  The existing check handles the case where we've migrated to a different
  core than we last ran on, but it doesn't handle the case where we're
  still on the same cpu we last ran on, but some other vcpu has run on
  this cpu in the meantime.
 
  Without this, guest segfaults (and other misbehavior) have been seen in
  smp guests.
 
  Cc: sta...@vger.kernel.org # 3.8.x
  Signed-off-by: Scott Wood scottw...@freescale.com
 
 Thanks, applied to kvm-ppc-3.9.
 
 
 Alex

Can you pull it into kvm-ppc-queue?

Thanks,
Mike

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig description

2013-04-25 Thread Caraman Mihai Claudiu-B02008
 -Original Message-
 From: tiejun.chen [mailto:tiejun.c...@windriver.com]
 Sent: Friday, April 19, 2013 1:03 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig
 description
 
 On 04/11/2013 06:03 PM, Mihai Caraman wrote:
  Add e6500 core to Kconfig description.
 
  Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
  ---
  v3:
- No change
 
arch/powerpc/kvm/Kconfig |6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
 
  diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
  index 63c67ec..4489520 100644
  --- a/arch/powerpc/kvm/Kconfig
  +++ b/arch/powerpc/kvm/Kconfig
  @@ -136,15 +136,15 @@ config KVM_E500V2
If unsure, say N.
 
config KVM_E500MC
  -   bool KVM support for PowerPC E500MC/E5500 processors
  +   bool KVM support for PowerPC E500MC/E5500/E6500 processors
  depends on PPC_E500MC
  select KVM
  select KVM_MMIO
  select KVM_BOOKE_HV
  select MMU_NOTIFIER
  ---help---
  - Support running unmodified E500MC/E5500 (32-bit) guest kernels in
 
 I ever tried p5040ds but failed with 64-bit, but looks are you saying
 this patch
 set can make e5500/e6500 work well with 64-bit? If so, will we need to
 upgrade
 qemu or something else like dtb?

KVM should work on p5040ds with and without this patchset. The latest 
qemu requires this patch: "powerpc: Add paravirt idle loop for 64-bit Book-E";
you will not pass guest udev without it.
Please detail what fails on p5040ds.

-Mike


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig description

2013-04-25 Thread tiejun.chen

On 04/25/2013 05:09 PM, Caraman Mihai Claudiu-B02008 wrote:

-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Friday, April 19, 2013 1:03 PM
To: Caraman Mihai Claudiu-B02008
Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
Subject: Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig
description

On 04/11/2013 06:03 PM, Mihai Caraman wrote:

Add e6500 core to Kconfig description.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
   - No change

   arch/powerpc/kvm/Kconfig |6 +++---
   1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 63c67ec..4489520 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -136,15 +136,15 @@ config KVM_E500V2
  If unsure, say N.

   config KVM_E500MC
-   bool KVM support for PowerPC E500MC/E5500 processors
+   bool KVM support for PowerPC E500MC/E5500/E6500 processors
depends on PPC_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
select MMU_NOTIFIER
---help---
- Support running unmodified E500MC/E5500 (32-bit) guest kernels in


I once tried p5040ds but failed with 64-bit, but it looks like you are saying
this patch
set can make e5500/e6500 work well with 64-bit? If so, will we need to
upgrade
qemu or something else like dtb?


KVM should work on p5040ds with and without this patchset. The latest
qemu requires this patch: powerpc: Add paravirt idle loop for 64-bit Book-E,
you will not pass guest udev without it.


Which qemu tree should be used here?

My tree is cloned from:

git://repo.or.cz/qemu/agraf.git ppc-next

But I can't find this commit.

Tiejun

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Bug 53611] New: nVMX: Add nested EPT

2013-04-25 Thread Gleb Natapov
On Thu, Apr 25, 2013 at 01:00:42AM -0700, Nakajima, Jun wrote:
 On Wed, Apr 24, 2013 at 8:55 AM, Nakajima, Jun jun.nakaj...@intel.com wrote:
  Sorry about the slow progress. We've been distracted by some priority
  things. The patches are ready (i.e. working), but we are cleaning them
  up. I'll send what we have today.
 
 So, I have sent them, and frankly we are still cleaning up.  Please
 bear with us.
 We are also sending one more patchset to deal with EPT
 misconfiguration, but Linux should run in L2 on top of L1 KVM.
 
The patches are mangled and unreadable. Please resend using git
send-email.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 6/6] KVM: MMU: init kvm generation close to mmio wrap-around value

2013-04-25 Thread Xiao Guangrong
On 04/24/2013 08:59 PM, Gleb Natapov wrote:
 On Mon, Apr 01, 2013 at 05:56:49PM +0800, Xiao Guangrong wrote:
 Then it has chance to trigger mmio generation number wrap-around

 Signed-off-by: Xiao Guangrong xiaoguangr...@linux.vnet.ibm.com
 ---
  arch/x86/include/asm/kvm_host.h |1 +
  arch/x86/kvm/mmu.c  |8 
  virt/kvm/kvm_main.c |6 ++
  3 files changed, 15 insertions(+), 0 deletions(-)

 diff --git a/arch/x86/include/asm/kvm_host.h 
 b/arch/x86/include/asm/kvm_host.h
 index 6c1e642..4e1f7cb 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -767,6 +767,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
   struct kvm_memory_slot *slot,
   gfn_t gfn_offset, unsigned long mask);
  void kvm_mmu_zap_all(struct kvm *kvm);
 +void kvm_arch_init_generation(struct kvm *kvm);
  void kvm_mmu_invalid_mmio_sptes(struct kvm *kvm);
  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int 
 kvm_nr_mmu_pages);
 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
 index d314e21..dcc059c 100644
 --- a/arch/x86/kvm/mmu.c
 +++ b/arch/x86/kvm/mmu.c
 @@ -4279,6 +4279,14 @@ restart:
  spin_unlock(kvm-mmu_lock);
  }
  
 +void kvm_arch_init_generation(struct kvm *kvm)
 +{
 +mutex_lock(kvm-slots_lock);
 +/* It is easier to trigger mmio generation-number wrap-around. */
 +kvm_memslots(kvm)-generation = MMIO_MAX_GEN - 13;
 kvm_memslots(kvm)-generation should never overflow since
 (read|write)_cached mechanism does not handle it. Initialising it to
 anything but 0 makes overflow more likely.
 
 You can hide mmio overflow trick in kvm_current_mmio_generation():
 
 static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
 {
   return (kvm_memslots(kvm)-generation + MMIO_MAX_GEN - 13)  
 MMIO_GEN_MASK;
 }

Very smart idea. Thank you, Gleb!



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 3/6] KVM: MMU: make return value of mmio page fault handler more readable

2013-04-25 Thread Xiao Guangrong
On 04/24/2013 09:34 PM, Gleb Natapov wrote:

 diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
 index 2adcbc2..6b4ba1e 100644
 --- a/arch/x86/kvm/mmu.h
 +++ b/arch/x86/kvm/mmu.h
 @@ -52,6 +52,20 @@
  
  int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 
 sptes[4]);
  void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
 +
 +/*
 + * Return values of handle_mmio_page_fault_common:
 + * RET_MMIO_PF_EMU: it is a real mmio page fault, emulate the instruction
 + *  directly.
 + * RET_MMIO_PF_RETRY: let CPU fault again on the address.
 + * RET_MMIO_PF_BUG: bug is detected.
 + */
 +enum {
 +RET_MMIO_PF_EMU = 1,
 Make it RET_MMIO_PF_EMULATE please.

Sounds good to me, will do.

Thanks!

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH untested] vhost: allow device specific fields per vq

2013-04-25 Thread Michael S. Tsirkin
Off-list, Asias asked about adding scsi specific fields per vq.
Something like the following would be helpful: untested, just to give
you the idea.

On top of this we can add patches to move things like ubufs
from vhost.h out to net.c

Warning: completely untested.

Signed-off-by: Michael S. Tsirkin m...@redhat.com

---

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ec6fb3f..e8fa9b6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -70,9 +70,13 @@ enum vhost_net_poll_state {
VHOST_NET_POLL_STOPPED = 2,
 };
 
+struct vhost_net_virtqueue {
+   struct vhost_virtqueue vq;
+};
+
 struct vhost_net {
struct vhost_dev dev;
-   struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
+   struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX];
struct vhost_poll poll[VHOST_NET_VQ_MAX];
/* Tells us whether we are polling a socket for TX.
 * We only do this when socket buffer fills up.
@@ -612,17 +616,26 @@ static int vhost_net_open(struct inode *inode, struct 
file *f)
 {
struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
struct vhost_dev *dev;
+   struct vhost_virtqueue **vqs;
int r;
 
if (!n)
return -ENOMEM;
+   vqs = kmalloc(VHOST_NET_VQ_MAX, sizeof *vqs);
+   if (!vqs) {
+   kfree(n);
+   return -ENOMEM;
+   }
 
dev = n-dev;
-   n-vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
-   n-vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
-   r = vhost_dev_init(dev, n-vqs, VHOST_NET_VQ_MAX);
+   vqs[VHOST_NET_VQ_TX] = n-vqs[VHOST_NET_VQ_TX].vq;
+   vqs[VHOST_NET_VQ_RX] = n-vqs[VHOST_NET_VQ_RX].vq;
+   n-vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick;
+   n-vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick;
+   r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
if (r  0) {
kfree(n);
+   kfree(vqs);
return r;
}
 
@@ -727,6 +740,7 @@ static int vhost_net_release(struct inode *inode, struct 
file *f)
/* We do an extra flush before freeing memory,
 * since jobs can re-queue themselves. */
vhost_net_flush(n);
+   kfree(n-dev-vqs);
kfree(n);
return 0;
 }
diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index 2968b49..ba54b3c 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -72,6 +72,10 @@ enum {
 #define VHOST_SCSI_MAX_TARGET  256
 #define VHOST_SCSI_MAX_VQ  128
 
+struct vhost_scsi_virtqueue {
+   struct vhost_virtqueue vq;
+};
+
 struct vhost_scsi {
/* Protected by vhost_scsi-dev.mutex */
struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
@@ -79,7 +83,7 @@ struct vhost_scsi {
bool vs_endpoint;
 
struct vhost_dev dev;
-   struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ];
+   struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ];
 
struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */
@@ -902,20 +906,32 @@ err_dev:
 static int vhost_scsi_open(struct inode *inode, struct file *f)
 {
struct vhost_scsi *s;
+   struct vhost_scsi_virtqueue *vqs;
int r, i;
 
s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
 
+   vqs = kmalloc(VHOST_SCSI_MAX_VQ, sizeof *vqs);
+   if (!vqs) {
+   kfree(s);
+   return -ENOMEM;
+   }
+
vhost_work_init(s-vs_completion_work, vhost_scsi_complete_cmd_work);
 
-   s-vqs[VHOST_SCSI_VQ_CTL].handle_kick = vhost_scsi_ctl_handle_kick;
-   s-vqs[VHOST_SCSI_VQ_EVT].handle_kick = vhost_scsi_evt_handle_kick;
-   for (i = VHOST_SCSI_VQ_IO; i  VHOST_SCSI_MAX_VQ; i++)
-   s-vqs[i].handle_kick = vhost_scsi_handle_kick;
-   r = vhost_dev_init(s-dev, s-vqs, VHOST_SCSI_MAX_VQ);
+   vqs[VHOST_SCSI_VQ_CTL] = n-vqs[VHOST_SCSI_VQ_CTL].vq;
+   vqs[VHOST_SCSI_VQ_EVT] = n-vqs[VHOST_SCSI_VQ_EVT].vq;
+   s-vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
+   s-vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
+   for (i = VHOST_SCSI_VQ_IO; i  VHOST_SCSI_MAX_VQ; i++) {
+   vqs[i] = s-vqs[i].vq;
+   s-vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
+   }
+   r = vhost_dev_init(s-dev, vqs, VHOST_SCSI_MAX_VQ);
if (r  0) {
+   kfree(vqs);
kfree(s);
return r;
}
@@ -935,6 +951,7 @@ static int vhost_scsi_release(struct inode *inode, struct 
file *f)
vhost_scsi_clear_endpoint(s, t);
vhost_dev_stop(s-dev);
vhost_dev_cleanup(s-dev, false);
+   kfree(s-dev-vqs);
kfree(s);
return 0;
 }
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9759249..666ed34 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -266,20 

Re: [PATCH 0/7] KVM: irqfd generalization prepare patch set

2013-04-25 Thread Alexander Graf

On 25.04.2013, at 09:28, Gleb Natapov wrote:

 On Wed, Apr 24, 2013 at 01:20:31PM +0300, Gleb Natapov wrote:
 On Tue, Apr 16, 2013 at 07:26:08PM +0200, Alexander Graf wrote:
 The concept of an irqfd and interrupt routing are nothing particularly tied
 into the IOAPIC implementation. In fact, most of the code already is 
 perfectly
 generic.
 
 This patch set decouples most bits of the existing irqchip and irqfd
 implementation to make it reusable for non-IOAPIC platforms, like the PPC 
 MPIC.
 
 I also have a patch that implements working irqfd support on top of these,
 but that requires the in-kernel MPIC implementation to go upstream first, so
 I'm holding off on it until we settled everything there, so the concept
 certainly does work.
 
 Alex
 
 Nice cleanup, thanks! Should expect a new series with ifdef
 kvm_irqchip and ia64 compilation fixed. The fixes are minor enough for
 me to fix them while applying.
 
 Actually the series does not apply any more and has to be rebased on top of 
 the
 current queue.

Heh, we're already at v3:

  http://www.mail-archive.com/kvm-ppc@vger.kernel.org/msg06214.html


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM VM(windows xp) reseted when running geekbench for about 2 days

2013-04-25 Thread Zhanghaoyu (A)
   On Thu, Apr 18, 2013 at 12:00:49PM +, Zhanghaoyu (A) wrote:
   I start 10 VMs(windows xp), then running geekbench tool on 
   them, about 2 days, one of them was reset, I found the reset 
   operation is done by int kvm_cpu_exec(CPUArchState *env) {
  ...
 switch (run-exit_reason)
 ...
  case KVM_EXIT_SHUTDOWN:
  DPRINTF(shutdown\n);
  qemu_system_reset_request();
  ret = EXCP_INTERRUPT;
  break;
  ...
   }
   
   KVM_EXIT_SHUTDOWN exit reason was set previously in triple fault 
   handle handle_triple_fault().
   
   How do you know that reset was done here? This is not the only 
   place where qemu_system_reset_request() is called.
  I used gdb to debug QEMU process, and add a breakpoint in 
  qemu_system_reset_request(), when the case occurred, backtrace 
  shown as below,
  (gdb) bt
  #0  qemu_system_reset_request () at vl.c:1964
  #1  0x7f9ef9dc5991 in kvm_cpu_exec (env=0x7f9efac47100)
  at /gt/qemu-kvm-1.4/qemu-kvm-1.4/kvm-all.c:1602
  #2  0x7f9ef9d5b229 in qemu_kvm_cpu_thread_fn (arg=0x7f9efac47100)
  at /gt/qemu-kvm-1.4/qemu-kvm-1.4/cpus.c:759
  #3  0x7f9ef898b5f0 in start_thread () from 
  /lib64/libpthread.so.0
  #4  0x7f9ef86fa84d in clone () from /lib64/libc.so.6
  #5  0x in ?? ()
  
  And, I add printk log in all places where KVM_EXIT_SHUTDOWN exit reason 
  is set, only handle_triple_fault() was called.
  
  Make sure XP is not set to auto-reset in case of BSOD. 
  No, winxp is not set to auto-reset in case of BSOD. No Winxp event log 
  reported.
  
  Best regards,
  Yan.
  
   
   What causes the triple fault?
   
   Are you asking what is triple fault or why it happened in your case?
  What I asked is why triple fault happened in my case.
   For the former see here: 
   http://en.wikipedia.org/wiki/Triple_fault
   For the latter it is too late to tell after VM reset. You can run 
   QEMU with -no-reboot -no-shutdown. VM will pause instead of 
   rebooting and then you can examine what is going on.
  Great thanks, I'll run QEMU with -no-reboot -no-shutdown options, if VM 
  paused in my case, what should I examine?
  
 Register state ("info registers" in the monitor) for each vcpu, and the code
 around the instruction that faulted.
 
 I ran the QEMU with -no-reboot -no-shutdown options, the VM paused 
 When the case happened, then I info registers in QEMU monitor, shown as 
 below, CS =0008   00c09b00 DPL =0 CS32 [-RA]
 SS =0010   00c09300 DPL =0 DS   [-WA]
 DS =0023   00c0f300 DPL =3 DS   [-WA]
 FS =0030 ffdff000 1fff 00c09300 DPL =0 DS   [-WA]
 GS =   00c0
 LDT=   00c0
 TR =0028 80042000 20ab 8b00 DPL=0 TSS32-busy
 GDT= 8003f000 03ff
 IDT= 8003f400 07ff
 CR0=8001003b CR2=760d7fe4 CR3=002ec000 CR4=06f8 
 DR0= DR1= DR2= 
 DR3= DR6=0ff0 DR7=0400 
 EFER=0800 FCW=027f FSW= [ST=0] FTW=00 MXCSR=1f80 
 FPR0=  FPR1=  
 FPR2=  FPR3=  
 FPR4=  FPR5=  
 FPR6=  FPR7=  
 XMM00= 
 XMM01=
 XMM02= 
 XMM03=
 XMM04= 
 XMM05=
 XMM06= 
 XMM07=
 
 In normal case, info registers in QEMU monitor, shown as below CS 
 =001b   00c0fb00 DPL=3 CS32 [-RA]
 SS =0023   00c0f300 DPL=3 DS   [-WA]
 DS =0023   00c0f300 DPL=3 DS   [-WA]
 FS =0038 7ffda000 0fff 0040f300 DPL=3 DS   [-WA]
 GS =   0100
 LDT=   
 TR =0028 80042000 20ab 8b00 DPL=0 TSS32-busy
 GDT= 8003f000 03ff
 IDT= 8003f400 07ff
 CR0=80010031 CR2=0167fd20 CR3=0af00220 CR4=06f8 
 DR0= DR1= DR2= 
 DR3= DR6=0ff0 DR7=0400 
 EFER=0800 FCW=027f FSW= [ST=0] FTW=00 MXCSR=1f80
 FPR0=00a400a40a18 d830 FPR1=0012f9c07c90e900 e900 
 FPR2=7c910202 5d40 FPR3=01e27c903400 f808 
 FPR4=05230012f87a  FPR5=7c905d40 0001 
 FPR6=0001  FPR7=a9dfde00 4018 
 XMM00=7c917d9a0012f8d47c90 
 XMM01=0012f8740012f8740012f87a7c90
 XMM02=7c917de97c97b1787c917e3f0012f87a 
 XMM03=0012fa687c80901a0012f9186cfd
 XMM04=7c9102027c9034007c9102087c90e900 
 XMM05=000c7c900012f9907c91017b
 XMM06=9a400012f8780012f878 
 XMM07=6365446c74527c91340500241f18
 
 N.B. in two cases, CS DPL, SS DPL, FS DPL, FPR, XMM, FSW, ST, FTW 

Re: [PATCH 0/7] KVM: irqfd generalization prepare patch set

2013-04-25 Thread Alexander Graf

On 21.04.2013, at 12:51, Michael S. Tsirkin wrote:

 On Tue, Apr 16, 2013 at 07:26:08PM +0200, Alexander Graf wrote:
 The concept of an irqfd and interrupt routing are nothing particularly tied
 into the IOAPIC implementation. In fact, most of the code already is 
 perfectly
 generic.
 
 This patch set decouples most bits of the existing irqchip and irqfd
 implementation to make it reusable for non-IOAPIC platforms, like the PPC 
 MPIC.
 
 I also have a patch that implements working irqfd support on top of these,
 but that requires the in-kernel MPIC implementation to go upstream first, so
 I'm holding off on it until we settled everything there, so the concept
 certainly does work.
 
 Alex
 
 Nothing to object to here really, this is just
 moving code around.
 And patches 3 and 4 are definitely cleanups.
 Assuming this helps PPC gain in-kernel irqchip support:
 
 Acked-by: Michael S. Tsirkin m...@redhat.com

Could you please check the newer version of this patch set again and give your 
ack if it still holds?

  http://www.mail-archive.com/kvm-ppc@vger.kernel.org/msg06214.html


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/6] kvm: add device control API

2013-04-25 Thread Gleb Natapov
On Fri, Apr 12, 2013 at 07:08:42PM -0500, Scott Wood wrote:
 Currently, devices that are emulated inside KVM are configured in a
 hardcoded manner based on an assumption that any given architecture
 only has one way to do it.  If there's any need to access device state,
 it is done through inflexible one-purpose-only IOCTLs (e.g.
 KVM_GET/SET_LAPIC).  Defining new IOCTLs for every little thing is
 cumbersome and depletes a limited numberspace.
 
 This API provides a mechanism to instantiate a device of a certain
 type, returning an ID that can be used to set/get attributes of the
 device.  Attributes may include configuration parameters (e.g.
 register base address), device state, operational commands, etc.  It
 is similar to the ONE_REG API, except that it acts on devices rather
 than vcpus.
 
 Both device types and individual attributes can be tested without having
 to create the device or get/set the attribute, without the need for
 separately managing enumerated capabilities.
 
 Signed-off-by: Scott Wood scottw...@freescale.com
 ---
 v4:
  - Move some boilerplate back into generic code, as requested by Gleb.
File descriptor management and reference counting is no longer the
concern of the device implementation.
 
  - Don't hold kvm-lock during create.  The original reasons
for doing so have vanished as far as MPIC is concerned, and
this avoids needing to answer the question of whether to
hold the lock during destroy as well.
 
Paul, you may need to acquire the lock yourself in kvm_create_xics()
to protect the -EEXIST check.
 
 v3: remove some changes that were merged into this patch by accident,
 and fix the error documentation for KVM_CREATE_DEVICE.
 ---
  Documentation/virtual/kvm/api.txt|   70 
  Documentation/virtual/kvm/devices/README |1 +
  include/linux/kvm_host.h |   35 
  include/uapi/linux/kvm.h |   27 +++
  virt/kvm/kvm_main.c  |  129 
 ++
  5 files changed, 262 insertions(+)
  create mode 100644 Documentation/virtual/kvm/devices/README
 
 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index 976eb65..d52f3f9 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -2173,6 +2173,76 @@ header; first `n_valid' valid entries with contents 
 from the data
  written, then `n_invalid' invalid entries, invalidating any previously
  valid entries found.
  
 +4.79 KVM_CREATE_DEVICE
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: vm ioctl
 +Parameters: struct kvm_create_device (in/out)
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENODEV: The device type is unknown or unsupported
 +  EEXIST: Device already created, and this type of device may not
 +  be instantiated multiple times
 +
 +  Other error conditions may be defined by individual device types or
 +  have their standard meanings.
 +
 +Creates an emulated device in the kernel.  The file descriptor returned
 +in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
 +
 +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
 +device type is supported (not necessarily whether it can be created
 +in the current vm).
 +
 +Individual devices should not define flags.  Attributes should be used
 +for specifying any behavior that is not implied by the device type
 +number.
 +
 +struct kvm_create_device {
 + __u32   type;   /* in: KVM_DEV_TYPE_xxx */
 + __u32   fd; /* out: device handle */
 + __u32   flags;  /* in: KVM_CREATE_DEVICE_xxx */
 +};
Should we add __u32 padding here to make struct size multiple of u64?

 +
 +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: device ioctl
 +Parameters: struct kvm_device_attr
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENXIO:  The group or attribute is unknown/unsupported for this device
 +  EPERM:  The attribute cannot (currently) be accessed this way
 +  (e.g. read-only attribute, or attribute that only makes
 +  sense when the device is in a different state)
 +
 +  Other error conditions may be defined by individual device types.
 +
 +Gets/sets a specified piece of device configuration and/or state.  The
 +semantics are device-specific.  See individual device documentation in
 +the devices directory.  As with ONE_REG, the size of the data
 +transferred is defined by the particular attribute.
 +
 +struct kvm_device_attr {
 + __u32   flags;  /* no flags currently defined */
 + __u32   group;  /* device-defined */
 + __u64   attr;   /* group-defined */
 + __u64   addr;   /* userspace address of attr data */
 +};
 +
 +4.81 KVM_HAS_DEVICE_ATTR
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: device ioctl
 +Parameters: struct kvm_device_attr
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENXIO:  The group or attribute is unknown/unsupported for this device
 

Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig description

2013-04-25 Thread tiejun.chen

On 04/25/2013 05:32 PM, Caraman Mihai Claudiu-B02008 wrote:

-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Thursday, April 25, 2013 12:17 PM
To: Caraman Mihai Claudiu-B02008
Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
Subject: Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig
description

On 04/25/2013 05:09 PM, Caraman Mihai Claudiu-B02008 wrote:

-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Friday, April 19, 2013 1:03 PM
To: Caraman Mihai Claudiu-B02008
Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org
Subject: Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig
description

On 04/11/2013 06:03 PM, Mihai Caraman wrote:

Add e6500 core to Kconfig description.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
- No change

arch/powerpc/kvm/Kconfig |6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 63c67ec..4489520 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -136,15 +136,15 @@ config KVM_E500V2
  If unsure, say N.

config KVM_E500MC
-   bool KVM support for PowerPC E500MC/E5500 processors
+   bool KVM support for PowerPC E500MC/E5500/E6500 processors
depends on PPC_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
select MMU_NOTIFIER
---help---
- Support running unmodified E500MC/E5500 (32-bit) guest kernels in


I once tried p5040ds but failed with 64-bit, but it looks like you are saying
this patch
set can make e5500/e6500 work well with 64-bit? If so, will we need to
upgrade
qemu or something else like dtb?


KVM should work on p5040ds with and without this patchset. The latest
qemu requires this patch: powerpc: Add paravirt idle loop for 64-bit

Book-E,

you will not pass guest udev without it.


This is a kernel patch required by latest qemu.


It looks like this commit is applied only to galak/powerpc.git (next), but is still
not merged into agraf/linux-2.6.git, so I'm confused about which tree can support
64-bit Book3E KVM as you describe.


Tiejun
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC

2013-04-25 Thread Alexander Graf

On 19.04.2013, at 20:02, Scott Wood wrote:

 On 04/19/2013 09:06:26 AM, Alexander Graf wrote:
 diff --git a/Documentation/virtual/kvm/devices/mpic.txt 
 b/Documentation/virtual/kvm/devices/mpic.txt
 index ce98e32..dadc1e0 100644
 --- a/Documentation/virtual/kvm/devices/mpic.txt
 +++ b/Documentation/virtual/kvm/devices/mpic.txt
 @@ -35,3 +35,14 @@ Groups:
 attr is the IRQ number.  IRQ numbers for standard sources are the
 byte offset of the relevant IVPR from EIVPR0, divided by 32.
 +
 +IRQ Routing:
 +
 +  The MPIC emulation supports IRQ routing. Only a single MPIC device can
 +  be instantiated. Once that device has been created, it's available as
 +  irqchip id 0.
 +
 
 +  This irqchip 0 has 256 interrupt pins. These pins reflect the SRC pins
 +  on the MPIC controller.
 
 This irqchip 0 has 256 interrupt pins, which expose the interrupts in the 
 main array of interrupt sources (a.k.a. SRC interrupts).  The numbering is 
 the same as the MPIC device tree binding -- based on the register offset from 
 the beginning of the sources array, without regard to any subdivisions in 
 chip documentation such as internal or external interrupts.  Default 
 routes are established for these pins, with the GSI being equal to the pin 
 number.
 
 +  Access to on-SRC registers is not implemented through IRQ routing 
 mechanisms.
 
 s/on-SRC registers/non-SRC interrupts/
 
 diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
 index 10bc08a..d137df8 100644
 --- a/arch/powerpc/kvm/mpic.c
 +++ b/arch/powerpc/kvm/mpic.c
 @@ -1029,6 +1029,7 @@ static int openpic_cpu_write_internal(void *opaque, 
 gpa_t addr,
  struct irq_source *src;
  struct irq_dest *dst;
  int s_IRQ, n_IRQ;
 +int notify_eoi = -1;
  pr_debug(%s: cpu %d addr %#llx = 0x%08x\n, __func__, idx,
  addr, val);
 @@ -1087,6 +1088,8 @@ static int openpic_cpu_write_internal(void *opaque, 
 gpa_t addr,
  }
  IRQ_resetbit(dst-servicing, s_IRQ);
 +/* Notify listeners that the IRQ is over */
 +notify_eoi = s_IRQ;
  /* Set up next servicing IRQ */
  s_IRQ = IRQ_get_next(opp, dst-servicing);
  /* Check queued interrupts. */
 @@ -1104,6 +1107,12 @@ static int openpic_cpu_write_internal(void *opaque, 
 gpa_t addr,
  break;
  }
 +if (notify_eoi != -1) {
 +spin_unlock_irq(opp-lock);
 +kvm_notify_acked_irq(opp-kvm, 0, notify_eoi);
 +spin_lock_irq(opp-lock);
 +}
 
 I'd rather not have the _irq here, which could break if we enter this path 
 via an _irqsave (I realize there currently is no such path that reaches EOI 
 emulation).
 
 Will we ever set notify_eoi when addr != EOI?  I'm wondering why it was moved 
 out of the switch statement, instead of being put at the end of the case EOI: 
 code.

I doubt it, but that's for the compiler to optimize away. I found it cleaner 
for some reason to put it down there. I don't think it really matters.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC

2013-04-25 Thread Alexander Graf

On 23.04.2013, at 08:38, Paul Mackerras wrote:

 On Fri, Apr 19, 2013 at 04:06:26PM +0200, Alexander Graf wrote:
 Now that all the irq routing and irqfd pieces are generic, we can expose
 real irqchip support to all of KVM's internal helpers.
 
 This allows us to use irqfd with the in-kernel MPIC.
 
 [snip]
 diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
 index 10bc08a..d137df8 100644
 --- a/arch/powerpc/kvm/mpic.c
 +++ b/arch/powerpc/kvm/mpic.c
 [snip]
 +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 +struct kvm *kvm, int irq_source_id, int level, bool line_status)
 [snip]
 +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 +  struct kvm_kernel_irq_routing_entry *e,
 +  const struct kvm_irq_routing_entry *ue)
 
 How do you see this working once we have more than one interrupt
 controller emulation in the kernel?  Presumably these two will have to
 move out to a common file, rather than being in mpic.c, but then the
 question is how do we know which interrupt controller to send the GSI
 to?  Were you thinking we would have a restriction that you can only
 instantiate one interrupt controller of any type?  Or were you
 thinking we would have an enum for kvm_irq_routing_irqchip::irqchip?
 In that case how would we handle MSIs?

In a first version of having 2 interrupt controllers, I'd make them mutually 
exclusive in Kconfig. That way each interrupt controller implements these 
functions itself.

Later we can sit down and generalize this support. Then we would need to have a 
mapping table which irqchip type each irqchip number is and call the respective 
functions.

But the use for that is so incredibly slim and the user space API would still 
be the same, that I don't think we need to worry about it today.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/17] KVM: Add KVM_IRQCHIP_NUM_PINS in addition to KVM_IOAPIC_NUM_PINS

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:12PM +0200, Alexander Graf wrote:
 The concept of routing interrupt lines to an irqchip is nothing
 that is IOAPIC specific. Every irqchip has a maximum number of pins
 that can be linked to irq lines.
 
 So let's add a new define that allows us to reuse generic code for
 non-IOAPIC platforms.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  arch/x86/include/asm/kvm_host.h |2 ++
  include/linux/kvm_host.h|2 +-
  virt/kvm/irq_comm.c |2 +-
  3 files changed, 4 insertions(+), 2 deletions(-)
 
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index 599f98b..f44c3fe 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -43,6 +43,8 @@
  #define KVM_PIO_PAGE_OFFSET 1
  #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
  
 +#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 +
  #define CR0_RESERVED_BITS   \
   (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index 93a5005..bf3b1dc 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -307,7 +307,7 @@ struct kvm_kernel_irq_routing_entry {
  #ifdef __KVM_HAVE_IOAPIC
  
  struct kvm_irq_routing_table {
 - int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
 + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
   struct kvm_kernel_irq_routing_entry *rt_entries;
   u32 nr_rt_entries;
   /*
 diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
 index 25ab480..7c0071d 100644
 --- a/virt/kvm/irq_comm.c
 +++ b/virt/kvm/irq_comm.c
 @@ -480,7 +480,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
  
   new-nr_rt_entries = nr_rt_entries;
   for (i = 0; i  3; i++)
 - for (j = 0; j  KVM_IOAPIC_NUM_PINS; j++)
 + for (j = 0; j  KVM_IRQCHIP_NUM_PINS; j++)
   new-chip[i][j] = -1;
  
   for (i = 0; i  nr; ++i) {
 -- 
 1.6.0.2
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 02/17] KVM: Introduce CONFIG_HAVE_KVM_IRQ_ROUTING

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:13PM +0200, Alexander Graf wrote:
 Quite a bit of code in KVM has been conditionalized on availability of
 IOAPIC emulation. However, most of it is generically applicable to
 platforms that don't have an IOAPIC, but a different type of irq chip.
 
 Make code that only relies on IRQ routing, not an APIC itself, on
 CONFIG_HAVE_KVM_IRQ_ROUTING, so that we can reuse it later.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  arch/x86/kvm/Kconfig |1 +
  include/linux/kvm_host.h |6 +++---
  virt/kvm/Kconfig |3 +++
  virt/kvm/eventfd.c   |6 +++---
  virt/kvm/kvm_main.c  |2 +-
  5 files changed, 11 insertions(+), 7 deletions(-)
 
 diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
 index 586f000..9d50efd 100644
 --- a/arch/x86/kvm/Kconfig
 +++ b/arch/x86/kvm/Kconfig
 @@ -29,6 +29,7 @@ config KVM
   select MMU_NOTIFIER
   select ANON_INODES
   select HAVE_KVM_IRQCHIP
 + select HAVE_KVM_IRQ_ROUTING
   select HAVE_KVM_EVENTFD
   select KVM_APIC_ARCHITECTURE
   select KVM_ASYNC_PF
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index bf3b1dc..4215d4f 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -304,7 +304,7 @@ struct kvm_kernel_irq_routing_entry {
   struct hlist_node link;
  };
  
 -#ifdef __KVM_HAVE_IOAPIC
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  
  struct kvm_irq_routing_table {
   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
 @@ -432,7 +432,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
  int __must_check vcpu_load(struct kvm_vcpu *vcpu);
  void vcpu_put(struct kvm_vcpu *vcpu);
  
 -#ifdef __KVM_HAVE_IOAPIC
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  int kvm_irqfd_init(void);
  void kvm_irqfd_exit(void);
  #else
 @@ -957,7 +957,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, 
 unsigned long mmu_seq)
  }
  #endif
  
 -#ifdef KVM_CAP_IRQ_ROUTING
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  
  #define KVM_MAX_IRQ_ROUTES 1024
  
 diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
 index d01b24b..779262f 100644
 --- a/virt/kvm/Kconfig
 +++ b/virt/kvm/Kconfig
 @@ -6,6 +6,9 @@ config HAVE_KVM
  config HAVE_KVM_IRQCHIP
 bool
  
 +config HAVE_KVM_IRQ_ROUTING
 +   bool
 +
  config HAVE_KVM_EVENTFD
 bool
 select EVENTFD
 diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
 index c5d43ff..64ee720 100644
 --- a/virt/kvm/eventfd.c
 +++ b/virt/kvm/eventfd.c
 @@ -35,7 +35,7 @@
  
  #include iodev.h
  
 -#ifdef __KVM_HAVE_IOAPIC
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  /*
   * 
   * irqfd: Allows an fd to be used to inject an interrupt to the guest
 @@ -433,7 +433,7 @@ fail:
  void
  kvm_eventfd_init(struct kvm *kvm)
  {
 -#ifdef __KVM_HAVE_IOAPIC
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
   spin_lock_init(kvm-irqfds.lock);
   INIT_LIST_HEAD(kvm-irqfds.items);
   INIT_LIST_HEAD(kvm-irqfds.resampler_list);
 @@ -442,7 +442,7 @@ kvm_eventfd_init(struct kvm *kvm)
   INIT_LIST_HEAD(kvm-ioeventfds);
  }
  
 -#ifdef __KVM_HAVE_IOAPIC
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  /*
   * shutdown any irqfd's that match fd+gsi
   */
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index aaac1a7..2c3b226 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -2404,7 +2404,7 @@ static long kvm_dev_ioctl_check_extension_generic(long 
 arg)
   case KVM_CAP_SIGNAL_MSI:
  #endif
   return 1;
 -#ifdef KVM_CAP_IRQ_ROUTING
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
   case KVM_CAP_IRQ_ROUTING:
   return KVM_MAX_IRQ_ROUTES;
  #endif
 -- 
 1.6.0.2
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 03/17] KVM: Drop __KVM_HAVE_IOAPIC condition on irq routing

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:14PM +0200, Alexander Graf wrote:
 We have a capability enquire system that allows user space to ask kvm
 whether a feature is available.
 
 The point behind this system is that we can have different kernel
 configurations with different capabilities and user space can adjust
 accordingly.
 
 Because features can always be non existent, we can drop any #ifdefs
 on CAP defines that could be used generically, like the irq routing
 bits. These can be easily reused for non-IOAPIC systems as well.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  include/uapi/linux/kvm.h |2 --
  1 files changed, 0 insertions(+), 2 deletions(-)
 
 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
 index 74d0ff3..c741902 100644
 --- a/include/uapi/linux/kvm.h
 +++ b/include/uapi/linux/kvm.h
 @@ -579,9 +579,7 @@ struct kvm_ppc_smmu_info {
  #ifdef __KVM_HAVE_PIT
  #define KVM_CAP_REINJECT_CONTROL 24
  #endif
 -#ifdef __KVM_HAVE_IOAPIC
  #define KVM_CAP_IRQ_ROUTING 25
 -#endif
  #define KVM_CAP_IRQ_INJECT_STATUS 26
  #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
  #define KVM_CAP_DEVICE_DEASSIGNMENT 27
 -- 
 1.6.0.2
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 05/17] KVM: Move irq routing to generic code

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:16PM +0200, Alexander Graf wrote:
 The IRQ routing set ioctl lives in the hacky device assignment code inside
 of KVM today. This is definitely the wrong place for it. Move it to the much
 more natural kvm_main.c.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  virt/kvm/assigned-dev.c |   30 --
  virt/kvm/kvm_main.c |   30 ++
  2 files changed, 30 insertions(+), 30 deletions(-)
 
 diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
 index f4c7f59..8db4370 100644
 --- a/virt/kvm/assigned-dev.c
 +++ b/virt/kvm/assigned-dev.c
 @@ -983,36 +983,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, 
 unsigned ioctl,
   goto out;
   break;
   }
 -#ifdef KVM_CAP_IRQ_ROUTING
 - case KVM_SET_GSI_ROUTING: {
 - struct kvm_irq_routing routing;
 - struct kvm_irq_routing __user *urouting;
 - struct kvm_irq_routing_entry *entries;
 -
 - r = -EFAULT;
 - if (copy_from_user(routing, argp, sizeof(routing)))
 - goto out;
 - r = -EINVAL;
 - if (routing.nr = KVM_MAX_IRQ_ROUTES)
 - goto out;
 - if (routing.flags)
 - goto out;
 - r = -ENOMEM;
 - entries = vmalloc(routing.nr * sizeof(*entries));
 - if (!entries)
 - goto out;
 - r = -EFAULT;
 - urouting = argp;
 - if (copy_from_user(entries, urouting-entries,
 -routing.nr * sizeof(*entries)))
 - goto out_free_irq_routing;
 - r = kvm_set_irq_routing(kvm, entries, routing.nr,
 - routing.flags);
 - out_free_irq_routing:
 - vfree(entries);
 - break;
 - }
 -#endif /* KVM_CAP_IRQ_ROUTING */
  #ifdef __KVM_HAVE_MSIX
   case KVM_ASSIGN_SET_MSIX_NR: {
   struct kvm_assigned_msix_nr entry_nr;
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index 2c3b226..b6f3354 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -2274,6 +2274,36 @@ static long kvm_vm_ioctl(struct file *filp,
   break;
   }
  #endif
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 + case KVM_SET_GSI_ROUTING: {
 + struct kvm_irq_routing routing;
 + struct kvm_irq_routing __user *urouting;
 + struct kvm_irq_routing_entry *entries;
 +
 + r = -EFAULT;
 + if (copy_from_user(routing, argp, sizeof(routing)))
 + goto out;
 + r = -EINVAL;
 + if (routing.nr = KVM_MAX_IRQ_ROUTES)
 + goto out;
 + if (routing.flags)
 + goto out;
 + r = -ENOMEM;
 + entries = vmalloc(routing.nr * sizeof(*entries));
 + if (!entries)
 + goto out;
 + r = -EFAULT;
 + urouting = argp;
 + if (copy_from_user(entries, urouting-entries,
 +routing.nr * sizeof(*entries)))
 + goto out_free_irq_routing;
 + r = kvm_set_irq_routing(kvm, entries, routing.nr,
 + routing.flags);
 + out_free_irq_routing:
 + vfree(entries);
 + break;
 + }
 +#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
   default:
   r = kvm_arch_vm_ioctl(filp, ioctl, arg);
   if (r == -ENOTTY)
 -- 
 1.6.0.2
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 04/17] KVM: Remove kvm_get_intr_delivery_bitmask

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:15PM +0200, Alexander Graf wrote:
 The prototype has been stale for a while, I can't spot any real function
 definition behind it. Let's just remove it.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  include/linux/kvm_host.h |5 -
  1 files changed, 0 insertions(+), 5 deletions(-)
 
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index 4215d4f..a7bfe9d 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -719,11 +719,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, 
 int irq,
  void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask);
  
 -#ifdef __KVM_HAVE_IOAPIC
 -void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 -union kvm_ioapic_redirect_entry *entry,
 -unsigned long *deliver_bitmask);
 -#endif
  int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
   bool line_status);
  int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int 
 level);
 -- 
 1.6.0.2
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 06/17] KVM: Extract generic irqchip logic into irqchip.c

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:17PM +0200, Alexander Graf wrote:
 The current irq_comm.c file contains pieces of code that are generic
 across different irqchip implementations, as well as code that is
 fully IOAPIC specific.
 
 Split the generic bits out into irqchip.c.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  arch/x86/kvm/Makefile  |2 +-
  include/trace/events/kvm.h |   12 +++-
  virt/kvm/irq_comm.c|  118 --
  virt/kvm/irqchip.c |  152 
 
  4 files changed, 163 insertions(+), 121 deletions(-)
  create mode 100644 virt/kvm/irqchip.c
 
 diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
 index 04d3040..a797b8e 100644
 --- a/arch/x86/kvm/Makefile
 +++ b/arch/x86/kvm/Makefile
 @@ -7,7 +7,7 @@ CFLAGS_vmx.o := -I.
  
  kvm-y+= $(addprefix ../../../virt/kvm/, kvm_main.o 
 ioapic.o \
   coalesced_mmio.o irq_comm.o eventfd.o \
 - assigned-dev.o)
 + assigned-dev.o irqchip.o)
  kvm-$(CONFIG_IOMMU_API)  += $(addprefix ../../../virt/kvm/, iommu.o)
  kvm-$(CONFIG_KVM_ASYNC_PF)   += $(addprefix ../../../virt/kvm/, async_pf.o)
  
 diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
 index 19911dd..7005d11 100644
 --- a/include/trace/events/kvm.h
 +++ b/include/trace/events/kvm.h
 @@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit,
 __entry-errno  0 ? -__entry-errno : __entry-reason)
  );
  
 -#if defined(__KVM_HAVE_IRQ_LINE)
 +#if defined(CONFIG_HAVE_KVM_IRQCHIP)
  TRACE_EVENT(kvm_set_irq,
   TP_PROTO(unsigned int gsi, int level, int irq_source_id),
   TP_ARGS(gsi, level, irq_source_id),
 @@ -122,6 +122,10 @@ TRACE_EVENT(kvm_msi_set_irq,
   {KVM_IRQCHIP_PIC_SLAVE, PIC slave},   \
   {KVM_IRQCHIP_IOAPIC,IOAPIC}
  
 +#endif /* defined(__KVM_HAVE_IOAPIC) */
 +
 +#if defined(CONFIG_HAVE_KVM_IRQCHIP)
 +
  TRACE_EVENT(kvm_ack_irq,
   TP_PROTO(unsigned int irqchip, unsigned int pin),
   TP_ARGS(irqchip, pin),
 @@ -136,14 +140,18 @@ TRACE_EVENT(kvm_ack_irq,
   __entry-pin= pin;
   ),
  
 +#ifdef kvm_irqchips
   TP_printk(irqchip %s pin %u,
 __print_symbolic(__entry-irqchip, kvm_irqchips),
__entry-pin)
 +#else
 + TP_printk(irqchip %d pin %u, __entry-irqchip, __entry-pin)
 +#endif
  );
  
 +#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
  
  
 -#endif /* defined(__KVM_HAVE_IOAPIC) */
  
  #define KVM_TRACE_MMIO_READ_UNSATISFIED 0
  #define KVM_TRACE_MMIO_READ 1
 diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
 index 7c0071d..d5008f4 100644
 --- a/virt/kvm/irq_comm.c
 +++ b/virt/kvm/irq_comm.c
 @@ -151,59 +151,6 @@ static int kvm_set_msi_inatomic(struct 
 kvm_kernel_irq_routing_entry *e,
   return -EWOULDBLOCK;
  }
  
 -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 -{
 - struct kvm_kernel_irq_routing_entry route;
 -
 - if (!irqchip_in_kernel(kvm) || msi-flags != 0)
 - return -EINVAL;
 -
 - route.msi.address_lo = msi-address_lo;
 - route.msi.address_hi = msi-address_hi;
 - route.msi.data = msi-data;
 -
 - return kvm_set_msi(route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
 -}
 -
 -/*
 - * Return value:
 - *   0   Interrupt was ignored (masked or not delivered for other reasons)
 - *  = 0   Interrupt was coalesced (previous irq is still pending)
 - *   0   Number of CPUs interrupt was delivered to
 - */
 -int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 - bool line_status)
 -{
 - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
 - int ret = -1, i = 0;
 - struct kvm_irq_routing_table *irq_rt;
 -
 - trace_kvm_set_irq(irq, level, irq_source_id);
 -
 - /* Not possible to detect if the guest uses the PIC or the
 -  * IOAPIC.  So set the bit in both. The guest will ignore
 -  * writes to the unused one.
 -  */
 - rcu_read_lock();
 - irq_rt = rcu_dereference(kvm-irq_routing);
 - if (irq  irq_rt-nr_rt_entries)
 - hlist_for_each_entry(e, irq_rt-map[irq], link)
 - irq_set[i++] = *e;
 - rcu_read_unlock();
 -
 - while(i--) {
 - int r;
 - r = irq_set[i].set(irq_set[i], kvm, irq_source_id, level,
 - line_status);
 - if (r  0)
 - continue;
 -
 - ret = r + ((ret  0) ? 0 : ret);
 - }
 -
 - return ret;
 -}
 -
  /*
   * Deliver an IRQ in an atomic context if we can, or return a failure,
   * user can retry in a process context.
 @@ -241,63 +188,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int 
 irq_source_id, u32 irq, int level)
   return ret;
  }
  
 -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned 

Re: [PATCH 07/17] KVM: Move irq routing setup to irqchip.c

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:18PM +0200, Alexander Graf wrote:
 Setting up IRQ routes is nothing IOAPIC specific. Extract everything
 that really is generic code into irqchip.c and only leave the ioapic
 specific bits to irq_comm.c.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  include/linux/kvm_host.h |3 ++
  virt/kvm/irq_comm.c  |   76 ++---
  virt/kvm/irqchip.c   |   85 
 ++
  3 files changed, 91 insertions(+), 73 deletions(-)
 
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index a7bfe9d..dcef724 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -961,6 +961,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
   const struct kvm_irq_routing_entry *entries,
   unsigned nr,
   unsigned flags);
 +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 +   struct kvm_kernel_irq_routing_entry *e,
 +   const struct kvm_irq_routing_entry *ue);
  void kvm_free_irq_routing(struct kvm *kvm);
  
  int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
 diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
 index d5008f4..e2e6b44 100644
 --- a/virt/kvm/irq_comm.c
 +++ b/virt/kvm/irq_comm.c
 @@ -271,27 +271,14 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned 
 irqchip, unsigned pin,
   rcu_read_unlock();
  }
  
 -static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 -struct kvm_kernel_irq_routing_entry *e,
 -const struct kvm_irq_routing_entry *ue)
 +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 +   struct kvm_kernel_irq_routing_entry *e,
 +   const struct kvm_irq_routing_entry *ue)
  {
   int r = -EINVAL;
   int delta;
   unsigned max_pin;
 - struct kvm_kernel_irq_routing_entry *ei;
  
 - /*
 -  * Do not allow GSI to be mapped to the same irqchip more than once.
 -  * Allow only one to one mapping between GSI and MSI.
 -  */
 - hlist_for_each_entry(ei, rt-map[ue-gsi], link)
 - if (ei-type == KVM_IRQ_ROUTING_MSI ||
 - ue-type == KVM_IRQ_ROUTING_MSI ||
 - ue-u.irqchip.irqchip == ei-irqchip.irqchip)
 - return r;
 -
 - e-gsi = ue-gsi;
 - e-type = ue-type;
   switch (ue-type) {
   case KVM_IRQ_ROUTING_IRQCHIP:
   delta = 0;
 @@ -328,68 +315,11 @@ static int setup_routing_entry(struct 
 kvm_irq_routing_table *rt,
   goto out;
   }
  
 - hlist_add_head(e-link, rt-map[e-gsi]);
   r = 0;
  out:
   return r;
  }
  
 -int kvm_set_irq_routing(struct kvm *kvm,
 - const struct kvm_irq_routing_entry *ue,
 - unsigned nr,
 - unsigned flags)
 -{
 - struct kvm_irq_routing_table *new, *old;
 - u32 i, j, nr_rt_entries = 0;
 - int r;
 -
 - for (i = 0; i  nr; ++i) {
 - if (ue[i].gsi = KVM_MAX_IRQ_ROUTES)
 - return -EINVAL;
 - nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
 - }
 -
 - nr_rt_entries += 1;
 -
 - new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head))
 -   + (nr * sizeof(struct kvm_kernel_irq_routing_entry)),
 -   GFP_KERNEL);
 -
 - if (!new)
 - return -ENOMEM;
 -
 - new-rt_entries = (void *)new-map[nr_rt_entries];
 -
 - new-nr_rt_entries = nr_rt_entries;
 - for (i = 0; i  3; i++)
 - for (j = 0; j  KVM_IRQCHIP_NUM_PINS; j++)
 - new-chip[i][j] = -1;
 -
 - for (i = 0; i  nr; ++i) {
 - r = -EINVAL;
 - if (ue-flags)
 - goto out;
 - r = setup_routing_entry(new, new-rt_entries[i], ue);
 - if (r)
 - goto out;
 - ++ue;
 - }
 -
 - mutex_lock(kvm-irq_lock);
 - old = kvm-irq_routing;
 - kvm_irq_routing_update(kvm, new);
 - mutex_unlock(kvm-irq_lock);
 -
 - synchronize_rcu();
 -
 - new = old;
 - r = 0;
 -
 -out:
 - kfree(new);
 - return r;
 -}
 -
  #define IOAPIC_ROUTING_ENTRY(irq) \
   { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
 .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
 diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
 index 12f7f26..20dc9e4 100644
 --- a/virt/kvm/irqchip.c
 +++ b/virt/kvm/irqchip.c
 @@ -150,3 +150,88 @@ void kvm_free_irq_routing(struct kvm *kvm)
  at this stage */
   kfree(kvm-irq_routing);
  }
 +
 +static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 +struct kvm_kernel_irq_routing_entry *e,
 +const struct kvm_irq_routing_entry 

Re: [PATCH 08/17] KVM: Move irqfd resample cap handling to generic code

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:19PM +0200, Alexander Graf wrote:
 Now that we have most irqfd code completely platform agnostic, let's move
 irqfd's resample capability return to generic code as well.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  arch/x86/kvm/x86.c  |1 -
  virt/kvm/kvm_main.c |3 +++
  2 files changed, 3 insertions(+), 1 deletions(-)
 
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 50e2e10..888d892 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -2513,7 +2513,6 @@ int kvm_dev_ioctl_check_extension(long ext)
   case KVM_CAP_PCI_2_3:
   case KVM_CAP_KVMCLOCK_CTRL:
   case KVM_CAP_READONLY_MEM:
 - case KVM_CAP_IRQFD_RESAMPLE:
   r = 1;
   break;
   case KVM_CAP_COALESCED_MMIO:
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index b6f3354..f9492f3 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -2433,6 +2433,9 @@ static long kvm_dev_ioctl_check_extension_generic(long 
 arg)
  #ifdef CONFIG_HAVE_KVM_MSI
   case KVM_CAP_SIGNAL_MSI:
  #endif
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 + case KVM_CAP_IRQFD_RESAMPLE:
 +#endif
   return 1;
  #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
   case KVM_CAP_IRQ_ROUTING:
 -- 
 1.6.0.2
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/17] KVM: PPC: In-kernel MPIC support with irqfd v3

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:11PM +0200, Alexander Graf wrote:
 Hi,
 
 This patch set contains a fully working implementation of the in-kernel MPIC
 from Scott with a few fixups and a new version of my irqfd generalization
 patch set.

For patches 1-8:
Acked-by: Michael S. Tsirkin m...@redhat.com

I don't have an opinion about the rest.

 v1 - v2:
 
   - depend on CONFIG_ defines rather than __KVM defines
   - fix compile issues
   - fix the kvm_irqchip{,s} typo
 
 v2 - v3:
 
   - make mpic pointer type safe
   - add wmb before setting global mpic variable
   - make eoi notification happen unlockedly
   - add IRQ routing documentation
   - announce mpic availability after its creation
   - fix pr_debug again
 
 I have refrained from touching IA64 at all in this patch set. It's marked
 as BROKEN, I doubt it even compiles at all today. The only sensible thing
 to do would be to remove all of IA64 kvm code from the kernel tree, but
 that is out of scope for this patch set and definitely should not gate it.
 
 
 Alex
 
 Alexander Graf (11):
   KVM: Add KVM_IRQCHIP_NUM_PINS in addition to KVM_IOAPIC_NUM_PINS
   KVM: Introduce CONFIG_HAVE_KVM_IRQ_ROUTING
   KVM: Drop __KVM_HAVE_IOAPIC condition on irq routing
   KVM: Remove kvm_get_intr_delivery_bitmask
   KVM: Move irq routing to generic code
   KVM: Extract generic irqchip logic into irqchip.c
   KVM: Move irq routing setup to irqchip.c
   KVM: Move irqfd resample cap handling to generic code
   KVM: PPC: Support irq routing and irqfd for in-kernel MPIC
   KVM: PPC: MPIC: Add support for KVM_IRQ_LINE
   KVM: PPC: MPIC: Restrict to e500 platforms
 
 Scott Wood (6):
   kvm: add device control API
   kvm/ppc/mpic: import hw/openpic.c from QEMU
   kvm/ppc/mpic: remove some obviously unneeded code
   kvm/ppc/mpic: adapt to kernel style and environment
   kvm/ppc/mpic: in-kernel MPIC emulation
   kvm/ppc/mpic: add KVM_CAP_IRQ_MPIC
 
  Documentation/virtual/kvm/api.txt  |   78 ++
  Documentation/virtual/kvm/devices/README   |1 +
  Documentation/virtual/kvm/devices/mpic.txt |   48 +
  arch/powerpc/include/asm/kvm_host.h|   24 +-
  arch/powerpc/include/asm/kvm_ppc.h |   30 +
  arch/powerpc/include/uapi/asm/kvm.h|9 +
  arch/powerpc/kvm/Kconfig   |   12 +
  arch/powerpc/kvm/Makefile  |3 +
  arch/powerpc/kvm/booke.c   |   12 +-
  arch/powerpc/kvm/irq.h |   17 +
  arch/powerpc/kvm/mpic.c| 1876 
 
  arch/powerpc/kvm/powerpc.c |   55 +-
  arch/x86/include/asm/kvm_host.h|2 +
  arch/x86/kvm/Kconfig   |1 +
  arch/x86/kvm/Makefile  |2 +-
  arch/x86/kvm/x86.c |1 -
  include/linux/kvm_host.h   |   53 +-
  include/trace/events/kvm.h |   12 +-
  include/uapi/linux/kvm.h   |   33 +-
  virt/kvm/Kconfig   |3 +
  virt/kvm/assigned-dev.c|   30 -
  virt/kvm/eventfd.c |6 +-
  virt/kvm/irq_comm.c|  194 +---
  virt/kvm/irqchip.c |  237 
  virt/kvm/kvm_main.c|  170 +++-
  25 files changed, 2659 insertions(+), 250 deletions(-)
  create mode 100644 Documentation/virtual/kvm/devices/README
  create mode 100644 Documentation/virtual/kvm/devices/mpic.txt
  create mode 100644 arch/powerpc/kvm/irq.h
  create mode 100644 arch/powerpc/kvm/mpic.c
  create mode 100644 virt/kvm/irqchip.c
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/6] kvm: add device control API

2013-04-25 Thread Alexander Graf

On 25.04.2013, at 11:43, Gleb Natapov wrote:

 On Fri, Apr 12, 2013 at 07:08:42PM -0500, Scott Wood wrote:
 Currently, devices that are emulated inside KVM are configured in a
 hardcoded manner based on an assumption that any given architecture
 only has one way to do it.  If there's any need to access device state,
 it is done through inflexible one-purpose-only IOCTLs (e.g.
 KVM_GET/SET_LAPIC).  Defining new IOCTLs for every little thing is
 cumbersome and depletes a limited numberspace.
 
 This API provides a mechanism to instantiate a device of a certain
 type, returning an ID that can be used to set/get attributes of the
 device.  Attributes may include configuration parameters (e.g.
 register base address), device state, operational commands, etc.  It
 is similar to the ONE_REG API, except that it acts on devices rather
 than vcpus.
 
 Both device types and individual attributes can be tested without having
 to create the device or get/set the attribute, without the need for
 separately managing enumerated capabilities.
 
 Signed-off-by: Scott Wood scottw...@freescale.com
 ---
 v4:
 - Move some boilerplate back into generic code, as requested by Gleb.
   File descriptor management and reference counting is no longer the
   concern of the device implementation.
 
 - Don't hold kvm-lock during create.  The original reasons
   for doing so have vanished as far as MPIC is concerned, and
   this avoids needing to answer the question of whether to
   hold the lock during destroy as well.
 
   Paul, you may need to acquire the lock yourself in kvm_create_xics()
   to protect the -EEXIST check.
 
 v3: remove some changes that were merged into this patch by accident,
 and fix the error documentation for KVM_CREATE_DEVICE.
 ---
 Documentation/virtual/kvm/api.txt|   70 
 Documentation/virtual/kvm/devices/README |1 +
 include/linux/kvm_host.h |   35 
 include/uapi/linux/kvm.h |   27 +++
 virt/kvm/kvm_main.c  |  129 
 ++
 5 files changed, 262 insertions(+)
 create mode 100644 Documentation/virtual/kvm/devices/README
 
 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index 976eb65..d52f3f9 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -2173,6 +2173,76 @@ header; first `n_valid' valid entries with contents 
 from the data
 written, then `n_invalid' invalid entries, invalidating any previously
 valid entries found.
 
 +4.79 KVM_CREATE_DEVICE
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: vm ioctl
 +Parameters: struct kvm_create_device (in/out)
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENODEV: The device type is unknown or unsupported
 +  EEXIST: Device already created, and this type of device may not
 +  be instantiated multiple times
 +
 +  Other error conditions may be defined by individual device types or
 +  have their standard meanings.
 +
 +Creates an emulated device in the kernel.  The file descriptor returned
 +in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
 +
 +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
 +device type is supported (not necessarily whether it can be created
 +in the current vm).
 +
 +Individual devices should not define flags.  Attributes should be used
 +for specifying any behavior that is not implied by the device type
 +number.
 +
 +struct kvm_create_device {
 +__u32   type;   /* in: KVM_DEV_TYPE_xxx */
 +__u32   fd; /* out: device handle */
 +__u32   flags;  /* in: KVM_CREATE_DEVICE_xxx */
 +};
 Should we add __u32 padding here to make struct size multiple of u64?

Do you know of any arch that pads structs to u64 boundaries? x86_64 doesn't and 
ppc64 doesn't either.

 
 +
 +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: device ioctl
 +Parameters: struct kvm_device_attr
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENXIO:  The group or attribute is unknown/unsupported for this device
 +  EPERM:  The attribute cannot (currently) be accessed this way
 +  (e.g. read-only attribute, or attribute that only makes
 +  sense when the device is in a different state)
 +
 +  Other error conditions may be defined by individual device types.
 +
 +Gets/sets a specified piece of device configuration and/or state.  The
 +semantics are device-specific.  See individual device documentation in
 +the devices directory.  As with ONE_REG, the size of the data
 +transferred is defined by the particular attribute.
 +
 +struct kvm_device_attr {
 +__u32   flags;  /* no flags currently defined */
 +__u32   group;  /* device-defined */
 +__u64   attr;   /* group-defined */
 +__u64   addr;   /* userspace address of attr data */
 +};
 +
 +4.81 KVM_HAS_DEVICE_ATTR
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: device ioctl
 +Parameters: struct 

Re: [PATCH 16/17] KVM: PPC: MPIC: Add support for KVM_IRQ_LINE

2013-04-25 Thread Alexander Graf

On 19.04.2013, at 20:51, Scott Wood wrote:

 On 04/19/2013 09:06:27 AM, Alexander Graf wrote:
 Now that all pieces are in place for reusing generic irq infrastructure,
 we can copy x86's implementation of KVM_IRQ_LINE irq injection and simply
 reuse it for PPC, as it will work there just as well.
 Signed-off-by: Alexander Graf ag...@suse.de
 ---
 arch/powerpc/include/uapi/asm/kvm.h |1 +
 arch/powerpc/kvm/powerpc.c  |   13 +
 2 files changed, 14 insertions(+), 0 deletions(-)
 diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
 b/arch/powerpc/include/uapi/asm/kvm.h
 index 3537bf3..dbb2ac2 100644
 --- a/arch/powerpc/include/uapi/asm/kvm.h
 +++ b/arch/powerpc/include/uapi/asm/kvm.h
 @@ -26,6 +26,7 @@
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
 #define __KVM_HAVE_IRQCHIP
 +#define __KVM_HAVE_IRQ_LINE
 struct kvm_regs {
  __u64 pc;
 diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
 index c431fea..874c106 100644
 --- a/arch/powerpc/kvm/powerpc.c
 +++ b/arch/powerpc/kvm/powerpc.c
 @@ -33,6 +33,7 @@
 #include asm/cputhreads.h
 #include asm/irqflags.h
 #include timing.h
 +#include irq.h
 #include ../mm/mmu_decl.h
 #define CREATE_TRACE_POINTS
 @@ -945,6 +946,18 @@ static int kvm_vm_ioctl_get_pvinfo(struct 
 kvm_ppc_pvinfo *pvinfo)
  return 0;
 }
 +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 +  bool line_status)
 +{
 +if (!irqchip_in_kernel(kvm))
 +return -ENXIO;
 +
 +irq_event-status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 +irq_event-irq, irq_event-level,
 +line_status);
 +return 0;
 +}
 
 As Paul noted in the XICS patchset, this could reference an MPIC that has 
 gone away if the user never attached any vcpus and then closed the MPIC fd.  
 It's not a reasonable use case, but it could be used maliciously to get the 
 kernel to access a bad pointer.  The irqchip_in_kernel check helps somewhat, 
 but it's meant for ensuring that the creation has happened -- it's racy if 
 used for ensuring that destruction hasn't happened.
 
 The problem is rooted in the awkwardness of performing an operation that 
 logically should be on the MPIC fd, but is instead being done on the vm fd.
 
 I think these three steps would fix it (the first two seem like things we 
 should be doing anyway):
 - During MPIC destruction, make sure MPIC deregisters all routes that 
 reference it.
 - In kvm_set_irq(), do not release the RCU read lock until after the set() 
 function has been called.
 - Do not hook up kvm_send_userspace_msi() to MPIC or other new irqchips, as 
 that bypasses the RCU lock.  It could be supported as a device fd ioctl if 
 desired, or it could be reworked to operate on an RCU-managed list of MSI 
 handlers, though MPIC really doesn't need this at all.

Can't we just add an RCU lock in the send_userspace_msi case? I don't think we 
should handle MSIs any differently from normal IRQs.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/1] kvm:book3e: Fix a build error

2013-04-25 Thread Tiejun Chen
Commit cd66cc2e, powerpc/85xx: Add AltiVec support for e6500, adds
support for AltiVec on a Book-E class processor, but while compiling 
in the CONFIG_PPC_BOOK3E_64 and CONFIG_VIRTUALIZATION case, this
introduces the following error:

arch/powerpc/kernel/exceptions-64e.S:402: undefined reference to 
`kvmppc_handler_42_0x01B'
arch/powerpc/kernel/built-in.o: In function `exc_altivec_assist_book3e':
arch/powerpc/kernel/exceptions-64e.S:424: undefined reference to 
`kvmppc_handler_43_0x01B'
make: *** [vmlinux] Error 1

It looks like we should add these AltiVec KVM handlers.

Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
---
 arch/powerpc/kvm/bookehv_interrupts.S |5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/kvm/bookehv_interrupts.S 
b/arch/powerpc/kvm/bookehv_interrupts.S
index e8ed7d6..fa9c78a 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -319,6 +319,11 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
SPRN_DSRR0, SPRN_DSRR1, 0
 kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
SPRN_CSRR0, SPRN_CSRR1, 0
+/* altivec */
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \
+   SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \
+   SPRN_SRR0, SPRN_SRR1, 0
 #else
 /*
  * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/6] kvm: add device control API

2013-04-25 Thread Gleb Natapov
On Thu, Apr 25, 2013 at 12:47:39PM +0200, Alexander Graf wrote:
 
 On 25.04.2013, at 11:43, Gleb Natapov wrote:
 
  On Fri, Apr 12, 2013 at 07:08:42PM -0500, Scott Wood wrote:
  Currently, devices that are emulated inside KVM are configured in a
  hardcoded manner based on an assumption that any given architecture
  only has one way to do it.  If there's any need to access device state,
  it is done through inflexible one-purpose-only IOCTLs (e.g.
  KVM_GET/SET_LAPIC).  Defining new IOCTLs for every little thing is
  cumbersome and depletes a limited numberspace.
  
  This API provides a mechanism to instantiate a device of a certain
  type, returning an ID that can be used to set/get attributes of the
  device.  Attributes may include configuration parameters (e.g.
  register base address), device state, operational commands, etc.  It
  is similar to the ONE_REG API, except that it acts on devices rather
  than vcpus.
  
  Both device types and individual attributes can be tested without having
  to create the device or get/set the attribute, without the need for
  separately managing enumerated capabilities.
  
  Signed-off-by: Scott Wood scottw...@freescale.com
  ---
  v4:
  - Move some boilerplate back into generic code, as requested by Gleb.
File descriptor management and reference counting is no longer the
concern of the device implementation.
  
  - Don't hold kvm->lock during create.  The original reasons
for doing so have vanished as far as MPIC is concerned, and
this avoids needing to answer the question of whether to
hold the lock during destroy as well.
  
Paul, you may need to acquire the lock yourself in kvm_create_xics()
to protect the -EEXIST check.
  
  v3: remove some changes that were merged into this patch by accident,
  and fix the error documentation for KVM_CREATE_DEVICE.
  ---
  Documentation/virtual/kvm/api.txt|   70 
  Documentation/virtual/kvm/devices/README |1 +
  include/linux/kvm_host.h |   35 
  include/uapi/linux/kvm.h |   27 +++
  virt/kvm/kvm_main.c  |  129 
  ++
  5 files changed, 262 insertions(+)
  create mode 100644 Documentation/virtual/kvm/devices/README
  
  diff --git a/Documentation/virtual/kvm/api.txt 
  b/Documentation/virtual/kvm/api.txt
  index 976eb65..d52f3f9 100644
  --- a/Documentation/virtual/kvm/api.txt
  +++ b/Documentation/virtual/kvm/api.txt
  @@ -2173,6 +2173,76 @@ header; first `n_valid' valid entries with contents 
  from the data
  written, then `n_invalid' invalid entries, invalidating any previously
  valid entries found.
  
  +4.79 KVM_CREATE_DEVICE
  +
  +Capability: KVM_CAP_DEVICE_CTRL
  +Type: vm ioctl
  +Parameters: struct kvm_create_device (in/out)
  +Returns: 0 on success, -1 on error
  +Errors:
  +  ENODEV: The device type is unknown or unsupported
  +  EEXIST: Device already created, and this type of device may not
  +  be instantiated multiple times
  +
  +  Other error conditions may be defined by individual device types or
  +  have their standard meanings.
  +
  +Creates an emulated device in the kernel.  The file descriptor returned
  +in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
  +
  +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
  +device type is supported (not necessarily whether it can be created
  +in the current vm).
  +
  +Individual devices should not define flags.  Attributes should be used
  +for specifying any behavior that is not implied by the device type
  +number.
  +
  +struct kvm_create_device {
  +  __u32   type;   /* in: KVM_DEV_TYPE_xxx */
  +  __u32   fd; /* out: device handle */
  +  __u32   flags;  /* in: KVM_CREATE_DEVICE_xxx */
  +};
  Should we add __u32 padding here to make struct size multiple of u64?
 
 Do you know of any arch that pads structs to u64 boundaries? x86_64 doesn't 
 and ppc64 doesn't either.
 
Not really. I just noticed that we pad some structures to that effect.

  
  +
  +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
  +
  +Capability: KVM_CAP_DEVICE_CTRL
  +Type: device ioctl
  +Parameters: struct kvm_device_attr
  +Returns: 0 on success, -1 on error
  +Errors:
  +  ENXIO:  The group or attribute is unknown/unsupported for this device
  +  EPERM:  The attribute cannot (currently) be accessed this way
  +  (e.g. read-only attribute, or attribute that only makes
  +  sense when the device is in a different state)
  +
  +  Other error conditions may be defined by individual device types.
  +
  +Gets/sets a specified piece of device configuration and/or state.  The
  +semantics are device-specific.  See individual device documentation in
  +the devices directory.  As with ONE_REG, the size of the data
  +transferred is defined by the particular attribute.
  +
  +struct kvm_device_attr {
  +  __u32   flags;  /* no flags currently defined */
  +  __u32   group;  /* 

RE: [PATCH 1/1] kvm:book3e: Fix a build error

2013-04-25 Thread Caraman Mihai Claudiu-B02008
 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-
 ow...@vger.kernel.org] On Behalf Of Tiejun Chen
 Sent: Thursday, April 25, 2013 2:46 PM
 To: ga...@kernel.crashing.org
 Cc: linuxppc-...@lists.ozlabs.org; kvm-...@vger.kernel.org;
 kvm@vger.kernel.org
 Subject: [PATCH 1/1] kvm:book3e: Fix a build error
 
 Commit cd66cc2e, powerpc/85xx: Add AltiVec support for e6500, adds
 support for AltiVec on a Book-E class processor, but while compiling
 in the CONFIG_PPC_BOOK3E_64 and CONFIG_VIRTUALIZATION case, this
 introduce the following error:
 
 arch/powerpc/kernel/exceptions-64e.S:402: undefined reference to
 `kvmppc_handler_42_0x01B'
 arch/powerpc/kernel/built-in.o: In function `exc_altivec_assist_book3e':
 arch/powerpc/kernel/exceptions-64e.S:424: undefined reference to
 `kvmppc_handler_43_0x01B'
 make: *** [vmlinux] Error 1
 
 Looks we should add these altivec kvm handlers.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/kvm/bookehv_interrupts.S |5 +
  1 file changed, 5 insertions(+)
 
 diff --git a/arch/powerpc/kvm/bookehv_interrupts.S
 b/arch/powerpc/kvm/bookehv_interrupts.S
 index e8ed7d6..fa9c78a 100644
 --- a/arch/powerpc/kvm/bookehv_interrupts.S
 +++ b/arch/powerpc/kvm/bookehv_interrupts.S
 @@ -319,6 +319,11 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
   SPRN_DSRR0, SPRN_DSRR1, 0
  kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
   SPRN_CSRR0, SPRN_CSRR1, 0
 +/* altivec */
 +kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \
 + SPRN_SRR0, SPRN_SRR1, 0
 +kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \
 + SPRN_SRR0, SPRN_SRR1, 0
  #else
  /*
   * For input register values, see
 arch/powerpc/include/asm/kvm_booke_hv_asm.h
 --
 
It seems that you are not using the kvm-ppc-queue branch.

I already have a patch ready for this (and AltiVec support is a work
in progress), but we first need to pull the e6500 kernel patches from
the Linux tree into agraf.git.
 
-Mike



 



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 08/32] arm64: KVM: architecture specific MMU backend

2013-04-25 Thread Marc Zyngier
On 24/04/13 17:55, Christoffer Dall wrote:
 On Wed, Apr 24, 2013 at 4:03 AM, Marc Zyngier marc.zyng...@arm.com wrote:
 On 23/04/13 23:58, Christoffer Dall wrote:
 On Mon, Apr 08, 2013 at 05:17:10PM +0100, Marc Zyngier wrote:
 Define the arm64 specific MMU backend:
 - HYP/kernel VA offset
 - S2 4/64kB definitions
 - S2 page table populating and flushing
 - icache cleaning

 Reviewed-by: Christopher Covington c...@codeaurora.org
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  arch/arm64/include/asm/kvm_mmu.h | 136 
 +++
  1 file changed, 136 insertions(+)
  create mode 100644 arch/arm64/include/asm/kvm_mmu.h

 diff --git a/arch/arm64/include/asm/kvm_mmu.h 
 b/arch/arm64/include/asm/kvm_mmu.h
 new file mode 100644
 index 000..2eb2230
 --- /dev/null
 +++ b/arch/arm64/include/asm/kvm_mmu.h
 @@ -0,0 +1,136 @@
 +/*
 + * Copyright (C) 2012,2013 - ARM Ltd
 + * Author: Marc Zyngier marc.zyng...@arm.com
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License version 2 as
 + * published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License
 + * along with this program.  If not, see http://www.gnu.org/licenses/.
 + */
 +
 +#ifndef __ARM64_KVM_MMU_H__
 +#define __ARM64_KVM_MMU_H__
 +
 +#include asm/page.h
 +#include asm/memory.h
 +
 +/*
 + * As we only have the TTBR0_EL2 register, we cannot express
 + * negative addresses. This makes it impossible to directly share
 + * mappings with the kernel.
 + *
 + * Instead, give the HYP mode its own VA region at a fixed offset from
 + * the kernel by just masking the top bits (which are all ones for a
 + * kernel address).

 For some reason I keep choking on this, despite it being very simple.
 We're just defining a different PAGE_OFFSET, right? Why not do a hard
 define as:

 #define HYP_PAGE_OFFSET_MASK  0x
 #define HYP_PAGE_OFFSET   0xffc0

 ...or change the second paragraph of the comment to say
 that we define the HYP_PAGE_OFFSET to be 0x ffc0 .

 One of these days, VA_BITS will change to accommodate for more virtual
 space. When that day comes, I don't want to touch any of this because it
 did hurt enough when writing it. As such, I'll refrain from hardcoding
 anything.

 I don't mind a comment, though.

 + */
 +#define HYP_PAGE_OFFSET_SHIFT   VA_BITS
 +#define HYP_PAGE_OFFSET_MASK    ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)

 In any case, is there a reason for the HYP_PAGE_OFFSET_SHIFT
 indirection? It may be simpler without...

 It is common practice to have XXX_SHIFT and XXX_MASK together.

 +#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK)
 +
 +/*
 + * Our virtual mapping for the idmap-ed MMU-enable code. Must be
 + * shared across all the page-tables. Conveniently, we use the last
 + * possible page, where no kernel mapping will ever exist.
 + */
 +#define TRAMPOLINE_VA   (HYP_PAGE_OFFSET_MASK & PAGE_MASK)

 hmmm, ok, here it's kind of nice to have that define correlation, so
 maybe it's not cleaner.  Something should be improved here in the define
 or the comment to make it more clear.  Perhaps just adding the real
 constants in the comment or in Documentation/arm64/memory.txt would
 help.

 Yes, I plan to write something there.

 +
 +#ifdef __ASSEMBLY__
 +
 +/*
 + * Convert a kernel VA into a HYP VA.
 + * reg: VA to be converted.
 + */
 +.macro kern_hyp_va  reg
 +and \reg, \reg, #HYP_PAGE_OFFSET_MASK
 +.endm
 +
 +#else
 +
 +#include asm/cacheflush.h
 +
 +#define KERN_TO_HYP(kva)((unsigned long)kva - PAGE_OFFSET + 
 HYP_PAGE_OFFSET)
 +
 +/*
 + * Align KVM with the kernel's view of physical memory. Should be
 + * 40bit IPA, with PGD being 8kB aligned.
 + */
 +#define KVM_PHYS_SHIFT  PHYS_MASK_SHIFT
 +#define KVM_PHYS_SIZE   (1UL << KVM_PHYS_SHIFT)
 +#define KVM_PHYS_MASK   (KVM_PHYS_SIZE - 1UL)
 +
 +#ifdef CONFIG_ARM64_64K_PAGES
 +#define PAGE_LEVELS 2
 +#define BITS_PER_LEVEL  13
 +#else  /* 4kB pages */
 +#define PAGE_LEVELS 3
 +#define BITS_PER_LEVEL  9
 +#endif

 What are the semantics of these defines exactly? They should be
 S2_PAGE_LEVELS and make some assumptions of the VTCR_EL2.SL0 field
 right?

 Indeed, we assume SL0 is always 1, just like for the kernel. As for the
 semantics, I thought they were pretty obvious...

 
 this is all stage2 right? so S2_PAGE_LEVELS may be more clear...

It actually applies to all of host Stage-1, HYP Stage-1 and guest Stage-2
(we keep all page tables the same on the host side).

 PAGE_LEVELS is just that, the number of page levels. BITS_PER_LEVEL is
 the number of bits you need 

Re: [PATCH v4 1/6] kvm: add device control API

2013-04-25 Thread Alexander Graf

On 25.04.2013, at 14:07, Gleb Natapov wrote:

 On Thu, Apr 25, 2013 at 12:47:39PM +0200, Alexander Graf wrote:
 
 On 25.04.2013, at 11:43, Gleb Natapov wrote:
 
 On Fri, Apr 12, 2013 at 07:08:42PM -0500, Scott Wood wrote:
 Currently, devices that are emulated inside KVM are configured in a
 hardcoded manner based on an assumption that any given architecture
 only has one way to do it.  If there's any need to access device state,
 it is done through inflexible one-purpose-only IOCTLs (e.g.
 KVM_GET/SET_LAPIC).  Defining new IOCTLs for every little thing is
 cumbersome and depletes a limited numberspace.
 
 This API provides a mechanism to instantiate a device of a certain
 type, returning an ID that can be used to set/get attributes of the
 device.  Attributes may include configuration parameters (e.g.
 register base address), device state, operational commands, etc.  It
 is similar to the ONE_REG API, except that it acts on devices rather
 than vcpus.
 
 Both device types and individual attributes can be tested without having
 to create the device or get/set the attribute, without the need for
 separately managing enumerated capabilities.
 
 Signed-off-by: Scott Wood scottw...@freescale.com
 ---
 v4:
 - Move some boilerplate back into generic code, as requested by Gleb.
  File descriptor management and reference counting is no longer the
  concern of the device implementation.
 
 - Don't hold kvm-lock during create.  The original reasons
  for doing so have vanished as for as MPIC is concerned, and
  this avoids needing to answer the question of whether to
  hold the lock during destroy as well.
 
  Paul, you may need to acquire the lock yourself in kvm_create_xics()
  to protect the -EEXIST check.
 
 v3: remove some changes that were merged into this patch by accident,
 and fix the error documentation for KVM_CREATE_DEVICE.
 ---
 Documentation/virtual/kvm/api.txt|   70 
 Documentation/virtual/kvm/devices/README |1 +
 include/linux/kvm_host.h |   35 
 include/uapi/linux/kvm.h |   27 +++
 virt/kvm/kvm_main.c  |  129 
 ++
 5 files changed, 262 insertions(+)
 create mode 100644 Documentation/virtual/kvm/devices/README
 
 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index 976eb65..d52f3f9 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -2173,6 +2173,76 @@ header; first `n_valid' valid entries with contents 
 from the data
 written, then `n_invalid' invalid entries, invalidating any previously
 valid entries found.
 
 +4.79 KVM_CREATE_DEVICE
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: vm ioctl
 +Parameters: struct kvm_create_device (in/out)
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENODEV: The device type is unknown or unsupported
 +  EEXIST: Device already created, and this type of device may not
 +  be instantiated multiple times
 +
 +  Other error conditions may be defined by individual device types or
 +  have their standard meanings.
 +
 +Creates an emulated device in the kernel.  The file descriptor returned
 +in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
 +
 +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
 +device type is supported (not necessarily whether it can be created
 +in the current vm).
 +
 +Individual devices should not define flags.  Attributes should be used
 +for specifying any behavior that is not implied by the device type
 +number.
 +
 +struct kvm_create_device {
 +  __u32   type;   /* in: KVM_DEV_TYPE_xxx */
 +  __u32   fd; /* out: device handle */
 +  __u32   flags;  /* in: KVM_CREATE_DEVICE_xxx */
 +};
 Should we add __u32 padding here to make struct size multiple of u64?
 
 Do you know of any arch that pads structs to u64 boundaries? x86_64 doesn't 
 and ppc64 doesn't either.
 
 Not really. I just notices that we pad some structures to that effect.

I don't think we really need to :).

 
 
 +
 +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: device ioctl
 +Parameters: struct kvm_device_attr
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENXIO:  The group or attribute is unknown/unsupported for this device
 +  EPERM:  The attribute cannot (currently) be accessed this way
 +  (e.g. read-only attribute, or attribute that only makes
 +  sense when the device is in a different state)
 +
 +  Other error conditions may be defined by individual device types.
 +
 +Gets/sets a specified piece of device configuration and/or state.  The
 +semantics are device-specific.  See individual device documentation in
 +the devices directory.  As with ONE_REG, the size of the data
 +transferred is defined by the particular attribute.
 +
 +struct kvm_device_attr {
 +  __u32   flags;  /* no flags currently defined */
 +  __u32   group;  /* device-defined */
 +  __u64   attr; 

Re: [PATCH v4 1/6] kvm: add device control API

2013-04-25 Thread Gleb Natapov
On Thu, Apr 25, 2013 at 03:45:14PM +0200, Alexander Graf wrote:
  Please move struct definitions and KVM_CREATE_DEVICE_TEST define out
  from ioctl definition block.
  
  Let me change that in my tree...
  
  So are you sending this via your tree and I should not apply it directly?
 
 I was hoping to have things ready very soon for you to just pull...
 
Makes sense, since there are PPC patches that depend on this one. The 3.10
merge window will very likely open next week though...

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 16/17] KVM: PPC: MPIC: Add support for KVM_IRQ_LINE

2013-04-25 Thread Alexander Graf

On 25.04.2013, at 13:30, Alexander Graf wrote:

 
 On 19.04.2013, at 20:51, Scott Wood wrote:
 
 On 04/19/2013 09:06:27 AM, Alexander Graf wrote:
 Now that all pieces are in place for reusing generic irq infrastructure,
 we can copy x86's implementation of KVM_IRQ_LINE irq injection and simply
 reuse it for PPC, as it will work there just as well.
 Signed-off-by: Alexander Graf ag...@suse.de
 ---
 arch/powerpc/include/uapi/asm/kvm.h |1 +
 arch/powerpc/kvm/powerpc.c  |   13 +
 2 files changed, 14 insertions(+), 0 deletions(-)
 diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
 b/arch/powerpc/include/uapi/asm/kvm.h
 index 3537bf3..dbb2ac2 100644
 --- a/arch/powerpc/include/uapi/asm/kvm.h
 +++ b/arch/powerpc/include/uapi/asm/kvm.h
 @@ -26,6 +26,7 @@
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
 #define __KVM_HAVE_IRQCHIP
 +#define __KVM_HAVE_IRQ_LINE
 struct kvm_regs {
 __u64 pc;
 diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
 index c431fea..874c106 100644
 --- a/arch/powerpc/kvm/powerpc.c
 +++ b/arch/powerpc/kvm/powerpc.c
 @@ -33,6 +33,7 @@
 #include asm/cputhreads.h
 #include asm/irqflags.h
 #include timing.h
 +#include irq.h
 #include ../mm/mmu_decl.h
 #define CREATE_TRACE_POINTS
 @@ -945,6 +946,18 @@ static int kvm_vm_ioctl_get_pvinfo(struct 
 kvm_ppc_pvinfo *pvinfo)
 return 0;
 }
 +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 + bool line_status)
 +{
 +   if (!irqchip_in_kernel(kvm))
 +   return -ENXIO;
 +
 +   irq_event-status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 +   irq_event-irq, irq_event-level,
 +   line_status);
 +   return 0;
 +}
 
 As Paul noted in the XICS patchset, this could reference an MPIC that has 
 gone away if the user never attached any vcpus and then closed the MPIC fd.  
 It's not a reasonable use case, but it could be used maliciously to get the 
 kernel to access a bad pointer.  The irqchip_in_kernel check helps somewhat, 
 but it's meant for ensuring that the creation has happened -- it's racy if 
 used for ensuring that destruction hasn't happened.
 
 The problem is rooted in the awkwardness of performing an operation that 
 logically should be on the MPIC fd, but is instead being done on the vm fd.
 
 I think these three steps would fix it (the first two seem like things we 
 should be doing anyway):
 - During MPIC destruction, make sure MPIC deregisters all routes that 
 reference it.
 - In kvm_set_irq(), do not release the RCU read lock until after the set() 
 function has been called.
 - Do not hook up kvm_send_userspace_msi() to MPIC or other new irqchips, as 
 that bypasses the RCU lock.  It could be supported as a device fd ioctl if 
 desired, or it could be reworked to operate on an RCU-managed list of MSI 
 handlers, though MPIC really doesn't need this at all.
 
 Can't we just add an RCU lock in the send_userspace_msi case? I don't think 
 we should handle MSIs any differently from normal IRQs.

In fact I'm having a hard time verifying that we're always accessing things 
with the proper locks held. I'm pretty sure we're missing a few cases.

So how about we delay mpic destruction until vm destruction? We simply take one 
extra user reference when we spawn the mpic and put (drop) it on vm_destruct. 
That way users _can_ destroy mpics, but they will only really be freed once the 
vm is also gone.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM : PPC : cache flush for kernel managed pages

2013-04-25 Thread Alexander Graf

On 23.04.2013, at 08:39, Bharat Bhushan wrote:

 The kernel should only try flushing pages which are managed by the kernel.
 pfn_to_page will return a junk struct page for pages not managed by the kernel,
 so if the kernel tries to flush direct mapped memory or a direct assigned device
 mapping then it will work on a junk struct page.
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 arch/powerpc/kvm/e500_mmu_host.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)
 
 diff --git a/arch/powerpc/kvm/e500_mmu_host.c 
 b/arch/powerpc/kvm/e500_mmu_host.c
 index 1c6a9d7..e07da21 100644
 --- a/arch/powerpc/kvm/e500_mmu_host.c
 +++ b/arch/powerpc/kvm/e500_mmu_host.c
 @@ -455,7 +455,8 @@ static inline int kvmppc_e500_shadow_map(struct 
 kvmppc_vcpu_e500 *vcpu_e500,
   ref, gvaddr, stlbe);
 
   /* Clear i-cache for new pages */
 - kvmppc_mmu_flush_icache(pfn);
 + if (pfn_valid(pfn))
 + kvmppc_mmu_flush_icache(pfn);

Could you please move the check into kvmppc_mmu_flush_icache()? That way we're 
guaranteed we can't screw up cache flushes ever :).

Also, please add a comment saying why we need this.


Alex

 
   /* Drop refcount on page, so that mmu notifiers can clear it */
   kvm_release_pfn_clean(pfn);
 -- 
 1.7.0.4
 
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] KVM : PPC : cache flush for kernel managed pages

2013-04-25 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Thursday, April 25, 2013 8:36 PM
 To: Bhushan Bharat-R65777
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421; Bhushan
 Bharat-R65777
 Subject: Re: [PATCH] KVM : PPC : cache flush for kernel managed pages
 
 
 On 23.04.2013, at 08:39, Bharat Bhushan wrote:
 
  Kernel should only try flushing pages which are managed by kernel.
  pfn_to_page will returns junk struct page for pages not managed by
  kernel, so if kernel will try to flush direct mapped memory or direct
  assigned device mapping then it will work on junk struct page.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  arch/powerpc/kvm/e500_mmu_host.c |3 ++-
  1 files changed, 2 insertions(+), 1 deletions(-)
 
  diff --git a/arch/powerpc/kvm/e500_mmu_host.c
  b/arch/powerpc/kvm/e500_mmu_host.c
  index 1c6a9d7..e07da21 100644
  --- a/arch/powerpc/kvm/e500_mmu_host.c
  +++ b/arch/powerpc/kvm/e500_mmu_host.c
  @@ -455,7 +455,8 @@ static inline int kvmppc_e500_shadow_map(struct
 kvmppc_vcpu_e500 *vcpu_e500,
  ref, gvaddr, stlbe);
 
  /* Clear i-cache for new pages */
  -   kvmppc_mmu_flush_icache(pfn);
  +   if (pfn_valid(pfn))
  +   kvmppc_mmu_flush_icache(pfn);
 
 Could you please move the check into kvmppc_mmu_flush_icache()? That way we're
 guaranteed we can't screw up cache flushes ever :).
 
 Also, please add a comment saying why we need this.

Ok

-Bharat

 
 
 Alex
 
 
  /* Drop refcount on page, so that mmu notifiers can clear it */
  kvm_release_pfn_clean(pfn);
  --
  1.7.0.4
 
 
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 08/32] arm64: KVM: architecture specific MMU backend

2013-04-25 Thread Christoffer Dall
On Thu, Apr 25, 2013 at 5:59 AM, Marc Zyngier marc.zyng...@arm.com wrote:
 On 24/04/13 17:55, Christoffer Dall wrote:
 On Wed, Apr 24, 2013 at 4:03 AM, Marc Zyngier marc.zyng...@arm.com wrote:
 On 23/04/13 23:58, Christoffer Dall wrote:
 On Mon, Apr 08, 2013 at 05:17:10PM +0100, Marc Zyngier wrote:
 Define the arm64 specific MMU backend:
 - HYP/kernel VA offset
 - S2 4/64kB definitions
 - S2 page table populating and flushing
 - icache cleaning

 Reviewed-by: Christopher Covington c...@codeaurora.org
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  arch/arm64/include/asm/kvm_mmu.h | 136 
 +++
  1 file changed, 136 insertions(+)
  create mode 100644 arch/arm64/include/asm/kvm_mmu.h

 diff --git a/arch/arm64/include/asm/kvm_mmu.h 
 b/arch/arm64/include/asm/kvm_mmu.h
 new file mode 100644
 index 000..2eb2230
 --- /dev/null
 +++ b/arch/arm64/include/asm/kvm_mmu.h
 @@ -0,0 +1,136 @@
 +/*
 + * Copyright (C) 2012,2013 - ARM Ltd
 + * Author: Marc Zyngier marc.zyng...@arm.com
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License version 2 as
 + * published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License
 + * along with this program.  If not, see http://www.gnu.org/licenses/.
 + */
 +
 +#ifndef __ARM64_KVM_MMU_H__
 +#define __ARM64_KVM_MMU_H__
 +
 +#include asm/page.h
 +#include asm/memory.h
 +
 +/*
 + * As we only have the TTBR0_EL2 register, we cannot express
 + * negative addresses. This makes it impossible to directly share
 + * mappings with the kernel.
 + *
 + * Instead, give the HYP mode its own VA region at a fixed offset from
 + * the kernel by just masking the top bits (which are all ones for a
 + * kernel address).

 For some reason I keep choking on this, despite it being very simple.
 We're just defining a different PAGE_OFFSET, right? Why not do a hard
 define as:

 #define HYP_PAGE_OFFSET_MASK  0x
 #define HYP_PAGE_OFFSET   0xffc0

 ...or change the second paragraph of the comment to say
 that we definethe HYP_PAGE_OFFSET to be 0x ffc0 .

 One of these days, VA_BITS will change to accommodate for more virtual
 space. When that day comes, I don't want to touch any of this because it
 did hurt enough when writing it. As such, I'll refrain from hardcoding
 anything.

 I don't mind a comment, though.

 + */
 +#define HYP_PAGE_OFFSET_SHIFT   VA_BITS
 +#define HYP_PAGE_OFFSET_MASK    ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 
 1)

 In any case, is there a reason for the HYP_PAGE_OFFSET_SHIFT
 indirection? It may be simpler without...

 It is common practice to have XXX_SHIFT and XXX_MASK together.

 +#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK)
 +
 +/*
 + * Our virtual mapping for the idmap-ed MMU-enable code. Must be
 + * shared across all the page-tables. Conveniently, we use the last
 + * possible page, where no kernel mapping will ever exist.
 + */
 +#define TRAMPOLINE_VA   (HYP_PAGE_OFFSET_MASK & PAGE_MASK)

 hmmm, ok, here it's kind of nice to have that define correlation, so
 maybe it's not cleaner.  Something should be improved here in the define
 or the comment to make it more clear.  Perhaps just adding the real
 constants in the comment or in Documentation/arm64/memory.txt would
 help.

 Yes, I plan to write something there.

 +
 +#ifdef __ASSEMBLY__
 +
 +/*
 + * Convert a kernel VA into a HYP VA.
 + * reg: VA to be converted.
 + */
 +.macro kern_hyp_va  reg
 +and \reg, \reg, #HYP_PAGE_OFFSET_MASK
 +.endm
 +
 +#else
 +
 +#include asm/cacheflush.h
 +
 +#define KERN_TO_HYP(kva)((unsigned long)kva - PAGE_OFFSET + 
 HYP_PAGE_OFFSET)
 +
 +/*
 + * Align KVM with the kernel's view of physical memory. Should be
 + * 40bit IPA, with PGD being 8kB aligned.
 + */
 +#define KVM_PHYS_SHIFT  PHYS_MASK_SHIFT
 +#define KVM_PHYS_SIZE   (1UL << KVM_PHYS_SHIFT)
 +#define KVM_PHYS_MASK   (KVM_PHYS_SIZE - 1UL)
 +
 +#ifdef CONFIG_ARM64_64K_PAGES
 +#define PAGE_LEVELS 2
 +#define BITS_PER_LEVEL  13
 +#else  /* 4kB pages */
 +#define PAGE_LEVELS 3
 +#define BITS_PER_LEVEL  9
 +#endif

 What are the semantics of these defines exactly? They should be
 S2_PAGE_LEVELS and make some assumptions of the VTCR_EL2.SL0 field
 right?

 Indeed, we assume SL0 is always 1, just like for the kernel. As for the
 semantics, I though they were pretty obvious...


 this is all stage2 right? so S2_PAGE_LEVELS may be more clear...

 It actually applies to both host Stage-1, HYP Stage-1 and guest Stage-2
 (we keep all page tables the same on the host side).

 PAGE_LEVELS is just 

[PATCH v2] KVM: PPC: cache flush for kernel managed pages

2013-04-25 Thread Bharat Bhushan
From: Bharat Bhushan bharat.bhus...@freescale.com

The kernel can only access pages that it maps as memory,
so flush only the valid kernel pages.

Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
v1-v2
 - move pfn_valid() check in kvmppc_mmu_flush_icache
 - Added comment to describe why this is needed

 arch/powerpc/include/asm/kvm_ppc.h |9 -
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index f589307..4794de6 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -282,8 +282,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids);
 
 static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
 {
-   /* Clear i-cache for new pages */
struct page *page;
+   /*
+* We can only access pages that the kernel maps
+* as memory. Bail out for unmapped ones.
+*/
+   if (!pfn_valid(pfn))
+   return;
+
+   /* Clear i-cache for new pages */
page = pfn_to_page(pfn);
if (!test_bit(PG_arch_1, page-flags)) {
flush_dcache_icache_page(page);
-- 
1.7.0.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/6] kvm: add device control API

2013-04-25 Thread Scott Wood

On 04/25/2013 05:47:39 AM, Alexander Graf wrote:


On 25.04.2013, at 11:43, Gleb Natapov wrote:

 +void kvm_device_put(struct kvm_device *dev)
 +{
 +  if (atomic_dec_and_test(&dev->users))
 +  dev->ops->destroy(dev);
 +}
 +
 +static int kvm_device_release(struct inode *inode, struct file  
*filp)

 +{
 +  struct kvm_device *dev = filp->private_data;
 +  struct kvm *kvm = dev->kvm;
 +
 +  kvm_device_put(dev);
 +  kvm_put_kvm(kvm);
 We may put kvm only if users goes to zero, otherwise kvm can be
 freed while something holds a reference to a device. Why not make
 kvm_device_put() do it?

Nice catch. I'll change the patch so it does the kvm_put_kvm inside  
kvm_device_put's destroy branch.


No, please don't.  The KVM reference being put here is associated  
with the file descriptor, not with the MPIC object.  If you make that  
change I think you'll have circular references and thus a memory leak,  
because the vcpus can hold a reference to the MPIC object.


-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC

2013-04-25 Thread Scott Wood

On 04/25/2013 04:58:51 AM, Alexander Graf wrote:


On 19.04.2013, at 20:02, Scott Wood wrote:

 On 04/19/2013 09:06:26 AM, Alexander Graf wrote:
 +  if (notify_eoi != -1) {
 +  spin_unlock_irq(&opp->lock);
 +  kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
 +  spin_lock_irq(&opp->lock);
 +  }

 I'd rather not have the _irq here, which could break if we enter  
this path via an _irqsave (I realize there currently is no such  
path that reaches EOI emulation).


 Will we ever set notify_eoi when addr != EOI?  I'm wondering why it  
was moved out of the switch statement, instead of being put at the  
end of the case EOI: code.


I doubt it, but that's for the compiler to optimize away. I found it  
cleaner for some reason to put it down there. I don't think it really  
matters.


Cleanliness is my concern as well.  It doesn't seem clean to  
arbitrarily split up the EOI implementation.


-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/6] kvm: add device control API

2013-04-25 Thread Gleb Natapov
On Thu, Apr 25, 2013 at 11:51:08AM -0500, Scott Wood wrote:
 On 04/25/2013 05:47:39 AM, Alexander Graf wrote:
 
 On 25.04.2013, at 11:43, Gleb Natapov wrote:
 
  +void kvm_device_put(struct kvm_device *dev)
  +{
  + if (atomic_dec_and_test(dev-users))
  + dev-ops-destroy(dev);
  +}
  +
  +static int kvm_device_release(struct inode *inode, struct file
 *filp)
  +{
  + struct kvm_device *dev = filp-private_data;
  + struct kvm *kvm = dev-kvm;
  +
  + kvm_device_put(dev);
  + kvm_put_kvm(kvm);
  We may put kvm only if users goes to zero, otherwise kvm can be
  freed while something holds a reference to a device. Why not make
  kvm_device_put() do it?
 
 Nice catch. I'll change the patch so it does the kvm_put_kvm
 inside kvm_device_put's destroy branch.
 
 No, please don't.  The KVM reference being put here is associated
 with the file descriptor, not with the MPIC object.
Is it so? Device holds a pointer to kvm, so it increments kvm reference
to make sure the pointer is valid. What prevents kvm from being destroyed
while device is still in use in current code?
 

If you make
 that change I think you'll have circular references and thus a
 memory leak, because the vcpus can hold a reference to the MPIC
 object.
 
How can a circular reference be created?

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/6] kvm: add device control API

2013-04-25 Thread Scott Wood

On 04/25/2013 01:22:04 PM, Gleb Natapov wrote:

On Thu, Apr 25, 2013 at 11:51:08AM -0500, Scott Wood wrote:
 On 04/25/2013 05:47:39 AM, Alexander Graf wrote:
 
 On 25.04.2013, at 11:43, Gleb Natapov wrote:
 
  +void kvm_device_put(struct kvm_device *dev)
  +{
  + if (atomic_dec_and_test(dev-users))
  + dev-ops-destroy(dev);
  +}
  +
  +static int kvm_device_release(struct inode *inode, struct file
 *filp)
  +{
  + struct kvm_device *dev = filp-private_data;
  + struct kvm *kvm = dev-kvm;
  +
  + kvm_device_put(dev);
  + kvm_put_kvm(kvm);
  We may put kvm only if users goes to zero, otherwise kvm can be
  freed while something holds a reference to a device. Why not make
  kvm_device_put() do it?
 
 Nice catch. I'll change the patch so it does the kvm_put_kvm
 inside kvm_device_put's destroy branch.

 No, please don't.  The KVM reference being put here is associated
 with the file descriptor, not with the MPIC object.
Is it so? Device holds a pointer to kvm, so it increments kvm  
reference
to make sure the pointer is valid. What prevents kvm from being  
destroyed

while device is still in use in current code?


Where will that kvm pointer be used, after all the file descriptors go  
away and the vcpus stop running?  mmio_mapped guards against unmapping  
the MMIO if it's already been unmapped due to KVM destruction.  We  
don't have any timers or other delayed work.


Well, I do see one place, that Alex added -- the NULLing out of  
dev-kvm-arch.mpic, which didn't exist in my patchset.



 that change I think you'll have circular references and thus a
 memory leak, because the vcpus can hold a reference to the MPIC
 object.

How can a circular reference be created?


MPIC holds reference on KVM, vcpu holds reference on MPIC, and vcpu is  
not destroyed until KVM is destroyed.


-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 16/17] KVM: PPC: MPIC: Add support for KVM_IRQ_LINE

2013-04-25 Thread Scott Wood

On 04/25/2013 09:49:23 AM, Alexander Graf wrote:


On 25.04.2013, at 13:30, Alexander Graf wrote:


 On 19.04.2013, at 20:51, Scott Wood wrote:

 On 04/19/2013 09:06:27 AM, Alexander Graf wrote:
 Now that all pieces are in place for reusing generic irq  
infrastructure,
 we can copy x86's implementation of KVM_IRQ_LINE irq injection  
and simply

 reuse it for PPC, as it will work there just as well.
 Signed-off-by: Alexander Graf ag...@suse.de
 ---
 arch/powerpc/include/uapi/asm/kvm.h |1 +
 arch/powerpc/kvm/powerpc.c  |   13 +
 2 files changed, 14 insertions(+), 0 deletions(-)
 diff --git a/arch/powerpc/include/uapi/asm/kvm.h  
b/arch/powerpc/include/uapi/asm/kvm.h

 index 3537bf3..dbb2ac2 100644
 --- a/arch/powerpc/include/uapi/asm/kvm.h
 +++ b/arch/powerpc/include/uapi/asm/kvm.h
 @@ -26,6 +26,7 @@
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
 #define __KVM_HAVE_IRQCHIP
 +#define __KVM_HAVE_IRQ_LINE
 struct kvm_regs {
__u64 pc;
 diff --git a/arch/powerpc/kvm/powerpc.c  
b/arch/powerpc/kvm/powerpc.c

 index c431fea..874c106 100644
 --- a/arch/powerpc/kvm/powerpc.c
 +++ b/arch/powerpc/kvm/powerpc.c
 @@ -33,6 +33,7 @@
 #include asm/cputhreads.h
 #include asm/irqflags.h
 #include timing.h
 +#include irq.h
 #include ../mm/mmu_decl.h
 #define CREATE_TRACE_POINTS
 @@ -945,6 +946,18 @@ static int kvm_vm_ioctl_get_pvinfo(struct  
kvm_ppc_pvinfo *pvinfo)

return 0;
 }
 +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level  
*irq_event,

 +bool line_status)
 +{
 +  if (!irqchip_in_kernel(kvm))
 +  return -ENXIO;
 +
 +	irq_event-status = kvm_set_irq(kvm,  
KVM_USERSPACE_IRQ_SOURCE_ID,
 +	irq_event-irq,  
irq_event-level,

 +  line_status);
 +  return 0;
 +}

 As Paul noted in the XICS patchset, this could reference an MPIC  
that has gone away if the user never attached any vcpus and then  
closed the MPIC fd.  It's not a reasonable use case, but it could be  
used maliciously to get the kernel to access a bad pointer.  The  
irqchip_in_kernel check helps somewhat, but it's meant for ensuring  
that the creation has happened -- it's racy if used for ensuring that  
destruction hasn't happened.


 The problem is rooted in the awkwardness of performing an  
operation that logically should be on the MPIC fd, but is instead  
being done on the vm fd.


 I think these three steps would fix it (the first two seem like  
things we should be doing anyway):
 - During MPIC destruction, make sure MPIC deregisters all routes  
that reference it.
 - In kvm_set_irq(), do not release the RCU read lock until after  
the set() function has been called.
 - Do not hook up kvm_send_userspace_msi() to MPIC or other new  
irqchips, as that bypasses the RCU lock.  It could be supported as a  
device fd ioctl if desired, or it could be reworked to operate on an  
RCU-managed list of MSI handlers, though MPIC really doesn't need  
this at all.


 Can't we just add an RCU lock in the send_userspace_msi case? I  
don't think we should handle MSIs any differently from normal IRQs.


Well, you can't *just* add the RCU lock -- you need to add data to be  
managed via RCU (e.g. a list of MSI callbacks, or at least a boolean  
indicating whether calling the MSI code is OK).


In fact I'm having a hard time verifying that we're always accessing  
things with proper locks held. I'm pretty sure we're missing a few  
cases.


Any path in particular?

So how about we delay mpic destruction to vm destruction? We simply  
add one user too many when we spawn the mpic and put it on  
vm_destruct. That way users _can_ destroy mpics, but they will only  
be really free'd once the vm is also gone.


That's what we originally had before the fd conversion.  If we want it  
again, we'll need to go back to maintaining a list of devices in KVM  
(though it could be a linked list now that we don't need to use it for  
lookups), or have some hardcoded MPIC hack.


IIRC I said back then that converting to fd would make destruction  
ordering more of a pain...


-Scott
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 16/17] KVM: PPC: MPIC: Add support for KVM_IRQ_LINE

2013-04-25 Thread Alexander Graf

On 25.04.2013, at 21:03, Scott Wood wrote:

 On 04/25/2013 09:49:23 AM, Alexander Graf wrote:
 On 25.04.2013, at 13:30, Alexander Graf wrote:
 
  On 19.04.2013, at 20:51, Scott Wood wrote:
 
  On 04/19/2013 09:06:27 AM, Alexander Graf wrote:
  Now that all pieces are in place for reusing generic irq infrastructure,
  we can copy x86's implementation of KVM_IRQ_LINE irq injection and simply
  reuse it for PPC, as it will work there just as well.
  Signed-off-by: Alexander Graf ag...@suse.de
  ---
  arch/powerpc/include/uapi/asm/kvm.h |1 +
  arch/powerpc/kvm/powerpc.c  |   13 +
  2 files changed, 14 insertions(+), 0 deletions(-)
  diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
  b/arch/powerpc/include/uapi/asm/kvm.h
  index 3537bf3..dbb2ac2 100644
  --- a/arch/powerpc/include/uapi/asm/kvm.h
  +++ b/arch/powerpc/include/uapi/asm/kvm.h
  @@ -26,6 +26,7 @@
  #define __KVM_HAVE_SPAPR_TCE
  #define __KVM_HAVE_PPC_SMT
  #define __KVM_HAVE_IRQCHIP
  +#define __KVM_HAVE_IRQ_LINE
  struct kvm_regs {
   __u64 pc;
  diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
  index c431fea..874c106 100644
  --- a/arch/powerpc/kvm/powerpc.c
  +++ b/arch/powerpc/kvm/powerpc.c
  @@ -33,6 +33,7 @@
  #include asm/cputhreads.h
  #include asm/irqflags.h
  #include timing.h
  +#include irq.h
  #include ../mm/mmu_decl.h
  #define CREATE_TRACE_POINTS
  @@ -945,6 +946,18 @@ static int kvm_vm_ioctl_get_pvinfo(struct 
  kvm_ppc_pvinfo *pvinfo)
   return 0;
  }
  +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level 
  *irq_event,
  +  bool line_status)
  +{
  +if (!irqchip_in_kernel(kvm))
  +return -ENXIO;
  +
  +irq_event-status = kvm_set_irq(kvm, 
  KVM_USERSPACE_IRQ_SOURCE_ID,
  +irq_event-irq, 
  irq_event-level,
  +line_status);
  +return 0;
  +}
 
  As Paul noted in the XICS patchset, this could reference an MPIC that has 
  gone away if the user never attached any vcpus and then closed the MPIC 
  fd.  It's not a reasonable use case, but it could be used maliciously to 
  get the kernel to access a bad pointer.  The irqchip_in_kernel check 
  helps somewhat, but it's meant for ensuring that the creation has 
  happened -- it's racy if used for ensuring that destruction hasn't 
  happened.
 
  The problem is rooted in the awkwardness of performing an operation that 
  logically should be on the MPIC fd, but is instead being done on the vm 
  fd.
 
  I think these three steps would fix it (the first two seem like things we 
  should be doing anyway):
  - During MPIC destruction, make sure MPIC deregisters all routes that 
  reference it.
  - In kvm_set_irq(), do not release the RCU read lock until after the 
  set() function has been called.
  - Do not hook up kvm_send_userspace_msi() to MPIC or other new irqchips, 
  as that bypasses the RCU lock.  It could be supported as a device fd 
  ioctl if desired, or it could be reworked to operate on an RCU-managed 
  list of MSI handlers, though MPIC really doesn't need this at all.
 
  Can't we just add an RCU lock in the send_userspace_msi case? I don't 
  think we should handle MSIs any differently from normal IRQs.
 
 Well, you can't *just* add the RCU lock -- you need to add data to be managed 
 via RCU (e.g. a list of MSI callbacks, or at least a boolean indicating 
 whether calling the MSI code is OK).

Well, we'd just access a random pin routing :).

 
 In fact I'm having a hard time verifying that we're always accessing things 
 with proper locks held. I'm pretty sure we're missing a few cases.
 
 Any path in particular?

I'm already getting confused on whether normal MMIO accesses are always safe.

 
 So how about we delay mpic destruction to vm destruction? We simply add one 
 user too many when we spawn the mpic and put it on vm_destruct. That way 
 users _can_ destroy mpics, but they will only be really free'd once the vm 
 is also gone.
 
 That's what we originally had before the fd conversion.  If we want it again, 
 we'll need to go back to maintaining a list of devices in KVM (though it 
 could be a linked list now that we don't need to use it for lookups), or have 
 some hardcoded MPIC hack.

Well, we could have an anonymous linked list of device pointers with a simple 
registration function. That way it's generic enough for any device to be kept 
alive until vm destruction if it wants that.

 IIRC I said back then that converting to fd would make destruction ordering 
 more of a pain...

I usually like to pick the raisins from everything I can. So while I like the 
fd approach for its universally understandable scheme, simplicity of use, 
extensibility of ioctls etc, I don't really like the headaches that come with 
destroying a device while a VM is running. So having a device keep itself alive 
until the VM is gone is the best of all worlds :).


Alex

--
To unsubscribe 

[PATCH] kvm, svm: Fix typo in printk message

2013-04-25 Thread Borislav Petkov
From: Borislav Petkov b...@suse.de

It is exit_int_info. It is actually EXITINTINFO in the official docs
but we don't like screaming docs.

Signed-off-by: Borislav Petkov b...@suse.de
---
 arch/x86/kvm/svm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a3bba7786ecc..272d29844cc5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3491,7 +3491,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
-   printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
+   printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
   "exit_code 0x%x\n",
   __func__, svm->vmcb->control.exit_int_info,
   exit_code);
-- 
1.8.2.135.g7b592fa

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] vfio: Use down_reads to protect iommu disconnects

2013-04-25 Thread Alex Williamson
If a group or device is released or a container is unset from a group
it can race against file ops on the container.  Protect these with
down_reads to allow concurrent users.

Signed-off-by: Alex Williamson alex.william...@redhat.com
Reported-by: Michael S. Tsirkin m...@redhat.com
---
 drivers/vfio/vfio.c |   62 ++-
 1 file changed, 46 insertions(+), 16 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 073788e..ac7423b 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -704,9 +704,13 @@ EXPORT_SYMBOL_GPL(vfio_del_group_dev);
 static long vfio_ioctl_check_extension(struct vfio_container *container,
   unsigned long arg)
 {
-   struct vfio_iommu_driver *driver = container-iommu_driver;
+   struct vfio_iommu_driver *driver;
long ret = 0;
 
+   down_read(container-group_lock);
+
+   driver = container-iommu_driver;
+
switch (arg) {
/* No base extensions yet */
default:
@@ -736,6 +740,8 @@ static long vfio_ioctl_check_extension(struct 
vfio_container *container,
 VFIO_CHECK_EXTENSION, arg);
}
 
+   up_read(container-group_lock);
+
return ret;
 }
 
@@ -844,9 +850,6 @@ static long vfio_fops_unl_ioctl(struct file *filep,
if (!container)
return ret;
 
-   driver = container-iommu_driver;
-   data = container-iommu_data;
-
switch (cmd) {
case VFIO_GET_API_VERSION:
ret = VFIO_API_VERSION;
@@ -858,8 +861,15 @@ static long vfio_fops_unl_ioctl(struct file *filep,
ret = vfio_ioctl_set_iommu(container, arg);
break;
default:
+   down_read(container-group_lock);
+
+   driver = container-iommu_driver;
+   data = container-iommu_data;
+
if (driver) /* passthrough all unrecognized ioctls */
ret = driver-ops-ioctl(data, cmd, arg);
+
+   up_read(container-group_lock);
}
 
return ret;
@@ -910,35 +920,55 @@ static ssize_t vfio_fops_read(struct file *filep, char 
__user *buf,
  size_t count, loff_t *ppos)
 {
struct vfio_container *container = filep-private_data;
-   struct vfio_iommu_driver *driver = container-iommu_driver;
+   struct vfio_iommu_driver *driver;
+   ssize_t ret = -EINVAL;
 
-   if (unlikely(!driver || !driver-ops-read))
-   return -EINVAL;
+   down_read(container-group_lock);
+
+   driver = container-iommu_driver;
+   if (likely(driver  driver-ops-read))
+   ret = driver-ops-read(container-iommu_data,
+   buf, count, ppos);
 
-   return driver-ops-read(container-iommu_data, buf, count, ppos);
+   up_read(container-group_lock);
+
+   return ret;
 }
 
 static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
   size_t count, loff_t *ppos)
 {
struct vfio_container *container = filep-private_data;
-   struct vfio_iommu_driver *driver = container-iommu_driver;
+   struct vfio_iommu_driver *driver;
+   ssize_t ret = -EINVAL;
 
-   if (unlikely(!driver || !driver-ops-write))
-   return -EINVAL;
+   down_read(container-group_lock);
+
+   driver = container-iommu_driver;
+   if (likely(driver  driver-ops-write))
+   ret = driver-ops-write(container-iommu_data,
+buf, count, ppos);
+
+   up_read(container-group_lock);
 
-   return driver-ops-write(container-iommu_data, buf, count, ppos);
+   return ret;
 }
 
 static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
 {
struct vfio_container *container = filep-private_data;
-   struct vfio_iommu_driver *driver = container-iommu_driver;
+   struct vfio_iommu_driver *driver;
+   int ret = -EINVAL;
 
-   if (unlikely(!driver || !driver-ops-mmap))
-   return -EINVAL;
+   down_read(container-group_lock);
 
-   return driver-ops-mmap(container-iommu_data, vma);
+   driver = container-iommu_driver;
+   if (likely(driver  driver-ops-mmap))
+   ret = driver-ops-mmap(container-iommu_data, vma);
+
+   up_read(container-group_lock);
+
+   return ret;
 }
 
 static const struct file_operations vfio_fops = {

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] Protect against iommu driver disconnect

2013-04-25 Thread Alex Williamson
Michael Tsirkin pointed out that file operations on /dev/vfio/vfio
dereference iommu_driver and iommu_data without a lock.  If releasing
a group or unsetting the container occurs concurrently, we could race.
We currently use a mutex when setting this association, so we can
convert to a rwsem keeping the existing mutex critical sections as
down_writes and add down_reads where these are used.  Thanks,

Alex

---

Alex Williamson (2):
  vfio: Convert container->group_lock to rwsem
  vfio: Use down_reads to protect iommu disconnects


 drivers/vfio/vfio.c |   83 +++
 1 file changed, 57 insertions(+), 26 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] vfio: Convert container->group_lock to rwsem

2013-04-25 Thread Alex Williamson
All current users are writers, maintaining current mutual exclusion.
This lets us add read users next.

Signed-off-by: Alex Williamson alex.william...@redhat.com
---
 drivers/vfio/vfio.c |   21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 21eddd9..073788e 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -24,6 +24,7 @@
 #include linux/list.h
 #include linux/module.h
 #include linux/mutex.h
+#include linux/rwsem.h
 #include linux/sched.h
 #include linux/slab.h
 #include linux/string.h
@@ -57,7 +58,7 @@ struct vfio_iommu_driver {
 struct vfio_container {
struct kref kref;
struct list_headgroup_list;
-   struct mutexgroup_lock;
+   struct rw_semaphore group_lock;
struct vfio_iommu_driver*iommu_driver;
void*iommu_data;
 };
@@ -738,7 +739,7 @@ static long vfio_ioctl_check_extension(struct 
vfio_container *container,
return ret;
 }
 
-/* hold container-group_lock */
+/* hold write lock on container-group_lock */
 static int __vfio_container_attach_groups(struct vfio_container *container,
  struct vfio_iommu_driver *driver,
  void *data)
@@ -769,7 +770,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container 
*container,
struct vfio_iommu_driver *driver;
long ret = -ENODEV;
 
-   mutex_lock(container-group_lock);
+   down_write(container-group_lock);
 
/*
 * The container is designed to be an unprivileged interface while
@@ -780,7 +781,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container 
*container,
 * the container is deprivileged and returns to an unset state.
 */
if (list_empty(container-group_list) || container-iommu_driver) {
-   mutex_unlock(container-group_lock);
+   up_write(container-group_lock);
return -EINVAL;
}
 
@@ -827,7 +828,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container 
*container,
 
mutex_unlock(vfio.iommu_drivers_lock);
 skip_drivers_unlock:
-   mutex_unlock(container-group_lock);
+   up_write(container-group_lock);
 
return ret;
 }
@@ -882,7 +883,7 @@ static int vfio_fops_open(struct inode *inode, struct file 
*filep)
return -ENOMEM;
 
INIT_LIST_HEAD(container-group_list);
-   mutex_init(container-group_lock);
+   init_rwsem(container-group_lock);
kref_init(container-kref);
 
filep-private_data = container;
@@ -961,7 +962,7 @@ static void __vfio_group_unset_container(struct vfio_group 
*group)
struct vfio_container *container = group-container;
struct vfio_iommu_driver *driver;
 
-   mutex_lock(container-group_lock);
+   down_write(container-group_lock);
 
driver = container-iommu_driver;
if (driver)
@@ -979,7 +980,7 @@ static void __vfio_group_unset_container(struct vfio_group 
*group)
container-iommu_data = NULL;
}
 
-   mutex_unlock(container-group_lock);
+   up_write(container-group_lock);
 
vfio_container_put(container);
 }
@@ -1039,7 +1040,7 @@ static int vfio_group_set_container(struct vfio_group 
*group, int container_fd)
container = f.file-private_data;
WARN_ON(!container); /* fget ensures we don't race vfio_release */
 
-   mutex_lock(container-group_lock);
+   down_write(container-group_lock);
 
driver = container-iommu_driver;
if (driver) {
@@ -1057,7 +1058,7 @@ static int vfio_group_set_container(struct vfio_group 
*group, int container_fd)
atomic_inc(group-container_users);
 
 unlock_out:
-   mutex_unlock(container-group_lock);
+   up_write(container-group_lock);
fdput(f);
return ret;
 }

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH -v2] x86: Add a Kconfig shortcut for kvm guest kernel

2013-04-25 Thread Borislav Petkov
From: Borislav Petkov b...@suse.de
Date: Tue, 16 Apr 2013 18:24:34 +0200
Subject: [PATCH -v2] x86: Add a Kconfig shortcut for kvm guest kernel

This is pretty useful for the case where people want to boot the
resulting kernel in qemu/kvm. Instead of going and searching for each
required option through the Kconfig maze, this single option should
simply enable everything required/good to have to boot the resulting
kernel in the guest.

Cc: Fengguang Wu fengguang...@intel.com
Originally-by: Pekka Enberg penb...@kernel.org
Originally-by: Sasha Levin levinsasha...@gmail.com
Signed-off-by: Borislav Petkov b...@suse.de
---


Here's v2 which should be addressing all review comments so far.


 arch/x86/Kconfig | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5651374d179f..76a95ffa959a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -680,6 +680,44 @@ config KVM_GUEST
  underlying device model, the host provides the guest with
  timing infrastructure such as time of day, and system time
 
+config KVM_GUEST_COMMODITY_OPTIONS
+   bool "Enable commodity options for a standalone KVM guest"
+   depends on KVM_GUEST
+   select NET
+   select NETDEVICES
+   select BLOCK
+   select BLK_DEV
+   select NETWORK_FILESYSTEMS
+   select INET
+   select EXPERIMENTAL
+   select TTY
+   select SERIAL_8250
+   select SERIAL_8250_CONSOLE
+   select IP_PNP
+   select IP_PNP_DHCP
+   select BINFMT_ELF
+   select PCI_MSI
+   select HAVE_ARCH_KGDB
+   select DEBUG_KERNEL
+   select KGDB
+   select KGDB_SERIAL_CONSOLE
+   select VIRTUALIZATION
+   select VIRTIO
+   select VIRTIO_RING
+   select VIRTIO_PCI
+   select VIRTIO_BLK
+   select VIRTIO_CONSOLE
+   select VIRTIO_NET
+   select 9P_FS
+   select NET_9P
+   select NET_9P_VIRTIO
+   ---help---
+ Select guest kernel functionality which facilitates booting the
+ kernel as a guest in qemu/kvm. This entails basic stuff like
+ serial support, kgdb, virtio and other so that you can be able to
+ have commodity functionality like serial output from the guest,
+ networking, etc.
+
 source arch/x86/lguest/Kconfig
 
 config PARAVIRT_TIME_ACCOUNTING
-- 
1.8.2.135.g7b592fa

-- 
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] kvm/ppc/mpic: Eliminate mmio_mapped

2013-04-25 Thread Scott Wood
We no longer need to keep track of this now that MPIC destruction
always happens either during VM destruction (after MMIO has been
destroyed) or during a failed creation (before the fd has been exposed
to userspace, and thus before the MMIO region could have been
registered).

Signed-off-by: Scott Wood scottw...@freescale.com
---
 arch/powerpc/kvm/mpic.c |   29 +
 1 file changed, 1 insertion(+), 28 deletions(-)

diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 4ac98d1..84e828e 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -190,7 +190,6 @@ struct openpic {
struct kvm_io_device mmio;
struct list_head mmio_regions;
atomic_t users;
-   bool mmio_mapped;
 
gpa_t reg_base;
spinlock_t lock;
@@ -1427,24 +1426,13 @@ static int kvm_mpic_write(struct kvm_io_device *this, 
gpa_t addr,
return ret;
 }
 
-static void kvm_mpic_dtor(struct kvm_io_device *this)
-{
-   struct openpic *opp = container_of(this, struct openpic, mmio);
-
-   opp-mmio_mapped = false;
-}
-
 static const struct kvm_io_device_ops mpic_mmio_ops = {
.read = kvm_mpic_read,
.write = kvm_mpic_write,
-   .destructor = kvm_mpic_dtor,
 };
 
 static void map_mmio(struct openpic *opp)
 {
-   BUG_ON(opp-mmio_mapped);
-   opp-mmio_mapped = true;
-
kvm_iodevice_init(opp-mmio, mpic_mmio_ops);
 
kvm_io_bus_register_dev(opp-kvm, KVM_MMIO_BUS,
@@ -1454,10 +1442,7 @@ static void map_mmio(struct openpic *opp)
 
 static void unmap_mmio(struct openpic *opp)
 {
-   if (opp-mmio_mapped) {
-   opp-mmio_mapped = false;
-   kvm_io_bus_unregister_dev(opp-kvm, KVM_MMIO_BUS, opp-mmio);
-   }
+   kvm_io_bus_unregister_dev(opp-kvm, KVM_MMIO_BUS, opp-mmio);
 }
 
 static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
@@ -1636,18 +1621,6 @@ static void mpic_destroy(struct kvm_device *dev)
 {
struct openpic *opp = dev-private;
 
-   if (opp-mmio_mapped) {
-   /*
-* Normally we get unmapped by kvm_io_bus_destroy(),
-* which happens before the VCPUs release their references.
-*
-* Thus, we should only get here if no VCPUs took a reference
-* to us in the first place.
-*/
-   WARN_ON(opp-nb_cpus != 0);
-   unmap_mmio(opp);
-   }
-
dev-kvm-arch.mpic = NULL;
kfree(opp);
 }
-- 
1.7.10.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] kvm: destroy emulated devices on VM exit

2013-04-25 Thread Scott Wood
The hassle of getting refcounting right was greater than the hassle
of keeping a list of devices to destroy on VM exit.

Signed-off-by: Scott Wood scottw...@freescale.com
---
 arch/powerpc/kvm/mpic.c  |2 --
 include/linux/kvm_host.h |3 ++-
 virt/kvm/kvm_main.c  |   29 -
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index d137df8..4ac98d1 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -1788,7 +1788,6 @@ int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, 
struct kvm_vcpu *vcpu,
if (opp-mpic_mode_mask == GCR_MODE_PROXY)
vcpu-arch.epr_flags |= KVMPPC_EPR_KERNEL;
 
-   kvm_device_get(dev);
 out:
spin_unlock_irq(opp-lock);
return ret;
@@ -1804,7 +1803,6 @@ void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, 
struct kvm_vcpu *vcpu)
BUG_ON(!opp-dst[vcpu-arch.irq_cpu_id].vcpu);
 
opp-dst[vcpu-arch.irq_cpu_id].vcpu = NULL;
-   kvm_device_put(opp-dev);
 }
 
 /*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index feffbda..36c9776 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -393,6 +393,7 @@ struct kvm {
long mmu_notifier_count;
 #endif
long tlbs_dirty;
+   struct list_head devices;
 };
 
 #define kvm_err(fmt, ...) \
@@ -1069,8 +1070,8 @@ struct kvm_device_ops;
 struct kvm_device {
struct kvm_device_ops *ops;
struct kvm *kvm;
-   atomic_t users;
void *private;
+   struct list_head vm_node;
 };
 
 /* create, destroy, and name are mandatory */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f6cd14d..5da9f02 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -504,6 +504,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
mutex_init(kvm-irq_lock);
mutex_init(kvm-slots_lock);
atomic_set(kvm-users_count, 1);
+   INIT_LIST_HEAD(kvm-devices);
 
r = kvm_init_mmu_notifier(kvm);
if (r)
@@ -581,6 +582,19 @@ void kvm_free_physmem(struct kvm *kvm)
kfree(kvm-memslots);
 }
 
+static void kvm_destroy_devices(struct kvm *kvm)
+{
+   struct list_head *node, *tmp;
+
+   list_for_each_safe(node, tmp, kvm-devices) {
+   struct kvm_device *dev =
+   list_entry(node, struct kvm_device, vm_node);
+
+   list_del(node);
+   dev-ops-destroy(dev);
+   }
+}
+
 static void kvm_destroy_vm(struct kvm *kvm)
 {
int i;
@@ -600,6 +614,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_flush_shadow_all(kvm);
 #endif
kvm_arch_destroy_vm(kvm);
+   kvm_destroy_devices(kvm);
kvm_free_physmem(kvm);
cleanup_srcu_struct(kvm-srcu);
kvm_arch_free_vm(kvm);
@@ -2195,23 +2210,11 @@ static long kvm_device_ioctl(struct file *filp, 
unsigned int ioctl,
}
 }
 
-void kvm_device_get(struct kvm_device *dev)
-{
-   atomic_inc(dev-users);
-}
-
-void kvm_device_put(struct kvm_device *dev)
-{
-   if (atomic_dec_and_test(dev-users))
-   dev-ops-destroy(dev);
-}
-
 static int kvm_device_release(struct inode *inode, struct file *filp)
 {
struct kvm_device *dev = filp-private_data;
struct kvm *kvm = dev-kvm;
 
-   kvm_device_put(dev);
kvm_put_kvm(kvm);
return 0;
 }
@@ -2257,7 +2260,6 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 
dev-ops = ops;
dev-kvm = kvm;
-   atomic_set(dev-users, 1);
 
ret = ops-create(dev, cd-type);
if (ret  0) {
@@ -2271,6 +2273,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
return ret;
}
 
+   list_add(dev-vm_node, kvm-devices);
kvm_get_kvm(kvm);
cd-fd = ret;
return 0;
-- 
1.7.10.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] kvm:book3e: Fix a build error

2013-04-25 Thread tiejun.chen

On 04/25/2013 08:11 PM, Caraman Mihai Claudiu-B02008 wrote:

-Original Message-
From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-
ow...@vger.kernel.org] On Behalf Of Tiejun Chen
Sent: Thursday, April 25, 2013 2:46 PM
To: ga...@kernel.crashing.org
Cc: linuxppc-...@lists.ozlabs.org; kvm-...@vger.kernel.org;
kvm@vger.kernel.org
Subject: [PATCH 1/1] kvm:book3e: Fix a build error

Commit cd66cc2e, powerpc/85xx: Add AltiVec support for e6500, adds
support for AltiVec on a Book-E class processor, but while compiling
in the CONFIG_PPC_BOOK3E_64 and CONFIG_VIRTUALIZATION case, this
introduces the following error:

arch/powerpc/kernel/exceptions-64e.S:402: undefined reference to
`kvmppc_handler_42_0x01B'
arch/powerpc/kernel/built-in.o: In function `exc_altivec_assist_book3e':
arch/powerpc/kernel/exceptions-64e.S:424: undefined reference to
`kvmppc_handler_43_0x01B'
make: *** [vmlinux] Error 1

Looks we should add these altivec kvm handlers.

Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
---
  arch/powerpc/kvm/bookehv_interrupts.S |5 +
  1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/kvm/bookehv_interrupts.S
b/arch/powerpc/kvm/bookehv_interrupts.S
index e8ed7d6..fa9c78a 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -319,6 +319,11 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
SPRN_DSRR0, SPRN_DSRR1, 0
  kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
SPRN_CSRR0, SPRN_CSRR1, 0
+/* altivec */
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \
+   SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \
+   SPRN_SRR0, SPRN_SRR1, 0
  #else
  /*
   * For input register values, see
arch/powerpc/include/asm/kvm_booke_hv_asm.h
--


It seems that you are not using kvm-ppc-queue branch.


This is just used to fix a build error in powerpc.git introduced by commit 
cd66cc2e, powerpc/85xx: Add AltiVec support for e6500, in *powerpc.git*, as I 
mentioned in the patch description.




I already have a patch ready for this (and AltiVec support is work


This change doesn't block your AltiVec support for kvm unless you think this 
change is wrong. In particular, we can always reproduce this error with or without 
enabling AltiVec, so I also don't think this should be held back until e6500 is 
supported in kvm, since kvm based on e5500 should work.


Tiejun


in progress) but we need first to pull e6500 kernel patches from
Linux tree into agraf.git.

-Mike










--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig description

2013-04-25 Thread tiejun.chen

On 04/25/2013 07:32 PM, Caraman Mihai Claudiu-B02008 wrote:

Is the following fine with that generic machine, ppce500, to boot
P5040DS with
64bit,

./qemu-system-ppc64 -enable-kvm -m 1048 -nographic -M ppce500 -kernel
uImage
-initrd ramdisk.gz  -L . -append root=/dev/ram rw console=ttyS0,115200
-cpu
e5500 -dtb p5040ds.dtb

Thanks,

Tiejun


There is no need for -dtb.


Following your comment, I used kvm-ppc-queue, whose top commit is be28a27c, kvm/ppc: 
don't call complete_mmio_load when it's a store, plus the patch you 
pointed to, to build a uImage based on corenet64_smp_defconfig. We need to 
enable CONFIG_PPC_QEMU_E500 manually, and select CONFIG_TICK_CPU_ACCOUNTING, 
since the default CONFIG_VIRT_CPU_ACCOUNTING_NATIVE would introduce some trace 
when booting the VM.


And perform as follows:

./qemu-system-ppc64 -enable-kvm -m 1048 -nographic -M ppce500 -kernel uImage 
-initrd ramdisk.gz  -L . -append root=/dev/ram rw console=ttyS0,115200 -cpu e5500


But I can't see anything in the serial port.

Tiejun
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v10 7/7] KVM: VMX: Use posted interrupt to deliver virtual interrupt

2013-04-25 Thread Yangminqiang
Hi Yang Zhang,

Could you please let me know your CPU model, or the CPU models which support 
apic-v as your patch requires, so that I could try your patches?

  Intel Software Developer's Manual, Volume 3C, 
  System Programming Guide, Part 3. Ch29, 
  APIC VIRTUALIZATION AND VIRTUAL INTERRUPTS

Or how can I know whether my hardware supports those features listed in the
manual above?

Thanks,
Steven

 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
 Behalf Of Yang Zhang
 Sent: Thursday, April 11, 2013 7:25 PM
 To: kvm@vger.kernel.org
 Cc: g...@redhat.com; mtosa...@redhat.com; xiantao.zh...@intel.com;
 jun.nakaj...@intel.com; Yang Zhang
 Subject: [PATCH v10 7/7] KVM: VMX: Use posted interrupt to deliver virtual
 interrupt
 
 From: Yang Zhang yang.z.zh...@intel.com
 
 If posted interrupt is available, then use it to inject virtual
 interrupt to guest.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  arch/x86/kvm/lapic.c |   30 +++---
  arch/x86/kvm/vmx.c   |2 +-
  arch/x86/kvm/x86.c   |1 +
  3 files changed, 21 insertions(+), 12 deletions(-)
 
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index dbf74c9..e29883c 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -353,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic
 *apic)
   if (!apic-irr_pending)
   return -1;
 
 + kvm_x86_ops-sync_pir_to_irr(apic-vcpu);
   result = apic_search_irr(apic);
   ASSERT(result == -1 || result = 16);
 
 @@ -683,18 +684,25 @@ static int __apic_accept_irq(struct kvm_lapic *apic,
 int delivery_mode,
   if (dest_map)
   __set_bit(vcpu-vcpu_id, dest_map);
 
 - result = !apic_test_and_set_irr(vector, apic);
 - trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode,
 -   trig_mode, vector, !result);
 - if (!result) {
 - if (trig_mode)
 - apic_debug(level trig mode repeatedly for 
 - vector %d, vector);
 - break;
 - }
 + if (kvm_x86_ops-deliver_posted_interrupt) {
 + result = 1;
 + kvm_x86_ops-deliver_posted_interrupt(vcpu, vector);
 + } else {
 + result = !apic_test_and_set_irr(vector, apic);
 
 - kvm_make_request(KVM_REQ_EVENT, vcpu);
 - kvm_vcpu_kick(vcpu);
 + if (!result) {
 + if (trig_mode)
 + apic_debug(level trig mode repeatedly 
 + for vector %d, vector);
 + goto out;
 + }
 +
 + kvm_make_request(KVM_REQ_EVENT, vcpu);
 + kvm_vcpu_kick(vcpu);
 + }
 +out:
 + trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode,
 + trig_mode, vector, !result);
   break;
 
   case APIC_DM_REMRD:
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 314b2ed..52b21da 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -84,7 +84,7 @@ module_param(vmm_exclusive, bool, S_IRUGO);
  static bool __read_mostly fasteoi = 1;
  module_param(fasteoi, bool, S_IRUGO);
 
 -static bool __read_mostly enable_apicv;
 +static bool __read_mostly enable_apicv = 1;
  module_param(enable_apicv, bool, S_IRUGO);
 
  /*
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 6147d24..628582f 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -2685,6 +2685,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
   struct kvm_lapic_state *s)
  {
 + kvm_x86_ops-sync_pir_to_irr(vcpu);
   memcpy(s-regs, vcpu-arch.apic-regs, sizeof *s);
 
   return 0;
 --
 1.7.1
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v10 7/7] KVM: VMX: Use posted interrupt to deliver virtual interrupt

2013-04-25 Thread Zhang, Yang Z
Yangminqiang wrote on 2013-04-26:
 Hi Yang Zhang,
 
 Could you please let me know your CPU model, or the CPU models which
 support apic-v as your patch requires, so that I could try your
 patches?
 
   Intel Software Developer's Manual, Volume 3C,
   System Programming Guide, Part 3. Ch29,
   APIC VIRTUALIZATION AND VIRTUAL INTERRUPTS
 Or how can I know whether my hardware support those features listed in the
 manual above?
Ivytown or newer platforms support it.

 Thanks,
 Steven
 
 kvm-ow...@vger.kernel.org wrote on 2013-04-11:
 Subject: [PATCH v10 7/7] KVM: VMX: Use posted interrupt to deliver virtual
 interrupt
 
 From: Yang Zhang yang.z.zh...@intel.com
 
 If posted interrupt is available, then use it to inject virtual
 interrupt to guest.
 
 Signed-off-by: Yang Zhang yang.z.zh...@intel.com
 ---
  arch/x86/kvm/lapic.c |   30 +++---
  arch/x86/kvm/vmx.c   |2 +-
  arch/x86/kvm/x86.c   |1 +
  3 files changed, 21 insertions(+), 12 deletions(-)
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index dbf74c9..e29883c 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -353,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic
 *apic)
  if (!apic-irr_pending)
  return -1;
 +kvm_x86_ops-sync_pir_to_irr(apic-vcpu);
  result = apic_search_irr(apic);
  ASSERT(result == -1 || result = 16);
 @@ -683,18 +684,25 @@ static int __apic_accept_irq(struct kvm_lapic *apic,
 int delivery_mode,
  if (dest_map)
  __set_bit(vcpu-vcpu_id, dest_map);
 -result = !apic_test_and_set_irr(vector, apic);
 -trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode,
 -  trig_mode, vector, !result);
 -if (!result) {
 -if (trig_mode)
 -apic_debug(level trig mode repeatedly for 
 -vector %d, vector);
 -break;
 -}
 +if (kvm_x86_ops-deliver_posted_interrupt) {
 +result = 1;
 +kvm_x86_ops-deliver_posted_interrupt(vcpu, vector);
 +} else {
 +result = !apic_test_and_set_irr(vector, apic);
 
 -kvm_make_request(KVM_REQ_EVENT, vcpu);
 -kvm_vcpu_kick(vcpu);
 +if (!result) {
 +if (trig_mode)
 +apic_debug(level trig mode repeatedly 
 +for vector %d, vector);
 +goto out;
 +}
 +
 +kvm_make_request(KVM_REQ_EVENT, vcpu);
 +kvm_vcpu_kick(vcpu);
 +}
 +out:
 +trace_kvm_apic_accept_irq(vcpu-vcpu_id, delivery_mode,
 +trig_mode, vector, !result);
  break;
  
  case APIC_DM_REMRD:
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index 314b2ed..52b21da 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -84,7 +84,7 @@ module_param(vmm_exclusive, bool, S_IRUGO);
  static bool __read_mostly fasteoi = 1;
  module_param(fasteoi, bool, S_IRUGO);
 -static bool __read_mostly enable_apicv;
 +static bool __read_mostly enable_apicv = 1;
  module_param(enable_apicv, bool, S_IRUGO);
  
  /*
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 6147d24..628582f 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -2685,6 +2685,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,  

  struct kvm_lapic_state *s) { +  kvm_x86_ops-sync_pir_to_irr(vcpu);
  memcpy(s-regs, vcpu-arch.apic-regs, sizeof *s);
  
  return 0;
 --
 1.7.1
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html


Best regards,
Yang




Re: [PATCH 0/7] KVM: irqfd generalization prepare patch set

2013-04-25 Thread Gleb Natapov
On Wed, Apr 24, 2013 at 01:20:31PM +0300, Gleb Natapov wrote:
 On Tue, Apr 16, 2013 at 07:26:08PM +0200, Alexander Graf wrote:
  The concept of an irqfd and interrupt routing are nothing particularly tied
  into the IOAPIC implementation. In fact, most of the code already is 
  perfectly
  generic.
  
  This patch set decouples most bits of the existing irqchip and irqfd
  implementation to make it reusable for non-IOAPIC platforms, like the PPC 
  MPIC.
  
  I also have a patch that implements working irqfd support on top of these,
  but that requires the in-kernel MPIC implementation to go upstream first, so
  I'm holding off on it until we settled everything there, so the concept
  certainly does work.
  
  Alex
  
 Nice cleanup, thanks! Should expect a new series with ifdef
 kvm_irqchip and ia64 compilation fixed. The fixes are minor enough for
 me to fix them while applying.
 
Actually the series does not apply any more and has to be rebased on top of the
current queue.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig description

2013-04-25 Thread Caraman Mihai Claudiu-B02008
 -Original Message-
 From: tiejun.chen [mailto:tiejun.c...@windriver.com]
 Sent: Friday, April 19, 2013 1:03 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: kvm-ppc@vger.kernel.org; k...@vger.kernel.org
 Subject: Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig
 description
 
 On 04/11/2013 06:03 PM, Mihai Caraman wrote:
  Add e6500 core to Kconfig description.
 
  Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
  ---
  v3:
- No change
 
arch/powerpc/kvm/Kconfig |6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
 
  diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
  index 63c67ec..4489520 100644
  --- a/arch/powerpc/kvm/Kconfig
  +++ b/arch/powerpc/kvm/Kconfig
  @@ -136,15 +136,15 @@ config KVM_E500V2
If unsure, say N.
 
config KVM_E500MC
  -   bool KVM support for PowerPC E500MC/E5500 processors
  +   bool KVM support for PowerPC E500MC/E5500/E6500 processors
  depends on PPC_E500MC
  select KVM
  select KVM_MMIO
  select KVM_BOOKE_HV
  select MMU_NOTIFIER
  ---help---
  - Support running unmodified E500MC/E5500 (32-bit) guest kernels in
 
 I once tried p5040ds but it failed with 64-bit, but it looks like you are saying
 this patch
 set can make e5500/e6500 work well with 64-bit? If so, will we need to
 upgrade
 qemu or something else like dtb?

KVM should work on p5040ds with and without this patchset. The latest 
qemu requires this patch: powerpc: Add paravirt idle loop for 64-bit Book-E,
you will not pass guest udev without it.
Please detail what fails on p5040ds.

-Mike


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig description

2013-04-25 Thread tiejun.chen

On 04/25/2013 05:09 PM, Caraman Mihai Claudiu-B02008 wrote:

-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Friday, April 19, 2013 1:03 PM
To: Caraman Mihai Claudiu-B02008
Cc: kvm-ppc@vger.kernel.org; k...@vger.kernel.org
Subject: Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig
description

On 04/11/2013 06:03 PM, Mihai Caraman wrote:

Add e6500 core to Kconfig description.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
   - No change

   arch/powerpc/kvm/Kconfig |6 +++---
   1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 63c67ec..4489520 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -136,15 +136,15 @@ config KVM_E500V2
  If unsure, say N.

   config KVM_E500MC
-   bool KVM support for PowerPC E500MC/E5500 processors
+   bool KVM support for PowerPC E500MC/E5500/E6500 processors
depends on PPC_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
select MMU_NOTIFIER
---help---
- Support running unmodified E500MC/E5500 (32-bit) guest kernels in


I once tried p5040ds but it failed with 64-bit, but it looks like you are saying
this patch
set can make e5500/e6500 work well with 64-bit? If so, will we need to
upgrade
qemu or something else like dtb?


KVM should work on p5040ds with and without this patchset. The latest
qemu requires this patch: powerpc: Add paravirt idle loop for 64-bit Book-E,
you will not pass guest udev without it.


Which qemu tree should be used here?

My tree is cloned from:

git://repo.or.cz/qemu/agraf.git ppc-next

But I can't find this commit.

Tiejun

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/7] KVM: irqfd generalization prepare patch set

2013-04-25 Thread Alexander Graf

On 25.04.2013, at 09:28, Gleb Natapov wrote:

 On Wed, Apr 24, 2013 at 01:20:31PM +0300, Gleb Natapov wrote:
 On Tue, Apr 16, 2013 at 07:26:08PM +0200, Alexander Graf wrote:
 The concept of an irqfd and interrupt routing are nothing particularly tied
 into the IOAPIC implementation. In fact, most of the code already is 
 perfectly
 generic.
 
 This patch set decouples most bits of the existing irqchip and irqfd
 implementation to make it reusable for non-IOAPIC platforms, like the PPC 
 MPIC.
 
 I also have a patch that implements working irqfd support on top of these,
 but that requires the in-kernel MPIC implementation to go upstream first, so
 I'm holding off on it until we settled everything there, so the concept
 certainly does work.
 
 Alex
 
 Nice cleanup, thanks! Should expect a new series with ifdef
 kvm_irqchip and ia64 compilation fixed. The fixes are minor enough for
 me to fix them while applying.
 
 Actually the series does not apply any more and has to be rebased on top of 
 the
 current queue.

Heh, we're already at v3:

  http://www.mail-archive.com/kvm-ppc@vger.kernel.org/msg06214.html


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/7] KVM: irqfd generalization prepare patch set

2013-04-25 Thread Alexander Graf

On 21.04.2013, at 12:51, Michael S. Tsirkin wrote:

 On Tue, Apr 16, 2013 at 07:26:08PM +0200, Alexander Graf wrote:
 The concept of an irqfd and interrupt routing are nothing particularly tied
 into the IOAPIC implementation. In fact, most of the code already is 
 perfectly
 generic.
 
 This patch set decouples most bits of the existing irqchip and irqfd
 implementation to make it reusable for non-IOAPIC platforms, like the PPC 
 MPIC.
 
 I also have a patch that implements working irqfd support on top of these,
 but that requires the in-kernel MPIC implementation to go upstream first, so
 I'm holding off on it until we settled everything there, so the concept
 certainly does work.
 
 Alex
 
 Nothing to object to here really, this is just
 moving code around.
 And patches 3 and 4 are definitely cleanups.
 Assuming this helps PPC gain in-kernel irqchip support:
 
 Acked-by: Michael S. Tsirkin m...@redhat.com

Could you please check the newer version of this patch set again and give your 
ack if it still holds?

  http://www.mail-archive.com/kvm-ppc@vger.kernel.org/msg06214.html


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/6] kvm: add device control API

2013-04-25 Thread Gleb Natapov
On Fri, Apr 12, 2013 at 07:08:42PM -0500, Scott Wood wrote:
 Currently, devices that are emulated inside KVM are configured in a
 hardcoded manner based on an assumption that any given architecture
 only has one way to do it.  If there's any need to access device state,
 it is done through inflexible one-purpose-only IOCTLs (e.g.
 KVM_GET/SET_LAPIC).  Defining new IOCTLs for every little thing is
 cumbersome and depletes a limited numberspace.
 
 This API provides a mechanism to instantiate a device of a certain
 type, returning an ID that can be used to set/get attributes of the
 device.  Attributes may include configuration parameters (e.g.
 register base address), device state, operational commands, etc.  It
 is similar to the ONE_REG API, except that it acts on devices rather
 than vcpus.
 
 Both device types and individual attributes can be tested without having
 to create the device or get/set the attribute, without the need for
 separately managing enumerated capabilities.
 
 Signed-off-by: Scott Wood scottw...@freescale.com
 ---
 v4:
  - Move some boilerplate back into generic code, as requested by Gleb.
File descriptor management and reference counting is no longer the
concern of the device implementation.
 
  - Don't hold kvm->lock during create.  The original reasons
for doing so have vanished as far as MPIC is concerned, and
this avoids needing to answer the question of whether to
hold the lock during destroy as well.
 
Paul, you may need to acquire the lock yourself in kvm_create_xics()
to protect the -EEXIST check.
 
 v3: remove some changes that were merged into this patch by accident,
 and fix the error documentation for KVM_CREATE_DEVICE.
 ---
  Documentation/virtual/kvm/api.txt|   70 
  Documentation/virtual/kvm/devices/README |1 +
  include/linux/kvm_host.h |   35 
  include/uapi/linux/kvm.h |   27 +++
  virt/kvm/kvm_main.c  |  129 
 ++
  5 files changed, 262 insertions(+)
  create mode 100644 Documentation/virtual/kvm/devices/README
 
 diff --git a/Documentation/virtual/kvm/api.txt 
 b/Documentation/virtual/kvm/api.txt
 index 976eb65..d52f3f9 100644
 --- a/Documentation/virtual/kvm/api.txt
 +++ b/Documentation/virtual/kvm/api.txt
 @@ -2173,6 +2173,76 @@ header; first `n_valid' valid entries with contents 
 from the data
  written, then `n_invalid' invalid entries, invalidating any previously
  valid entries found.
  
 +4.79 KVM_CREATE_DEVICE
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: vm ioctl
 +Parameters: struct kvm_create_device (in/out)
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENODEV: The device type is unknown or unsupported
 +  EEXIST: Device already created, and this type of device may not
 +  be instantiated multiple times
 +
 +  Other error conditions may be defined by individual device types or
 +  have their standard meanings.
 +
 +Creates an emulated device in the kernel.  The file descriptor returned
 +in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
 +
 +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
 +device type is supported (not necessarily whether it can be created
 +in the current vm).
 +
 +Individual devices should not define flags.  Attributes should be used
 +for specifying any behavior that is not implied by the device type
 +number.
 +
 +struct kvm_create_device {
 + __u32   type;   /* in: KVM_DEV_TYPE_xxx */
 + __u32   fd; /* out: device handle */
 + __u32   flags;  /* in: KVM_CREATE_DEVICE_xxx */
 +};
Should we add __u32 padding here to make the struct size a multiple of u64?

 +
 +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: device ioctl
 +Parameters: struct kvm_device_attr
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENXIO:  The group or attribute is unknown/unsupported for this device
 +  EPERM:  The attribute cannot (currently) be accessed this way
 +  (e.g. read-only attribute, or attribute that only makes
 +  sense when the device is in a different state)
 +
 +  Other error conditions may be defined by individual device types.
 +
 +Gets/sets a specified piece of device configuration and/or state.  The
 +semantics are device-specific.  See individual device documentation in
 +the devices directory.  As with ONE_REG, the size of the data
 +transferred is defined by the particular attribute.
 +
 +struct kvm_device_attr {
 + __u32   flags;  /* no flags currently defined */
 + __u32   group;  /* device-defined */
 + __u64   attr;   /* group-defined */
 + __u64   addr;   /* userspace address of attr data */
 +};
 +
 +4.81 KVM_HAS_DEVICE_ATTR
 +
 +Capability: KVM_CAP_DEVICE_CTRL
 +Type: device ioctl
 +Parameters: struct kvm_device_attr
 +Returns: 0 on success, -1 on error
 +Errors:
 +  ENXIO:  The group or attribute is unknown/unsupported for this device
 

Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig description

2013-04-25 Thread tiejun.chen

On 04/25/2013 05:32 PM, Caraman Mihai Claudiu-B02008 wrote:

-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Thursday, April 25, 2013 12:17 PM
To: Caraman Mihai Claudiu-B02008
Cc: kvm-ppc@vger.kernel.org; k...@vger.kernel.org
Subject: Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig
description

On 04/25/2013 05:09 PM, Caraman Mihai Claudiu-B02008 wrote:

-Original Message-
From: tiejun.chen [mailto:tiejun.c...@windriver.com]
Sent: Friday, April 19, 2013 1:03 PM
To: Caraman Mihai Claudiu-B02008
Cc: kvm-ppc@vger.kernel.org; k...@vger.kernel.org
Subject: Re: [PATCH 8/8 v3] KVM: PPC: e500: Add e6500 core to Kconfig
description

On 04/11/2013 06:03 PM, Mihai Caraman wrote:

Add e6500 core to Kconfig description.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
- No change

arch/powerpc/kvm/Kconfig |6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 63c67ec..4489520 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -136,15 +136,15 @@ config KVM_E500V2
  If unsure, say N.

config KVM_E500MC
-   bool "KVM support for PowerPC E500MC/E5500 processors"
+   bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
select MMU_NOTIFIER
---help---
- Support running unmodified E500MC/E5500 (32-bit) guest kernels in


I ever tried p5040ds but failed with 64-bit, but looks are you saying
this patch
set can make e5500/e6500 work well with 64-bit? If so, will we need to
upgrade
qemu or something else like dtb?


KVM should work on p5040ds with and without this patchset. The latest
qemu requires this patch: powerpc: Add paravirt idle loop for 64-bit

Book-E,

you will not pass guest udev without it.


This is a kernel patch required by latest qemu.


Looks this commit is applied only into galak/powerpc.git, next, but still not 
merged into agraf/linux-2.6.git, so I'm confused which tree can support 64bit 
Book3E KVM as you point.


Tiejun
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC

2013-04-25 Thread Alexander Graf

On 19.04.2013, at 20:02, Scott Wood wrote:

 On 04/19/2013 09:06:26 AM, Alexander Graf wrote:
 diff --git a/Documentation/virtual/kvm/devices/mpic.txt 
 b/Documentation/virtual/kvm/devices/mpic.txt
 index ce98e32..dadc1e0 100644
 --- a/Documentation/virtual/kvm/devices/mpic.txt
 +++ b/Documentation/virtual/kvm/devices/mpic.txt
 @@ -35,3 +35,14 @@ Groups:
 attr is the IRQ number.  IRQ numbers for standard sources are the
 byte offset of the relevant IVPR from EIVPR0, divided by 32.
 +
 +IRQ Routing:
 +
 +  The MPIC emulation supports IRQ routing. Only a single MPIC device can
 +  be instantiated. Once that device has been created, it's available as
 +  irqchip id 0.
 +
 
 +  This irqchip 0 has 256 interrupt pins. These pins reflect the SRC pins
 +  on the MPIC controller.
 
 This irqchip 0 has 256 interrupt pins, which expose the interrupts in the 
 main array of interrupt sources (a.k.a. SRC interrupts).  The numbering is 
 the same as the MPIC device tree binding -- based on the register offset from 
 the beginning of the sources array, without regard to any subdivisions in 
 chip documentation such as internal or external interrupts.  Default 
 routes are established for these pins, with the GSI being equal to the pin 
 number.
 
 +  Access to on-SRC registers is not implemented through IRQ routing 
 mechanisms.
 
 s/on-SRC registers/non-SRC interrupts/
 
 diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
 index 10bc08a..d137df8 100644
 --- a/arch/powerpc/kvm/mpic.c
 +++ b/arch/powerpc/kvm/mpic.c
 @@ -1029,6 +1029,7 @@ static int openpic_cpu_write_internal(void *opaque, 
 gpa_t addr,
  struct irq_source *src;
  struct irq_dest *dst;
  int s_IRQ, n_IRQ;
 +int notify_eoi = -1;
  pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
  addr, val);
 @@ -1087,6 +1088,8 @@ static int openpic_cpu_write_internal(void *opaque, 
 gpa_t addr,
  }
  IRQ_resetbit(&dst->servicing, s_IRQ);
 +/* Notify listeners that the IRQ is over */
 +notify_eoi = s_IRQ;
  /* Set up next servicing IRQ */
  s_IRQ = IRQ_get_next(opp, &dst->servicing);
  /* Check queued interrupts. */
 @@ -1104,6 +1107,12 @@ static int openpic_cpu_write_internal(void *opaque, 
 gpa_t addr,
  break;
  }
 +if (notify_eoi != -1) {
 +spin_unlock_irq(&opp->lock);
 +kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
 +spin_lock_irq(&opp->lock);
 +}
 
 I'd rather not have the _irq here, which could break if we enter this path 
 via an _irqsave (I realize there currently is no such path that reaches EOI 
 emulation).
 
 Will we ever set notify_eoi when addr != EOI?  I'm wondering why it was moved 
 out of the switch statement, instead of being put at the end of the case EOI: 
 code.

I doubt it, but that's for the compiler to optimize away. I found it cleaner 
for some reason to put it down there. I don't think it really matters.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC

2013-04-25 Thread Alexander Graf

On 23.04.2013, at 08:38, Paul Mackerras wrote:

 On Fri, Apr 19, 2013 at 04:06:26PM +0200, Alexander Graf wrote:
 Now that all the irq routing and irqfd pieces are generic, we can expose
 real irqchip support to all of KVM's internal helpers.
 
 This allows us to use irqfd with the in-kernel MPIC.
 
 [snip]
 diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
 index 10bc08a..d137df8 100644
 --- a/arch/powerpc/kvm/mpic.c
 +++ b/arch/powerpc/kvm/mpic.c
 [snip]
 +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 +struct kvm *kvm, int irq_source_id, int level, bool line_status)
 [snip]
 +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 +  struct kvm_kernel_irq_routing_entry *e,
 +  const struct kvm_irq_routing_entry *ue)
 
 How do you see this working once we have more than one interrupt
 controller emulation in the kernel?  Presumably these two will have to
 move out to a common file, rather than being in mpic.c, but then the
 question is how do we know which interrupt controller to send the GSI
 to?  Were you thinking we would have a restriction that you can only
 instantiate one interrupt controller of any type?  Or were you
 thinking we would have an enum for kvm_irq_routing_irqchip::irqchip?
 In that case how would we handle MSIs?

In a first version of having 2 interrupt controllers, I'd make them mutually 
exclusive in Kconfig. That way each interrupt controller implements these 
functions itself.

Later we can sit down and generalize this support. Then we would need to have a 
mapping table which irqchip type each irqchip number is and call the respective 
functions.

But the use for that is so incredibly slim and the user space API would still 
be the same, that I don't think we need to worry about it today.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/17] KVM: Add KVM_IRQCHIP_NUM_PINS in addition to KVM_IOAPIC_NUM_PINS

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:12PM +0200, Alexander Graf wrote:
 The concept of routing interrupt lines to an irqchip is nothing
 that is IOAPIC specific. Every irqchip has a maximum number of pins
 that can be linked to irq lines.
 
 So let's add a new define that allows us to reuse generic code for
 non-IOAPIC platforms.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  arch/x86/include/asm/kvm_host.h |2 ++
  include/linux/kvm_host.h|2 +-
  virt/kvm/irq_comm.c |2 +-
  3 files changed, 4 insertions(+), 2 deletions(-)
 
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index 599f98b..f44c3fe 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -43,6 +43,8 @@
  #define KVM_PIO_PAGE_OFFSET 1
  #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
  
 +#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
 +
  #define CR0_RESERVED_BITS   \
   (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index 93a5005..bf3b1dc 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -307,7 +307,7 @@ struct kvm_kernel_irq_routing_entry {
  #ifdef __KVM_HAVE_IOAPIC
  
  struct kvm_irq_routing_table {
 - int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
 + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
   struct kvm_kernel_irq_routing_entry *rt_entries;
   u32 nr_rt_entries;
   /*
 diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
 index 25ab480..7c0071d 100644
 --- a/virt/kvm/irq_comm.c
 +++ b/virt/kvm/irq_comm.c
 @@ -480,7 +480,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
  
   new->nr_rt_entries = nr_rt_entries;
   for (i = 0; i < 3; i++)
 - for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++)
 + for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++)
   new->chip[i][j] = -1;
  
   for (i = 0; i < nr; ++i) {
 -- 
 1.6.0.2
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 02/17] KVM: Introduce CONFIG_HAVE_KVM_IRQ_ROUTING

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:13PM +0200, Alexander Graf wrote:
 Quite a bit of code in KVM has been conditionalized on availability of
 IOAPIC emulation. However, most of it is generically applicable to
 platforms that don't have an IOAPIC, but a different type of irq chip.
 
 Make code that only relies on IRQ routing, not an APIC itself, on
 CONFIG_HAVE_KVM_IRQ_ROUTING, so that we can reuse it later.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  arch/x86/kvm/Kconfig |1 +
  include/linux/kvm_host.h |6 +++---
  virt/kvm/Kconfig |3 +++
  virt/kvm/eventfd.c   |6 +++---
  virt/kvm/kvm_main.c  |2 +-
  5 files changed, 11 insertions(+), 7 deletions(-)
 
 diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
 index 586f000..9d50efd 100644
 --- a/arch/x86/kvm/Kconfig
 +++ b/arch/x86/kvm/Kconfig
 @@ -29,6 +29,7 @@ config KVM
   select MMU_NOTIFIER
   select ANON_INODES
   select HAVE_KVM_IRQCHIP
 + select HAVE_KVM_IRQ_ROUTING
   select HAVE_KVM_EVENTFD
   select KVM_APIC_ARCHITECTURE
   select KVM_ASYNC_PF
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index bf3b1dc..4215d4f 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -304,7 +304,7 @@ struct kvm_kernel_irq_routing_entry {
   struct hlist_node link;
  };
  
 -#ifdef __KVM_HAVE_IOAPIC
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  
  struct kvm_irq_routing_table {
   int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
 @@ -432,7 +432,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
  int __must_check vcpu_load(struct kvm_vcpu *vcpu);
  void vcpu_put(struct kvm_vcpu *vcpu);
  
 -#ifdef __KVM_HAVE_IOAPIC
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  int kvm_irqfd_init(void);
  void kvm_irqfd_exit(void);
  #else
 @@ -957,7 +957,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, 
 unsigned long mmu_seq)
  }
  #endif
  
 -#ifdef KVM_CAP_IRQ_ROUTING
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  
  #define KVM_MAX_IRQ_ROUTES 1024
  
 diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
 index d01b24b..779262f 100644
 --- a/virt/kvm/Kconfig
 +++ b/virt/kvm/Kconfig
 @@ -6,6 +6,9 @@ config HAVE_KVM
  config HAVE_KVM_IRQCHIP
 bool
  
 +config HAVE_KVM_IRQ_ROUTING
 +   bool
 +
  config HAVE_KVM_EVENTFD
 bool
 select EVENTFD
 diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
 index c5d43ff..64ee720 100644
 --- a/virt/kvm/eventfd.c
 +++ b/virt/kvm/eventfd.c
 @@ -35,7 +35,7 @@
  
  #include "iodev.h"
  
 -#ifdef __KVM_HAVE_IOAPIC
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  /*
   * 
   * irqfd: Allows an fd to be used to inject an interrupt to the guest
 @@ -433,7 +433,7 @@ fail:
  void
  kvm_eventfd_init(struct kvm *kvm)
  {
 -#ifdef __KVM_HAVE_IOAPIC
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
   spin_lock_init(&kvm->irqfds.lock);
   INIT_LIST_HEAD(&kvm->irqfds.items);
   INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
 @@ -442,7 +442,7 @@ kvm_eventfd_init(struct kvm *kvm)
   INIT_LIST_HEAD(&kvm->ioeventfds);
  }
  
 -#ifdef __KVM_HAVE_IOAPIC
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
  /*
   * shutdown any irqfd's that match fd+gsi
   */
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index aaac1a7..2c3b226 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -2404,7 +2404,7 @@ static long kvm_dev_ioctl_check_extension_generic(long 
 arg)
   case KVM_CAP_SIGNAL_MSI:
  #endif
   return 1;
 -#ifdef KVM_CAP_IRQ_ROUTING
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
   case KVM_CAP_IRQ_ROUTING:
   return KVM_MAX_IRQ_ROUTES;
  #endif
 -- 
 1.6.0.2
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 03/17] KVM: Drop __KVM_HAVE_IOAPIC condition on irq routing

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:14PM +0200, Alexander Graf wrote:
 We have a capability enquire system that allows user space to ask kvm
 whether a feature is available.
 
 The point behind this system is that we can have different kernel
 configurations with different capabilities and user space can adjust
 accordingly.
 
 Because features can always be non existent, we can drop any #ifdefs
 on CAP defines that could be used generically, like the irq routing
 bits. These can be easily reused for non-IOAPIC systems as well.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  include/uapi/linux/kvm.h |2 --
  1 files changed, 0 insertions(+), 2 deletions(-)
 
 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
 index 74d0ff3..c741902 100644
 --- a/include/uapi/linux/kvm.h
 +++ b/include/uapi/linux/kvm.h
 @@ -579,9 +579,7 @@ struct kvm_ppc_smmu_info {
  #ifdef __KVM_HAVE_PIT
  #define KVM_CAP_REINJECT_CONTROL 24
  #endif
 -#ifdef __KVM_HAVE_IOAPIC
  #define KVM_CAP_IRQ_ROUTING 25
 -#endif
  #define KVM_CAP_IRQ_INJECT_STATUS 26
  #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
  #define KVM_CAP_DEVICE_DEASSIGNMENT 27
 -- 
 1.6.0.2
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 05/17] KVM: Move irq routing to generic code

2013-04-25 Thread Michael S. Tsirkin
On Fri, Apr 19, 2013 at 04:06:16PM +0200, Alexander Graf wrote:
 The IRQ routing set ioctl lives in the hacky device assignment code inside
 of KVM today. This is definitely the wrong place for it. Move it to the much
 more natural kvm_main.c.
 
 Signed-off-by: Alexander Graf ag...@suse.de

Acked-by: Michael S. Tsirkin m...@redhat.com

 ---
  virt/kvm/assigned-dev.c |   30 --
  virt/kvm/kvm_main.c |   30 ++
  2 files changed, 30 insertions(+), 30 deletions(-)
 
 diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
 index f4c7f59..8db4370 100644
 --- a/virt/kvm/assigned-dev.c
 +++ b/virt/kvm/assigned-dev.c
 @@ -983,36 +983,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, 
 unsigned ioctl,
   goto out;
   break;
   }
 -#ifdef KVM_CAP_IRQ_ROUTING
 - case KVM_SET_GSI_ROUTING: {
 - struct kvm_irq_routing routing;
 - struct kvm_irq_routing __user *urouting;
 - struct kvm_irq_routing_entry *entries;
 -
 - r = -EFAULT;
 - if (copy_from_user(&routing, argp, sizeof(routing)))
 - goto out;
 - r = -EINVAL;
 - if (routing.nr >= KVM_MAX_IRQ_ROUTES)
 - goto out;
 - if (routing.flags)
 - goto out;
 - r = -ENOMEM;
 - entries = vmalloc(routing.nr * sizeof(*entries));
 - if (!entries)
 - goto out;
 - r = -EFAULT;
 - urouting = argp;
 - if (copy_from_user(entries, urouting->entries,
 -routing.nr * sizeof(*entries)))
 - goto out_free_irq_routing;
 - r = kvm_set_irq_routing(kvm, entries, routing.nr,
 - routing.flags);
 - out_free_irq_routing:
 - vfree(entries);
 - break;
 - }
 -#endif /* KVM_CAP_IRQ_ROUTING */
  #ifdef __KVM_HAVE_MSIX
   case KVM_ASSIGN_SET_MSIX_NR: {
   struct kvm_assigned_msix_nr entry_nr;
 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index 2c3b226..b6f3354 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -2274,6 +2274,36 @@ static long kvm_vm_ioctl(struct file *filp,
   break;
   }
  #endif
 +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 + case KVM_SET_GSI_ROUTING: {
 + struct kvm_irq_routing routing;
 + struct kvm_irq_routing __user *urouting;
 + struct kvm_irq_routing_entry *entries;
 +
 + r = -EFAULT;
 + if (copy_from_user(&routing, argp, sizeof(routing)))
 + goto out;
 + r = -EINVAL;
 + if (routing.nr >= KVM_MAX_IRQ_ROUTES)
 + goto out;
 + if (routing.flags)
 + goto out;
 + r = -ENOMEM;
 + entries = vmalloc(routing.nr * sizeof(*entries));
 + if (!entries)
 + goto out;
 + r = -EFAULT;
 + urouting = argp;
 + if (copy_from_user(entries, urouting->entries,
 +routing.nr * sizeof(*entries)))
 + goto out_free_irq_routing;
 + r = kvm_set_irq_routing(kvm, entries, routing.nr,
 + routing.flags);
 + out_free_irq_routing:
 + vfree(entries);
 + break;
 + }
 +#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
   default:
   r = kvm_arch_vm_ioctl(filp, ioctl, arg);
   if (r == -ENOTTY)
 -- 
 1.6.0.2
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >