[PATCH-v3 8/9] vhost/scsi: Drop left-over scsi_tcq.h include

2015-02-02 Thread Nicholas A. Bellinger
From: Nicholas Bellinger 

With the recent removal of MSG_*_TAG defines in commit 68d81f40,
vhost-scsi is now using TCM_*_TAG and doesn't depend upon host
side scsi_tcq.h definitions anymore.

Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Signed-off-by: Nicholas Bellinger 
---
 drivers/vhost/scsi.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 9af93d0..2b4b002 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -38,7 +38,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH-v3 4/9] vhost/scsi: Add ANY_LAYOUT iov -> sgl mapping prerequisites

2015-02-02 Thread Nicholas A. Bellinger
From: Nicholas Bellinger 

This patch adds ANY_LAYOUT prerequisites logic for accepting a set of
protection + data payloads via iov_iter.  Also includes helpers for
calculating SGLs + invoking vhost_scsi_map_to_sgl() with a known number
of iovecs.

Required by ANY_LAYOUT processing when struct iovec may be offset into
the first outgoing virtio-scsi request header.

v2 changes:
  - Clear ->tvc_sgl_count for vhost_scsi_mapal failure
  - Make vhost_scsi_mapal + vhost_scsi_calc_sgls accept max_niov
  - Minor cleanups

v3 changes:
  - Update vhost_scsi_mapal + friends to use iov_iter
  - Move iov_iter sanity checks into vhost_scsi_calc_sgls
  - Convert vhost_scsi_calc_sgls() to iov_iter_npages()

Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Signed-off-by: Nicholas Bellinger 
---
 drivers/vhost/scsi.c | 93 
 1 file changed, 93 insertions(+)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index c3b12b3..7dfff15 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -914,6 +914,99 @@ vhost_scsi_map_iov_to_prot(struct tcm_vhost_cmd *cmd,
return 0;
 }
 
+static int
+vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls)
+{
+   int sgl_count = 0;
+
+   if (!iter || !iter->iov) {
+   pr_err("%s: iter->iov is NULL, but expected bytes: %zu"
+  " present\n", __func__, bytes);
+   return -EINVAL;
+   }
+
+   sgl_count = iov_iter_npages(iter, 0xffff);
+   if (sgl_count > max_sgls) {
+   pr_err("%s: requested sgl_count: %d exceeds pre-allocated"
+  " max_sgls: %d\n", __func__, sgl_count, max_sgls);
+   return -EINVAL;
+   }
+   return sgl_count;
+}
+
+static int
+vhost_scsi_iov_to_sgl(struct tcm_vhost_cmd *cmd, bool write,
+ struct iov_iter *iter, struct scatterlist *sg,
+ int sg_count)
+{
+   size_t off = iter->iov_offset;
+   int i, ret;
+
+   for (i = 0; i < iter->nr_segs; i++) {
+   void __user *base = iter->iov[i].iov_base + off;
+   size_t len = iter->iov[i].iov_len - off;
+
+   ret = vhost_scsi_map_to_sgl(cmd, base, len, sg, write);
+   if (ret < 0) {
+   for (i = 0; i < sg_count; i++) {
+   struct page *page = sg_page(&sg[i]);
+   if (page)
+   put_page(page);
+   }
+   return ret;
+   }
+   sg += ret;
+   off = 0;
+   }
+   return 0;
+}
+
+static int
+vhost_scsi_mapal(struct tcm_vhost_cmd *cmd,
+size_t prot_bytes, struct iov_iter *prot_iter,
+size_t data_bytes, struct iov_iter *data_iter)
+{
+   int sgl_count, ret;
+   bool write = (cmd->tvc_data_direction == DMA_FROM_DEVICE);
+
+   if (prot_bytes) {
+   sgl_count = vhost_scsi_calc_sgls(prot_iter, prot_bytes,
+TCM_VHOST_PREALLOC_PROT_SGLS);
+   if (sgl_count < 0)
+   return sgl_count;
+
+   sg_init_table(cmd->tvc_prot_sgl, sgl_count);
+   cmd->tvc_prot_sgl_count = sgl_count;
+   pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__,
+cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count);
+
+   ret = vhost_scsi_iov_to_sgl(cmd, write, prot_iter,
+   cmd->tvc_prot_sgl,
+   cmd->tvc_prot_sgl_count);
+   if (ret < 0) {
+   cmd->tvc_prot_sgl_count = 0;
+   return ret;
+   }
+   }
+   sgl_count = vhost_scsi_calc_sgls(data_iter, data_bytes,
+TCM_VHOST_PREALLOC_SGLS);
+   if (sgl_count < 0)
+   return sgl_count;
+
+   sg_init_table(cmd->tvc_sgl, sgl_count);
+   cmd->tvc_sgl_count = sgl_count;
+   pr_debug("%s data_sg %p data_sgl_count %u\n", __func__,
+ cmd->tvc_sgl, cmd->tvc_sgl_count);
+
+   ret = vhost_scsi_iov_to_sgl(cmd, write, data_iter,
+   cmd->tvc_sgl, cmd->tvc_sgl_count);
+   if (ret < 0) {
+   cmd->tvc_sgl_count = 0;
+   return ret;
+   }
+   return 0;
+}
+
 static void tcm_vhost_submission_work(struct work_struct *work)
 {
struct tcm_vhost_cmd *cmd =
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH-v3 6/9] vhost/scsi: Set VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits

2015-02-02 Thread Nicholas A. Bellinger
From: Nicholas Bellinger 

Signal support of VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits
required for virtio-scsi 1.0 spec layout requirements.

Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Signed-off-by: Nicholas Bellinger 
---
 drivers/vhost/scsi.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index e1fe003..c25fdd7 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -173,7 +173,9 @@ enum {
 /* Note: can't set VIRTIO_F_VERSION_1 yet, since that implies ANY_LAYOUT. */
 enum {
VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) |
-  (1ULL << VIRTIO_SCSI_F_T10_PI)
+  (1ULL << VIRTIO_SCSI_F_T10_PI) |
+  (1ULL << VIRTIO_F_ANY_LAYOUT) |
+  (1ULL << VIRTIO_F_VERSION_1)
 };
 
 #define VHOST_SCSI_MAX_TARGET  256
@@ -1626,7 +1628,10 @@ static void vhost_scsi_handle_kick(struct vhost_work 
*work)
poll.work);
struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
 
-   vhost_scsi_handle_vq(vs, vq);
+   if (vhost_has_feature(vq, VIRTIO_F_ANY_LAYOUT))
+   vhost_scsi_handle_vqal(vs, vq);
+   else
+   vhost_scsi_handle_vq(vs, vq);
 }
 
 static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH-v3 9/9] vhost/scsi: Global tcm_vhost -> vhost_scsi rename

2015-02-02 Thread Nicholas A. Bellinger
From: Nicholas Bellinger 

There is a large amount of code that still references the original
'tcm_vhost' naming conventions, instead of modern 'vhost_scsi'.

Go ahead and do a global rename to make the usage consistent.

Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Signed-off-by: Nicholas Bellinger 
---
 drivers/vhost/scsi.c | 662 +--
 1 file changed, 331 insertions(+), 331 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 2b4b002..66f682c 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -51,13 +51,13 @@
 
 #include "vhost.h"
 
-#define TCM_VHOST_VERSION  "v0.1"
-#define TCM_VHOST_NAMELEN 256
-#define TCM_VHOST_MAX_CDB_SIZE 32
-#define TCM_VHOST_DEFAULT_TAGS 256
-#define TCM_VHOST_PREALLOC_SGLS 2048
-#define TCM_VHOST_PREALLOC_UPAGES 2048
-#define TCM_VHOST_PREALLOC_PROT_SGLS 512
+#define VHOST_SCSI_VERSION  "v0.1"
+#define VHOST_SCSI_NAMELEN 256
+#define VHOST_SCSI_MAX_CDB_SIZE 32
+#define VHOST_SCSI_DEFAULT_TAGS 256
+#define VHOST_SCSI_PREALLOC_SGLS 2048
+#define VHOST_SCSI_PREALLOC_UPAGES 2048
+#define VHOST_SCSI_PREALLOC_PROT_SGLS 512
 
 struct vhost_scsi_inflight {
/* Wait for the flush operation to finish */
@@ -66,7 +66,7 @@ struct vhost_scsi_inflight {
struct kref kref;
 };
 
-struct tcm_vhost_cmd {
+struct vhost_scsi_cmd {
/* Descriptor from vhost_get_vq_desc() for virt_queue segment */
int tvc_vq_desc;
/* virtio-scsi initiator task attribute */
@@ -82,7 +82,7 @@ struct tcm_vhost_cmd {
/* The number of scatterlists associated with this cmd */
u32 tvc_sgl_count;
u32 tvc_prot_sgl_count;
-   /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
+   /* Saved unpacked SCSI LUN for vhost_scsi_submission_work() */
u32 tvc_lun;
/* Pointer to the SGL formatted memory from virtio-scsi */
struct scatterlist *tvc_sgl;
@@ -95,13 +95,13 @@ struct tcm_vhost_cmd {
/* Pointer to vhost_virtqueue for the cmd */
struct vhost_virtqueue *tvc_vq;
/* Pointer to vhost nexus memory */
-   struct tcm_vhost_nexus *tvc_nexus;
+   struct vhost_scsi_nexus *tvc_nexus;
/* The TCM I/O descriptor that is accessed via container_of() */
struct se_cmd tvc_se_cmd;
-   /* work item used for cmwq dispatch to tcm_vhost_submission_work() */
+   /* work item used for cmwq dispatch to vhost_scsi_submission_work() */
struct work_struct work;
/* Copy of the incoming SCSI command descriptor block (CDB) */
-   unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
+   unsigned char tvc_cdb[VHOST_SCSI_MAX_CDB_SIZE];
/* Sense buffer that will be mapped into outgoing status */
unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
/* Completed commands list, serviced from vhost worker thread */
@@ -110,53 +110,53 @@ struct tcm_vhost_cmd {
struct vhost_scsi_inflight *inflight;
 };
 
-struct tcm_vhost_nexus {
+struct vhost_scsi_nexus {
/* Pointer to TCM session for I_T Nexus */
struct se_session *tvn_se_sess;
 };
 
-struct tcm_vhost_nacl {
+struct vhost_scsi_nacl {
/* Binary World Wide unique Port Name for Vhost Initiator port */
u64 iport_wwpn;
/* ASCII formatted WWPN for Sas Initiator port */
-   char iport_name[TCM_VHOST_NAMELEN];
-   /* Returned by tcm_vhost_make_nodeacl() */
+   char iport_name[VHOST_SCSI_NAMELEN];
+   /* Returned by vhost_scsi_make_nodeacl() */
struct se_node_acl se_node_acl;
 };
 
-struct tcm_vhost_tpg {
+struct vhost_scsi_tpg {
/* Vhost port target portal group tag for TCM */
u16 tport_tpgt;
/* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus 
shutdown */
int tv_tpg_port_count;
/* Used for vhost_scsi device reference to tpg_nexus, protected by 
tv_tpg_mutex */
int tv_tpg_vhost_count;
-   /* list for tcm_vhost_list */
+   /* list for vhost_scsi_list */
struct list_head tv_tpg_list;
/* Used to protect access for tpg_nexus */
struct mutex tv_tpg_mutex;
/* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
-   struct tcm_vhost_nexus *tpg_nexus;
-   /* Pointer back to tcm_vhost_tport */
-   struct tcm_vhost_tport *tport;
-   /* Returned by tcm_vhost_make_tpg() */
+   struct vhost_scsi_nexus *tpg_nexus;
+   /* Pointer back to vhost_scsi_tport */
+   struct vhost_scsi_tport *tport;
+   /* Returned by vhost_scsi_make_tpg() */
struct se_portal_group se_tpg;
/* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
struct vhost_scsi *vhost_scsi;
 };
 
-struct tcm_vhost_tport {
+struct vhost_scsi_tport {
/* SCSI protocol the tport is providing */
u8 tport_proto_id;
/* Binary World Wide unique Port Name for Vhost Target port */
u64 tport_wwpn;
/* ASCII formatted WWPN for Vhos

[PATCH-v3 7/9] vhost/scsi: Drop legacy pre virtio v1.0 !ANY_LAYOUT logic

2015-02-02 Thread Nicholas A. Bellinger
From: Nicholas Bellinger 

With the new ANY_LAYOUT logic in place for vhost_scsi_handle_vqal(),
there is no longer a reason to keep around the legacy code with
!ANY_LAYOUT assumptions.

Go ahead and drop the pre virtio 1.0 logic in vhost_scsi_handle_vq()
and associated helpers.

Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Signed-off-by: Nicholas Bellinger 
---
 drivers/vhost/scsi.c | 340 +--
 1 file changed, 1 insertion(+), 339 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index c25fdd7..9af93d0 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -830,93 +830,6 @@ out:
 }
 
 static int
-vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd,
- struct iovec *iov,
- int niov,
- bool write)
-{
-   struct scatterlist *sg = cmd->tvc_sgl;
-   unsigned int sgl_count = 0;
-   int ret, i;
-
-   for (i = 0; i < niov; i++)
-   sgl_count += iov_num_pages(iov[i].iov_base, iov[i].iov_len);
-
-   if (sgl_count > TCM_VHOST_PREALLOC_SGLS) {
-   pr_err("vhost_scsi_map_iov_to_sgl() sgl_count: %u greater than"
-   " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n",
-   sgl_count, TCM_VHOST_PREALLOC_SGLS);
-   return -ENOBUFS;
-   }
-
-   pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count);
-   sg_init_table(sg, sgl_count);
-   cmd->tvc_sgl_count = sgl_count;
-
-   pr_debug("Mapping iovec %p for %u pages\n", &iov[0], sgl_count);
-
-   for (i = 0; i < niov; i++) {
-   ret = vhost_scsi_map_to_sgl(cmd, iov[i].iov_base, 
iov[i].iov_len,
-   sg, write);
-   if (ret < 0) {
-   for (i = 0; i < cmd->tvc_sgl_count; i++) {
-   struct page *page = sg_page(&cmd->tvc_sgl[i]);
-   if (page)
-   put_page(page);
-   }
-   cmd->tvc_sgl_count = 0;
-   return ret;
-   }
-   sg += ret;
-   sgl_count -= ret;
-   }
-   return 0;
-}
-
-static int
-vhost_scsi_map_iov_to_prot(struct tcm_vhost_cmd *cmd,
-  struct iovec *iov,
-  int niov,
-  bool write)
-{
-   struct scatterlist *prot_sg = cmd->tvc_prot_sgl;
-   unsigned int prot_sgl_count = 0;
-   int ret, i;
-
-   for (i = 0; i < niov; i++)
-   prot_sgl_count += iov_num_pages(iov[i].iov_base, 
iov[i].iov_len);
-
-   if (prot_sgl_count > TCM_VHOST_PREALLOC_PROT_SGLS) {
-   pr_err("vhost_scsi_map_iov_to_prot() sgl_count: %u greater than"
-   " preallocated TCM_VHOST_PREALLOC_PROT_SGLS: %u\n",
-   prot_sgl_count, TCM_VHOST_PREALLOC_PROT_SGLS);
-   return -ENOBUFS;
-   }
-
-   pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__,
-prot_sg, prot_sgl_count);
-   sg_init_table(prot_sg, prot_sgl_count);
-   cmd->tvc_prot_sgl_count = prot_sgl_count;
-
-   for (i = 0; i < niov; i++) {
-   ret = vhost_scsi_map_to_sgl(cmd, iov[i].iov_base, 
iov[i].iov_len,
-   prot_sg, write);
-   if (ret < 0) {
-   for (i = 0; i < cmd->tvc_prot_sgl_count; i++) {
-   struct page *page = 
sg_page(&cmd->tvc_prot_sgl[i]);
-   if (page)
-   put_page(page);
-   }
-   cmd->tvc_prot_sgl_count = 0;
-   return ret;
-   }
-   prot_sg += ret;
-   prot_sgl_count -= ret;
-   }
-   return 0;
-}
-
-static int
 vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls)
 {
int sgl_count = 0;
@@ -1323,254 +1236,6 @@ out:
mutex_unlock(&vq->mutex);
 }
 
-static void
-vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
-{
-   struct tcm_vhost_tpg **vs_tpg;
-   struct virtio_scsi_cmd_req v_req;
-   struct virtio_scsi_cmd_req_pi v_req_pi;
-   struct tcm_vhost_tpg *tpg;
-   struct tcm_vhost_cmd *cmd;
-   u64 tag;
-   u32 exp_data_len, data_first, data_num, data_direction, prot_first;
-   unsigned out, in, i;
-   int head, ret, data_niov, prot_niov, prot_bytes;
-   size_t req_size;
-   u16 lun;
-   u8 *target, *lunp, task_attr;
-   bool hdr_pi;
-   void *req, *cdb;
-
-   mutex_lock(&vq->mutex);
-   /*
-* We can handle the vq only after the endpoint is setup by calling the
-* VHOST_SCSI_SET_ENDPOINT ioctl.
-*/
-   vs_tpg = vq->private_data;
-   if (!vs_tpg)
-   goto out;
-
-

[PATCH-v3 5/9] vhost/scsi: Add ANY_LAYOUT vhost_virtqueue callback

2015-02-02 Thread Nicholas A. Bellinger
From: Nicholas Bellinger 

This patch adds ANY_LAYOUT support with a new vqs[].vq.handle_kick()
callback in vhost_scsi_handle_vqal().

It calculates data_direction + exp_data_len for the new tcm_vhost_cmd
descriptor by walking both outgoing + incoming iovecs using iov_iter,
assuming the layout of outgoing request header + T10_PI + Data payload
comes first.

It also uses copy_from_iter() to copy leading virtio-scsi request header
that may or may not include SCSI CDB, that returns a re-calculated iovec
to start of T10_PI or Data SGL memory.

v2 changes:
  - Fix up vhost_scsi_handle_vqal comments
  - Minor vhost_scsi_handle_vqal simplifications
  - Add missing minimum virtio-scsi response buffer size check
  - Fix pi_bytes* error message typo
  - Convert to use vhost_skip_iovec_bytes() common code
  - Add max_niov sanity checks vs. out + in offset into vq

v3 changes:
  - Convert to copy_from_iter usage
  - Update vhost_scsi_handle_vqal comments for iov_iter usage
  - Convert prot_bytes offset to use iov_iter_advance
  - Drop max_niov usage in vhost_scsi_handle_vqal
  - Drop vhost_skip_iovec_bytes in favour of iov_iter

Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Signed-off-by: Nicholas Bellinger 
---
 drivers/vhost/scsi.c | 260 +++
 1 file changed, 260 insertions(+)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 7dfff15..e1fe003 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1062,6 +1062,266 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs,
 }
 
 static void
+vhost_scsi_handle_vqal(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+{
+   struct tcm_vhost_tpg **vs_tpg, *tpg;
+   struct virtio_scsi_cmd_req v_req;
+   struct virtio_scsi_cmd_req_pi v_req_pi;
+   struct tcm_vhost_cmd *cmd;
+   struct iov_iter out_iter, in_iter, prot_iter, data_iter;
+   u64 tag;
+   u32 exp_data_len, data_direction;
+   unsigned out, in, i;
+   int head, ret, prot_bytes;
+   size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
+   size_t out_size, in_size;
+   u16 lun;
+   u8 *target, *lunp, task_attr;
+   bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI);
+   void *req, *cdb;
+
+   mutex_lock(&vq->mutex);
+   /*
+* We can handle the vq only after the endpoint is setup by calling the
+* VHOST_SCSI_SET_ENDPOINT ioctl.
+*/
+   vs_tpg = vq->private_data;
+   if (!vs_tpg)
+   goto out;
+
+   vhost_disable_notify(&vs->dev, vq);
+
+   for (;;) {
+   head = vhost_get_vq_desc(vq, vq->iov,
+ARRAY_SIZE(vq->iov), &out, &in,
+NULL, NULL);
+   pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
+head, out, in);
+   /* On error, stop handling until the next kick. */
+   if (unlikely(head < 0))
+   break;
+   /* Nothing new?  Wait for eventfd to tell us they refilled. */
+   if (head == vq->num) {
+   if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
+   vhost_disable_notify(&vs->dev, vq);
+   continue;
+   }
+   break;
+   }
+   /*
+* Check for a sane response buffer so we can report early
+* errors back to the guest.
+*/
+   if (unlikely(vq->iov[out].iov_len < rsp_size)) {
+   vq_err(vq, "Expecting at least virtio_scsi_cmd_resp"
+   " size, got %zu bytes\n", vq->iov[out].iov_len);
+   break;
+   }
+   /*
+* Setup pointers and values based upon different virtio-scsi
+* request header if T10_PI is enabled in KVM guest.
+*/
+   if (t10_pi) {
+   req = &v_req_pi;
+   req_size = sizeof(v_req_pi);
+   lunp = &v_req_pi.lun[0];
+   target = &v_req_pi.lun[1];
+   } else {
+   req = &v_req;
+   req_size = sizeof(v_req);
+   lunp = &v_req.lun[0];
+   target = &v_req.lun[1];
+   }
+   /*
+* Determine data_direction for ANY_LAYOUT by calculating the
+* total outgoing iovec sizes / incoming iovec sizes vs.
+* virtio-scsi request / response headers respectively.
+*
+* FIXME: Not correct for BIDI operation
+*/
+   out_size = in_size = 0;
+   for (i = 0; i < out; i++)
+   out_size += vq->iov[i].iov_len;
+   for (i = out; i < out + in; i++)
+   

[PATCH-v3 2/9] vhost/scsi: Fix incorrect early vhost_scsi_handle_vq failures

2015-02-02 Thread Nicholas A. Bellinger
From: Nicholas Bellinger 

This patch fixes vhost_scsi_handle_vq() failure cases that result in BUG_ON()
getting triggered when vhost_scsi_free_cmd() is called, and ->tvc_se_cmd has
not been initialized by target_submit_cmd_map_sgls().

It changes tcm_vhost_release_cmd() to use tcm_vhost_cmd->tvc_nexus for obtaining
se_session pointer reference.  Also, avoid calling put_page() on NULL sg->page
entries in vhost_scsi_map_to_sgl() failure path.

Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Signed-off-by: Nicholas Bellinger 
---
 drivers/vhost/scsi.c | 52 +---
 1 file changed, 29 insertions(+), 23 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 1ad5b0f..e4e9f39 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -462,7 +462,7 @@ static void tcm_vhost_release_cmd(struct se_cmd *se_cmd)
 {
struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd,
struct tcm_vhost_cmd, tvc_se_cmd);
-   struct se_session *se_sess = se_cmd->se_sess;
+   struct se_session *se_sess = tv_cmd->tvc_nexus->tvn_se_sess;
int i;
 
if (tv_cmd->tvc_sgl_count) {
@@ -864,9 +864,11 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd,
ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i],
cmd->tvc_upages, write);
if (ret < 0) {
-   for (i = 0; i < cmd->tvc_sgl_count; i++)
-   put_page(sg_page(&cmd->tvc_sgl[i]));
-
+   for (i = 0; i < cmd->tvc_sgl_count; i++) {
+   struct page *page = sg_page(&cmd->tvc_sgl[i]);
+   if (page)
+   put_page(page);
+   }
cmd->tvc_sgl_count = 0;
return ret;
}
@@ -905,9 +907,11 @@ vhost_scsi_map_iov_to_prot(struct tcm_vhost_cmd *cmd,
ret = vhost_scsi_map_to_sgl(cmd, prot_sg, prot_sgl_count, 
&iov[i],
cmd->tvc_upages, write);
if (ret < 0) {
-   for (i = 0; i < cmd->tvc_prot_sgl_count; i++)
-   put_page(sg_page(&cmd->tvc_prot_sgl[i]));
-
+   for (i = 0; i < cmd->tvc_prot_sgl_count; i++) {
+   struct page *page = 
sg_page(&cmd->tvc_prot_sgl[i]);
+   if (page)
+   put_page(page);
+   }
cmd->tvc_prot_sgl_count = 0;
return ret;
}
@@ -1065,12 +1069,14 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct 
vhost_virtqueue *vq)
if (unlikely(vq->iov[0].iov_len < req_size)) {
pr_err("Expecting virtio-scsi header: %zu, got %zu\n",
   req_size, vq->iov[0].iov_len);
-   break;
+   vhost_scsi_send_bad_target(vs, vq, head, out);
+   continue;
}
ret = memcpy_fromiovecend(req, &vq->iov[0], 0, req_size);
if (unlikely(ret)) {
vq_err(vq, "Faulted on virtio_scsi_cmd_req\n");
-   break;
+   vhost_scsi_send_bad_target(vs, vq, head, out);
+   continue;
}
 
/* virtio-scsi spec requires byte 0 of the lun to be 1 */
@@ -1101,14 +1107,16 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct 
vhost_virtqueue *vq)
if (data_direction != DMA_TO_DEVICE) {
vq_err(vq, "Received non zero 
do_pi_niov"
", but wrong data_direction\n");
-   goto err_cmd;
+   vhost_scsi_send_bad_target(vs, vq, 
head, out);
+   continue;
}
prot_bytes = vhost32_to_cpu(vq, 
v_req_pi.pi_bytesout);
} else if (v_req_pi.pi_bytesin) {
if (data_direction != DMA_FROM_DEVICE) {
vq_err(vq, "Received non zero 
di_pi_niov"
", but wrong data_direction\n");
-   goto err_cmd;
+   vhost_scsi_send_bad_target(vs, vq, 
head, out);
+   continue;
}
prot_bytes = vhost32_to_cpu(vq, 
v_req_pi.pi_bytesin);
}
@@ -1148,7 +1156,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct 
vhost_virtqueue *vq)
  

[PATCH-v3 1/9] vhost/scsi: Convert completion path to use copy_to_iter

2015-02-02 Thread Nicholas A. Bellinger
From: Nicholas Bellinger 

Required for ANY_LAYOUT support when the incoming virtio-scsi response
header + fixed size sense buffer payload may span more than a single
iovec entry.

This changes existing code to save cmd->tvc_resp_iov instead of the
first single iovec base pointer from &vq->iov[out].

v3 changes:
  - Convert memcpy_toiovecend -> copy_to_iter usage

Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Signed-off-by: Nicholas Bellinger 
---
 drivers/vhost/scsi.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 01c01cb..1ad5b0f 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -72,6 +72,8 @@ struct tcm_vhost_cmd {
int tvc_vq_desc;
/* virtio-scsi initiator task attribute */
int tvc_task_attr;
+   /* virtio-scsi response incoming iovecs */
+   int tvc_in_iovs;
/* virtio-scsi initiator data direction */
enum dma_data_direction tvc_data_direction;
/* Expected data transfer length from virtio-scsi header */
@@ -87,8 +89,8 @@ struct tcm_vhost_cmd {
struct scatterlist *tvc_sgl;
struct scatterlist *tvc_prot_sgl;
struct page **tvc_upages;
-   /* Pointer to response */
-   struct virtio_scsi_cmd_resp __user *tvc_resp;
+   /* Pointer to response header iovec */
+   struct iovec *tvc_resp_iov;
/* Pointer to vhost_scsi for our device */
struct vhost_scsi *tvc_vhost;
/* Pointer to vhost_virtqueue for the cmd */
@@ -682,6 +684,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work 
*work)
struct tcm_vhost_cmd *cmd;
struct llist_node *llnode;
struct se_cmd *se_cmd;
+   struct iov_iter iov_iter;
int ret, vq;
 
bitmap_zero(signal, VHOST_SCSI_MAX_VQ);
@@ -703,8 +706,11 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work 
*work)
 se_cmd->scsi_sense_length);
memcpy(v_rsp.sense, cmd->tvc_sense_buf,
   se_cmd->scsi_sense_length);
-   ret = copy_to_user(cmd->tvc_resp, &v_rsp, sizeof(v_rsp));
-   if (likely(ret == 0)) {
+
+   iov_iter_init(&iov_iter, WRITE, cmd->tvc_resp_iov,
+ cmd->tvc_in_iovs, sizeof(v_rsp));
+   ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter);
+   if (likely(ret == sizeof(v_rsp))) {
struct vhost_scsi_virtqueue *q;
vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
q = container_of(cmd->tvc_vq, struct 
vhost_scsi_virtqueue, vq);
@@ -1159,7 +1165,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct 
vhost_virtqueue *vq)
 
cmd->tvc_vhost = vs;
cmd->tvc_vq = vq;
-   cmd->tvc_resp = vq->iov[out].iov_base;
+   cmd->tvc_resp_iov = &vq->iov[out];
+   cmd->tvc_in_iovs = in;
 
pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
cmd->tvc_cdb[0], cmd->tvc_lun);
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH-v3 3/9] vhost/scsi: Change vhost_scsi_map_to_sgl to accept iov ptr + len

2015-02-02 Thread Nicholas A. Bellinger
From: Nicholas Bellinger 

This patch changes vhost_scsi_map_to_sgl() parameters to accept virtio
iovec ptr + len when determining pages_nr.

This is currently done with iov_num_pages() -> PAGE_ALIGN, so allow
the same parameters as well.

Cc: Michael S. Tsirkin 
Cc: Paolo Bonzini 
Signed-off-by: Nicholas Bellinger 
---
 drivers/vhost/scsi.c | 37 +++--
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index e4e9f39..c3b12b3 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -222,10 +222,10 @@ static struct workqueue_struct *tcm_vhost_workqueue;
 static DEFINE_MUTEX(tcm_vhost_mutex);
 static LIST_HEAD(tcm_vhost_list);
 
-static int iov_num_pages(struct iovec *iov)
+static int iov_num_pages(void __user *iov_base, size_t iov_len)
 {
-   return (PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
-  ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT;
+   return (PAGE_ALIGN((unsigned long)iov_base + iov_len) -
+  ((unsigned long)iov_base & PAGE_MASK)) >> PAGE_SHIFT;
 }
 
 static void tcm_vhost_done_inflight(struct kref *kref)
@@ -782,25 +782,18 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct 
tcm_vhost_tpg *tpg,
  * Returns the number of scatterlist entries used or -errno on error.
  */
 static int
-vhost_scsi_map_to_sgl(struct tcm_vhost_cmd *tv_cmd,
+vhost_scsi_map_to_sgl(struct tcm_vhost_cmd *cmd,
+ void __user *ptr,
+ size_t len,
  struct scatterlist *sgl,
- unsigned int sgl_count,
- struct iovec *iov,
- struct page **pages,
  bool write)
 {
-   unsigned int npages = 0, pages_nr, offset, nbytes;
+   unsigned int npages = 0, offset, nbytes;
+   unsigned int pages_nr = iov_num_pages(ptr, len);
struct scatterlist *sg = sgl;
-   void __user *ptr = iov->iov_base;
-   size_t len = iov->iov_len;
+   struct page **pages = cmd->tvc_upages;
int ret, i;
 
-   pages_nr = iov_num_pages(iov);
-   if (pages_nr > sgl_count) {
-   pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
-  " sgl_count: %u\n", pages_nr, sgl_count);
-   return -ENOBUFS;
-   }
if (pages_nr > TCM_VHOST_PREALLOC_UPAGES) {
pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
   " preallocated TCM_VHOST_PREALLOC_UPAGES: %u\n",
@@ -845,7 +838,7 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd,
int ret, i;
 
for (i = 0; i < niov; i++)
-   sgl_count += iov_num_pages(&iov[i]);
+   sgl_count += iov_num_pages(iov[i].iov_base, iov[i].iov_len);
 
if (sgl_count > TCM_VHOST_PREALLOC_SGLS) {
pr_err("vhost_scsi_map_iov_to_sgl() sgl_count: %u greater than"
@@ -861,8 +854,8 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd,
pr_debug("Mapping iovec %p for %u pages\n", &iov[0], sgl_count);
 
for (i = 0; i < niov; i++) {
-   ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i],
-   cmd->tvc_upages, write);
+   ret = vhost_scsi_map_to_sgl(cmd, iov[i].iov_base, 
iov[i].iov_len,
+   sg, write);
if (ret < 0) {
for (i = 0; i < cmd->tvc_sgl_count; i++) {
struct page *page = sg_page(&cmd->tvc_sgl[i]);
@@ -889,7 +882,7 @@ vhost_scsi_map_iov_to_prot(struct tcm_vhost_cmd *cmd,
int ret, i;
 
for (i = 0; i < niov; i++)
-   prot_sgl_count += iov_num_pages(&iov[i]);
+   prot_sgl_count += iov_num_pages(iov[i].iov_base, 
iov[i].iov_len);
 
if (prot_sgl_count > TCM_VHOST_PREALLOC_PROT_SGLS) {
pr_err("vhost_scsi_map_iov_to_prot() sgl_count: %u greater than"
@@ -904,8 +897,8 @@ vhost_scsi_map_iov_to_prot(struct tcm_vhost_cmd *cmd,
cmd->tvc_prot_sgl_count = prot_sgl_count;
 
for (i = 0; i < niov; i++) {
-   ret = vhost_scsi_map_to_sgl(cmd, prot_sg, prot_sgl_count, 
&iov[i],
-   cmd->tvc_upages, write);
+   ret = vhost_scsi_map_to_sgl(cmd, iov[i].iov_base, 
iov[i].iov_len,
+   prot_sg, write);
if (ret < 0) {
for (i = 0; i < cmd->tvc_prot_sgl_count; i++) {
struct page *page = 
sg_page(&cmd->tvc_prot_sgl[i]);
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH-v3 0/9] vhost/scsi: Add ANY_LAYOUT + VERSION_1 support

2015-02-02 Thread Nicholas A. Bellinger
From: Nicholas Bellinger 

Hi MST, Paolo, Al & Co,

Here is -v3 for adding vhost/scsi ANY_LAYOUT + VERSION_1 host feature
bit support.

It adds a new vhost_virtqueue ->handle_kick() callback to determine the
start of protection and data payloads iovecs past starting virtio-scsi
request and response headers, based upon data_direction using iov_iter
primitives.

It assumes request/CDB and response/sense_buffer headers may span more
than a single iovec using mm/iov_iter.c logic.

It also allows virtio-scsi headers + T10_PI + Data SGL payloads to span
the same iovec when pinning user-space memory via get_user_pages_fast()
code.  (Not tested yet)

Based upon Al & HCH's feedback, the patch series has been converted to
use copy_*_iter() for virtio-scsi header copy.  Also, patch #4 has been
updated to use iov_iter_npages() for sgl_count, and patch #5 updated to
use iov_iter_advance() for calculating prot_bytes offset to the start
of data_iter.

v3 changelog:
  - Convert memcpy_toiovecend -> copy_to_iter usage
  - Update vhost_scsi_mapal + friends to use iov_iter
  - Move iov_iter sanity checks into vhost_scsi_calc_sgls
  - Convert vhost_scsi_calc_sgls() to iov_iter_npages()
  - Convert to vhost_scsi_handle_vqal to copy_from_iter usage
  - Update vhost_scsi_handle_vqal comments for iov_iter usage
  - Convert prot_bytes offset to use iov_iter_advance
  - Drop max_niov usage in vhost_scsi_handle_vqal
  - Drop vhost_skip_iovec_bytes in favour of iov_iter

Note the one part that has been left unchanged is vhost_scsi_map_to_sgl()
into get_user_pages_fast(), for which existing iov_iter_get_pages() code
will need to allow for a callback to perform the associated scatterlists
setup from **pages for protection + data payloads.

It's functioning against v3.19-rc1 virtio-scsi LLD in T10_PI mode using
TYPE-1 DIF with ANY_LAYOUT -> VERSION_1 guest feature bits enabled, using
the layout following existing convention with protection/data SGL payloads
residing within separate iovecs.

Also included in patch #9 is an overdue change to rename code in scsi.c
to line up with modern vhost_scsi naming convention.

Please review.

Thank you,

Nicholas Bellinger (9):
  vhost/scsi: Convert completion path to use copy_to_iser
  vhost/scsi: Fix incorrect early vhost_scsi_handle_vq failures
  vhost/scsi: Change vhost_scsi_map_to_sgl to accept iov ptr + len
  vhost/scsi: Add ANY_LAYOUT iov -> sgl mapping prerequisites
  vhost/scsi: Add ANY_LAYOUT vhost_virtqueue callback
  vhost/scsi: Set VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits
  vhost/scsi: Drop legacy pre virtio v1.0 !ANY_LAYOUT logic
  vhost/scsi: Drop left-over scsi_tcq.h include
  vhost/scsi: Global tcm_vhost -> vhost_scsi rename

 drivers/vhost/scsi.c | 1073 ++
 1 file changed, 549 insertions(+), 524 deletions(-)

-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv2] kvmppc: Implement H_LOGICAL_CI_{LOAD,STORE} in KVM

2015-02-02 Thread David Gibson
On POWER, storage caching is usually configured via the MMU - attributes
such as cache-inhibited are stored in the TLB and the hashed page table.

This makes correctly performing cache inhibited IO accesses awkward when
the MMU is turned off (real mode).  Some CPU models provide special
registers to control the cache attributes of real mode load and stores but
this is not at all consistent.  This is a problem in particular for SLOF,
the firmware used on KVM guests, which runs entirely in real mode, but
which needs to do IO to load the kernel.

To simplify this qemu implements two special hypercalls, H_LOGICAL_CI_LOAD
and H_LOGICAL_CI_STORE which simulate a cache-inhibited load or store to
a logical address (aka guest physical address).  SLOF uses these for IO.

However, because these are implemented within qemu, not the host kernel,
these bypass any IO devices emulated within KVM itself.  The simplest way
to see this problem is to attempt to boot a KVM guest from a virtio-blk
device with iothread / dataplane enabled.  The iothread code relies on an
in kernel implementation of the virtio queue notification, which is not
triggered by the IO hcalls, and so the guest will stall in SLOF unable to
load the guest OS.

This patch addresses this by providing in-kernel implementations of the
2 hypercalls, which correctly scan the KVM IO bus.  Any access to an
address not handled by the KVM IO bus will cause a VM exit, hitting the
qemu implementation as before.

Note that a userspace change is also required, in order to enable these
new hcall implementations with KVM_CAP_PPC_ENABLE_HCALL.

Signed-off-by: David Gibson 
---
 arch/powerpc/include/asm/kvm_book3s.h |  3 ++
 arch/powerpc/kvm/book3s.c | 76 +++
 arch/powerpc/kvm/book3s_hv.c  | 12 ++
 arch/powerpc/kvm/book3s_pr_papr.c | 28 +
 4 files changed, 119 insertions(+)

v2:
  - Removed some debugging printk()s that were accidentally left in
  - Fix endianness; like all PAPR hypercalls, these should always act
big-endian, even if the guest is little-endian (in practice this
makes no difference, since the only user is SLOF, which is always
big-endian)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 942c7b1..578e550 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -292,6 +292,9 @@ static inline bool kvmppc_supports_magic_page(struct 
kvm_vcpu *vcpu)
return !is_kvmppc_hv_enabled(vcpu->kvm);
 }
 
+extern int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
+extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
+
 /* Magic register values loaded into r3 and r4 before the 'sc' assembly
  * instruction for the OSI hypercalls */
 #define OSI_SC_MAGIC_R30x113724FA
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 888bf46..7b51492 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -820,6 +820,82 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 #endif
 }
 
+int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+   unsigned long size = kvmppc_get_gpr(vcpu, 4);
+   unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+   u64 buf;
+   int ret;
+
+   if (!is_power_of_2(size) || (size > sizeof(buf)))
+   return H_TOO_HARD;
+
+   ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, size, &buf);
+   if (ret != 0)
+   return H_TOO_HARD;
+
+   switch (size) {
+   case 1:
+   kvmppc_set_gpr(vcpu, 4, *(u8 *)&buf);
+   break;
+
+   case 2:
+   kvmppc_set_gpr(vcpu, 4, be16_to_cpu(*(u16 *)&buf));
+   break;
+
+   case 4:
+   kvmppc_set_gpr(vcpu, 4, be32_to_cpu(*(u32 *)&buf));
+   break;
+
+   case 8:
+   kvmppc_set_gpr(vcpu, 4, be64_to_cpu(*(u64 *)&buf));
+   break;
+
+   default:
+   BUG();
+   }
+
+   return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_load); /* For use by the kvm-pr module */
+
+int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+   unsigned long size = kvmppc_get_gpr(vcpu, 4);
+   unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+   unsigned long val = kvmppc_get_gpr(vcpu, 6);
+   u64 buf;
+   int ret;
+
+   switch (size) {
+   case 1:
+   *(u8 *)&buf = val;
+   break;
+
+   case 2:
+   *(u16 *)&buf = cpu_to_be16(val);
+   break;
+
+   case 4:
+   *(u32 *)&buf = cpu_to_be32(val);
+   break;
+
+   case 8:
+   *(u64 *)&buf = cpu_to_be64(val);
+   break;
+
+   default:
+   return H_TOO_HARD;
+   }
+
+   ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, size, &buf);
+   if (ret != 0)
+   return H_TOO_HARD;
+
+   return H_SUCCESS;
+}
+EXPORT

Re: [PATCH v4 6/6] KVM: nVMX: Enable nested posted interrupt processing

2015-02-02 Thread Wincy Van
On Tue, Feb 3, 2015 at 9:21 AM, Zhang, Yang Z  wrote:
> Paolo Bonzini wrote on 2015-02-03:
>>
>>
>> On 02/02/2015 16:33, Wincy Van wrote:
>>> static void vmx_accomp_nested_posted_intr(struct kvm_vcpu *vcpu) {
>>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>
>>> if (is_guest_mode(vcpu) &&
>>> vmx->nested.posted_intr_nv != -1 &&
>>> pi_test_on(vmx->nested.pi_desc))
>>> kvm_apic_set_irr(vcpu,
>>> vmx->nested.posted_intr_nv); }
>>> Then we will get a nested vmexit in vmx_check_nested_events, that
>>> posted intr will be handled by L1 immediately.
>>> This mechanism will also emulate the hardware's behavior: If a
>>> posted intr was not accomplished by hardware, we will get an
>
> Actually, we cannot say "not accomplished by hardware". It's more like we don't 
> do the job well. See my answer below.
>

Yes, exactly.

>>> interrupt with POSTED_INTR_NV.
>>
>> Yes.
>
> This is not enough. From L1's point of view, L2 is in vmx non-root mode. So we should 
> emulate the posted interrupt in L0 correctly, say:
> 1. clear ON bit
> 2. ack interrupt
> 3, syn pir to virr
> 4. update RVI.
> Then let the hardware(virtual interrupt delivery) to accomplish interrupt 
> injection.
>
> Forcing a vmexit is more like a trick. It's better to follow the hardware's 
> behavior unless we cannot do it.
>

Yes, I will try again to do this.


Thanks,
Wincy
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] PCI: Add guard to avoid mapping a invalid msix base address

2015-02-02 Thread Yijing Wang
> I applied this to pci/msi for v3.20, thanks!  I reworked the changelog as
> follows; let me know if it still doesn't make things clear:
> 

It's more clear, thanks for your improvement very much!

Thanks!
Yijing.

> 
> commit 6a878e5085fe97bd1e222b7883a1b815fcbbe4ed
> Author: Yijing Wang 
> Date:   Wed Jan 28 09:52:17 2015 +0800
> 
> PCI: Fail MSI-X mappings if there's no space assigned to MSI-X BAR
> 
> Unlike MSI, which is configured via registers in the MSI capability in
> Configuration Space, MSI-X is configured via tables in Memory Space.
> These MSI-X tables are mapped by a device BAR, and if no Memory Space
> has been assigned to the BAR, MSI-X cannot be used.
> 
> Fail MSI-X setup if no space has been assigned for the BAR.
> 
> Previously, we ioremapped the MSI-X table even if the resource hadn't been
> assigned.  In this case, the resource address is undefined (and is often
> zero), which may lead to warnings or oopses in this path:
> 
>   pci_enable_msix
> msix_capability_init
>   msix_map_region
> ioremap_nocache
> 
> The PCI core sets resource flags to zero when it can't assign space for 
> the
> resource (see reset_resource()).  There are also some cases where it sets
> the IORESOURCE_UNSET flag, e.g., pci_reassigndev_resource_alignment(),
> pci_assign_resource(), etc.  So we must check for both cases.
> 
> [bhelgaas: changelog]
> Reported-by: Zhang Jukuo 
> Tested-by: Zhang Jukuo 
> Signed-off-by: Yijing Wang 
> Signed-off-by: Bjorn Helgaas 
> 
> diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
> index c489ef2c1a39..34fc4189ebf0 100644
> --- a/arch/x86/pci/xen.c
> +++ b/arch/x86/pci/xen.c
> @@ -298,12 +298,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev 
> *dev, int nvec, int type)
>   map_irq.entry_nr = nvec;
>   } else if (type == PCI_CAP_ID_MSIX) {
>   int pos;
> + unsigned long flags;
>   u32 table_offset, bir;
>  
>   pos = dev->msix_cap;
>   pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
> &table_offset);
>   bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
> + flags = pci_resource_flags(dev, bir);
> + if (!flags || (flags & IORESOURCE_UNSET))
> + return -EINVAL;
>  
>   map_irq.table_base = pci_resource_start(dev, bir);
>   map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index fd60806d3fd0..c3e7dfcf9ff5 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -694,11 +694,16 @@ static void __iomem *msix_map_region(struct pci_dev 
> *dev, unsigned nr_entries)
>  {
>   resource_size_t phys_addr;
>   u32 table_offset;
> + unsigned long flags;
>   u8 bir;
>  
>   pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
> &table_offset);
>   bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
> + flags = pci_resource_flags(dev, bir);
> + if (!flags || (flags & IORESOURCE_UNSET))
> + return NULL;
> +
>   table_offset &= PCI_MSIX_TABLE_OFFSET;
>   phys_addr = pci_resource_start(dev, bir) + table_offset;
>  
> 
> .
> 


-- 
Thanks!
Yijing

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v4 6/6] KVM: nVMX: Enable nested posted interrupt processing

2015-02-02 Thread Zhang, Yang Z
Paolo Bonzini wrote on 2015-02-03:
> 
> 
> On 02/02/2015 16:33, Wincy Van wrote:
>> static void vmx_accomp_nested_posted_intr(struct kvm_vcpu *vcpu) {
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> 
>> if (is_guest_mode(vcpu) &&
>> vmx->nested.posted_intr_nv != -1 &&
>> pi_test_on(vmx->nested.pi_desc))
>> kvm_apic_set_irr(vcpu,
>> vmx->nested.posted_intr_nv); }
>> Then we will get a nested vmexit in vmx_check_nested_events, that
>> posted intr will be handled by L1 immediately.
>> This mechanism will also emulate the hardware's behavior: If a
>> posted intr was not accomplished by hardware, we will get an

Actually, we cannot say "not accomplished by hardware". It's more like we don't 
do the job well. See my answer below.

>> interrupt with POSTED_INTR_NV.
> 
> Yes.

This is not enough. From L1's point of view, L2 is in vmx non-root mode. So we should 
emulate the posted interrupt in L0 correctly, say:
1. clear ON bit
2. ack interrupt
3, syn pir to virr
4. update RVI.
Then let the hardware(virtual interrupt delivery) to accomplish interrupt 
injection.

Forcing a vmexit is more like a trick. It's better to follow the hardware's 
behavior unless we cannot do it. 

> 
>> Would this be better?
> 
> I think you do not even need a new bit.  You can use KVM_REQ_EVENT and
> (to complete my suggestion, which was not enough) do the above in
> vmx_check_nested_events.
> 
> Paolo


Best regards,
Yang



Re: [PATCH v2 18/18] vhost: vhost_scsi_handle_vq() should just use copy_from_user()

2015-02-02 Thread Nicholas A. Bellinger
Hi Al,

On Mon, 2015-02-02 at 07:59 +, Al Viro wrote:
> From: Al Viro 
> 
> it has just verified that it asks no more than the length of the
> first segment of iovec.
> 
> And with that the last user of stuff in lib/iovec.c is gone.
> RIP.
> 
> Cc: Michael S. Tsirkin 
> Cc: Nicholas A. Bellinger 
> Cc: kvm@vger.kernel.org
> Signed-off-by: Al Viro 
> ---
>  drivers/vhost/scsi.c |  2 +-
>  include/linux/uio.h  |  2 --
>  lib/Makefile |  2 +-
>  lib/iovec.c  | 36 
>  4 files changed, 2 insertions(+), 40 deletions(-)
>  delete mode 100644 lib/iovec.c
> 
> diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
> index d695b16..dc78d87 100644
> --- a/drivers/vhost/scsi.c
> +++ b/drivers/vhost/scsi.c
> @@ -1079,7 +1079,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct 
> vhost_virtqueue *vq)
>  req_size, vq->iov[0].iov_len);
>   break;
>   }
> - ret = memcpy_fromiovecend(req, &vq->iov[0], 0, req_size);
> + ret = copy_from_user(req, vq->iov[0].iov_base, req_size);
>   if (unlikely(ret)) {
>   vq_err(vq, "Faulted on virtio_scsi_cmd_req\n");
>   break;

Is this in for-next yet..?

If not, please push out ASAP so SFR can hit the conflict between
target-pending/for-next as a heads up for Linus..

--nab

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 2/2] x86/xen: allow privcmd hypercalls to be preempted on 64-bit

2015-02-02 Thread Luis R. Rodriguez
On Thu, Jan 29, 2015 at 12:35 PM, Luis R. Rodriguez  wrote:
> On Tue, Jan 27, 2015 at 10:06:44AM +, David Vrabel wrote:
>> On 27/01/15 08:35, Jan Beulich wrote:
>>  On 27.01.15 at 02:51,  wrote:
>> >
>> > Even if David told you this would be acceptable, I have to question
>> > an abstract model of fixing issues on only 64-bit kernels - this may
>> > be acceptable for distro purposes, but seems hardly the right
>> > approach for upstream. If 32-bit ones are to become deliberately
>> > broken, the XEN config option should become dependent on !X86_32.
>>
>> I'd rather have something omitted (keeping the current behaviour) than
>> something that has not been tested at all.
>>
>> Obviously it would be preferable to to fix both 32-bit and 64-bit x86
>> (and ARM as well) but I'm not going to block an important bug fix for
>> the majority use case (64-bit x86).
>
> Hey folks, what is the status of these patches? Any more feedback?

As I waited I decided to finally install a 32-bit OS but I found now
that as of Xen commit 5d1181a5 which went upstream as of xen 4.3 Xen
no longer supports 32-bit for x86. So -- Xen no longer even builds on
32-bit systems. The latest version I could use then would be Xen 4.2
but the even the stable-4.2 branch fails to build for me even after
fixing quite a bit of compile failures, I'm just going to give up now.
I want to build Xen as I'm relying on some customized emulated delays
on the hypervisor in order to test the issue, otherwise I'd have to
dedicate a huge system to reproduce this issue.

While it is possible folks on old xen may use newer kernels this issue
is likely not a high priority on those 32-bit systems and likely hard
to reproduce there. When and if someone is able to test this on a
32-bit kernel / hypervisor then they can test the small patch below.

Can we move forward with the 64-bit part then?

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 000d419..b4b1f42 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -982,6 +982,8 @@ ENTRY(xen_hypervisor_callback)
 ENTRY(xen_do_upcall)
 1:mov %esp, %eax
 call xen_evtchn_do_upcall
+movl %esp,%eax
+call xen_end_upcall
 jmp  ret_from_intr
 CFI_ENDPROC
 ENDPROC(xen_hypervisor_callback)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [target:for-next 16/21] drivers/vhost/scsi.c:1081:5: sparse: symbol 'vhost_skip_iovec_bytes' was not declared. Should it be static?

2015-02-02 Thread Nicholas A. Bellinger
On Mon, 2015-02-02 at 14:25 +0800, kbuild test robot wrote:
> tree:   git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git 
> for-next
> head:   2936f1d4f3e8247bd519feba7892371d5e4c6603
> commit: 105acf608f25d5e0d9fef669299a5438b7b114ee [16/21] vhost/scsi: Add 
> ANY_LAYOUT vhost_skip_iovec_bytes helper
> reproduce:
>   # apt-get install sparse
>   git checkout 105acf608f25d5e0d9fef669299a5438b7b114ee
>   make ARCH=x86_64 allmodconfig
>   make C=1 CF=-D__CHECK_ENDIAN__
> 
> 
> sparse warnings: (new ones prefixed by >>)
> 
> >> drivers/vhost/scsi.c:1081:5: sparse: symbol 'vhost_skip_iovec_bytes' was 
> >> not declared. Should it be static?
>drivers/vhost/scsi.c:969:1: warning: 'vhost_scsi_mapal' defined but not 
> used [-Wunused-function]
> vhost_scsi_mapal(struct tcm_vhost_cmd *cmd, int max_niov,
> ^
> 
> Please review and possibly fold the followup patch.

Fixing up for -v3.

Thanks Fengguang!

--nab

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/8] vhost/scsi: Add ANY_LAYOUT support

2015-02-02 Thread Nicholas A. Bellinger
On Mon, 2015-02-02 at 00:15 -0800, Christoph Hellwig wrote:
> Hi Nic,
> 
> Al has been rewriting the vhost code to use iov_iter primitives,
> can you please rebase it on top of that istead of using the obsolete
> infrastructure?
> 

Yep, already done with the copy_[from,to]_iter() conversion in
vhost_scsi_handle_vqal() + vhost_scsi_complete_cmd_work() respectively.

Looking at iov_iter_get_pages() now to see if it can replace the other
code, or if that should be a post merge improvement instead.

--nab



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


revisiting speech recognition in a Windows guest on KVM

2015-02-02 Thread Eric S. Johansson
Apologies for taking so long to revisit this topic but well, new 
hardware, new software, new microphone and, you know, life…


Summary:
I have been unable to forward my USB microphone connection to a Windows guest 
and I could use a little bit of assistance pointing me in the right 
direction.


Long story:
On my new laptop, speech recognition is working well on the Windows 
guest. I don't remember what client I was working with but I'm now using 
vmm-view in spice mode. I wasn't able to get the spice connection to 
make the USB microphone interface visible in Windows but I was able to 
get the built-in host to guest USB binding as supplied by the vmm manager.


It's almost exactly right. What I'm noticing is that the audio stream is 
not as clean as it could be. About every 15 to 20 seconds I hear a very 
quiet but distinct "thump". The reason this is a bad thing is because 
the recognition engine thinks it's the start of an utterance, spins its 
wheels for about 10+ seconds trying to decode what it hears. When 
NaturallySpeaking is spinning its wheels, it kills off all input because 
it is trying to avoid a race condition between other inputs and itself.


I'm not sure where the thump is coming from. I don't hear it on my other 
laptop and it's not external noise because it's extremely regular.


I'm wondering if the same problem would occur over the spice USB 
forwarding. I could use a bit of assistance figuring out what I'm doing 
wrong with spice USB forwarding so I can test out importing audio that way.


Thanks
--- eric



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 0/2] x86/arm64: add xenconfig

2015-02-02 Thread Luis R. Rodriguez
On Mon, Feb 2, 2015 at 1:13 PM, Michal Marek  wrote:
> Dne 30.1.2015 v 19:25 Luis R. Rodriguez napsal(a):
>> On Fri, Jan 30, 2015 at 2:49 AM, Michal Marek  wrote:
>>> On 2015-01-29 21:47, Paul Bolle wrote:
 [Added Michal. Removed Yann.]

 On Thu, 2015-01-29 at 12:38 -0800, Luis R. Rodriguez wrote:
> On Tue, Jan 27, 2015 at 12:00 PM, Luis R. Rodriguez  
> wrote:
>> On Fri, Jan 23, 2015 at 03:19:25PM +, Stefano Stabellini wrote:
>>> On Fri, 23 Jan 2015, Luis R. Rodriguez wrote:
 On Wed, Jan 14, 2015 at 11:33:45AM -0800, Luis R. Rodriguez wrote:
> From: "Luis R. Rodriguez" 
>
> This v3 addresses Stefano's feedback from the v2 series, namely
> moving PCI stuff to x86 as its all x86 specific and also just
> removing the CONFIG_TCG_XEN=m from the general config. To be
> clear the changes from the v2 series are below.
>
> Luis R. Rodriguez (2):
>   x86, platform, xen, kconfig: clarify kvmconfig is for kvm
>   x86, arm, platform, xen, kconfig: add xen defconfig helper
>
>  arch/x86/configs/xen.config | 10 ++
>  kernel/configs/xen.config   | 26 ++
>  scripts/kconfig/Makefile|  7 ++-
>  3 files changed, 42 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/configs/xen.config
>  create mode 100644 kernel/configs/xen.config

 Who could these changes go through?
>>>
>>> I would be OK with taking it in the Xen tree, but I would feel more
>>> comfortable doing that if you had an ack from Yann Morin (CC'ed).
>>
>> *Poke*
>
> Hey Yann, wondering if you had any feedback. Thanks.

 Yann has disappeared a year ago. Michal now, informally, keeps an eye on
 kconfig related patches.
>>>
>>> I did not see this. Can you please bounce me the original series?
>>
>> Done, let me know if you did not get them
>
> I got them, sorry for the delay. You can add Acked-by: Michal Marek
>  if you want.

Thanks Michal, what tree should this go through though?

 Luis
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 0/2] x86/arm64: add xenconfig

2015-02-02 Thread Michal Marek
Dne 30.1.2015 v 19:25 Luis R. Rodriguez napsal(a):
> On Fri, Jan 30, 2015 at 2:49 AM, Michal Marek  wrote:
>> On 2015-01-29 21:47, Paul Bolle wrote:
>>> [Added Michal. Removed Yann.]
>>>
>>> On Thu, 2015-01-29 at 12:38 -0800, Luis R. Rodriguez wrote:
 On Tue, Jan 27, 2015 at 12:00 PM, Luis R. Rodriguez  
 wrote:
> On Fri, Jan 23, 2015 at 03:19:25PM +, Stefano Stabellini wrote:
>> On Fri, 23 Jan 2015, Luis R. Rodriguez wrote:
>>> On Wed, Jan 14, 2015 at 11:33:45AM -0800, Luis R. Rodriguez wrote:
 From: "Luis R. Rodriguez" 

 This v3 addresses Stefano's feedback from the v2 series, namely
 moving PCI stuff to x86 as its all x86 specific and also just
 removing the CONFIG_TCG_XEN=m from the general config. To be
 clear the changes from the v2 series are below.

 Luis R. Rodriguez (2):
   x86, platform, xen, kconfig: clarify kvmconfig is for kvm
   x86, arm, platform, xen, kconfig: add xen defconfig helper

  arch/x86/configs/xen.config | 10 ++
  kernel/configs/xen.config   | 26 ++
  scripts/kconfig/Makefile|  7 ++-
  3 files changed, 42 insertions(+), 1 deletion(-)
  create mode 100644 arch/x86/configs/xen.config
  create mode 100644 kernel/configs/xen.config
>>>
>>> Who could these changes go through?
>>
>> I would be OK with taking it in the Xen tree, but I would feel more
>> comfortable doing that if you had an ack from Yann Morin (CC'ed).
>
> *Poke*

 Hey Yann, wondering if you had any feedback. Thanks.
>>>
>>> Yann has disappeared a year ago. Michal now, informally, keeps an eye on
>>> kconfig related patches.
>>
>> I did not see this. Can you please bounce me the original series?
> 
> Done, let me know if you did not get them

I got them, sorry for the delay. You can add Acked-by: Michal Marek
 if you want.

Michal
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] PCI: Add guard to avoid mapping a invalid msix base address

2015-02-02 Thread Bjorn Helgaas
[+cc Jan]

On Thu, Jan 29, 2015 at 11:54:43AM +0800, Yijing Wang wrote:
> Sometimes, a pci bridge device BAR was not assigned
> properly. After we call pci_bus_assign_resources(), the
> resource of the BAR would be reset. So if we try to
> enable msix for this device, it will map an invalid
> resource as the msix base address, and a warning call trace
> will be reported.
> 
> pci_bus_assign_resources()
>   __pci_bus_assign_resources()
>   pbus_assign_resources_sorted()
>   __assign_resources_sorted()
>   assign_requested_resources_sorted()
>   pci_assign_resource() -->fail
>   reset_resource()
> -->res->start/end/flags = 0
> 
> pcie_port_device_register()
>   init_service_irqs()
>   pcie_port_enable_msix()
>   ...
>   msix_capability_init()
>   msix_map_region()
>   phys_addr = 
> pci_resource_start(dev, bir) + table_offset;
> If BAR(index=bir) was not assigned properly, pci_resource_start(dev, bir)
> here would return 0, so phys_addr is an invalid physical
> address of msix.
> 
> [   43.094087] [ cut here ]
> [   43.097418] WARNING: CPU: 1 PID: 1800 at arch/arm64/mm/ioremap.c:58 
> __ioremap_caller+0xd4/0xe8()
> ...
> [   43.121694] CPU: 1 PID: 1800 Comm: insmod Tainted: G   O  3.16.0 #5
> [   43.127374] Call trace:
> [   43.128522] [] dump_backtrace+0x0/0x130
> [   43.132637] [] show_stack+0x10/0x1c
> [   43.136402] [] dump_stack+0x74/0x94
> [   43.140166] [] warn_slowpath_common+0x8c/0xb4
> [   43.144804] [] warn_slowpath_null+0x14/0x20
> [   43.149266] [] __ioremap_caller+0xd0/0xe8
> [   43.153555] [] __ioremap+0xc/0x18
> [   43.157145] [] pci_enable_msix+0x238/0x44c
> [   43.161521] [] pci_enable_msix_range+0x34/0x80
> [   43.166243] [] pcie_port_device_register+0x104/0x480
> [   43.171491] [] pcie_portdrv_probe+0x38/0xa0
> [   43.175952] [] pci_device_probe+0x78/0xd4
> [   43.180238] [] really_probe+0x6c/0x22c
> [   43.184265] [] __device_attach+0x5c/0x6c
> [   43.188466] [] bus_for_each_drv+0x50/0x94
> [   43.192755] [] device_attach+0x9c/0xc0
> [   43.196780] [] pci_bus_add_device+0x38/0x80
> [   43.201243] [] pci_bus_add_devices+0x4c/0xd4
> [   43.205791] [] pci_common_init+0x274/0x378
> [   43.210170] [] $x+0xb8c/0xc88 [pcie]
> [   43.214024] [] platform_drv_probe+0x20/0x58
> [   43.218483] [] really_probe+0xc4/0x22c
> [   43.222510] [] __driver_attach+0xa0/0xa8
> [   43.226708] [] bus_for_each_dev+0x54/0x98
> [   43.231000] [] driver_attach+0x1c/0x28
> [   43.235024] [] bus_add_driver+0x14c/0x204
> [   43.239309] [] driver_register+0x64/0x130
> [   43.243598] [] __platform_driver_register+0x5c/0x68
> [   43.248757] [] platform_driver_probe+0x28/0xac
> [   43.253485] [] pcie_init+0x30/0x3c [pcie]
> [   43.258293] [] do_one_initcall+0x88/0x19c
> [   43.262585] [] load_module+0xc2c/0xf2c
> [   43.266609] [] SyS_finit_module+0x78/0x88
> [   43.270897] ---[ end trace ea5eb60837afb5aa ]---
> 
> Reported-by: Zhang Jukuo 
> Tested-by: Zhang Jukuo 
> Signed-off-by: Yijing Wang 

I applied this to pci/msi for v3.20, thanks!  I reworked the changelog as
follows; let me know if it still doesn't make things clear:


commit 6a878e5085fe97bd1e222b7883a1b815fcbbe4ed
Author: Yijing Wang 
Date:   Wed Jan 28 09:52:17 2015 +0800

PCI: Fail MSI-X mappings if there's no space assigned to MSI-X BAR

Unlike MSI, which is configured via registers in the MSI capability in
Configuration Space, MSI-X is configured via tables in Memory Space.
These MSI-X tables are mapped by a device BAR, and if no Memory Space
has been assigned to the BAR, MSI-X cannot be used.

Fail MSI-X setup if no space has been assigned for the BAR.

Previously, we ioremapped the MSI-X table even if the resource hadn't been
assigned.  In this case, the resource address is undefined (and is often
zero), which may lead to warnings or oopses in this path:

  pci_enable_msix
msix_capability_init
  msix_map_region
ioremap_nocache

The PCI core sets resource flags to zero when it can't assign space for the
resource (see reset_resource()).  There are also some cases where it sets
the IORESOURCE_UNSET flag, e.g., pci_reassigndev_resource_alignment(),
pci_assign_resource(), etc.  So we must check for both cases.

[bhelgaas: changelog]
Reported-by: Zhang Jukuo 
Tested-by: Zhang Jukuo 
Signed-off-by: Yijing Wang 
Signed-off-by: Bjorn Helgaas 

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index c489ef2c1a39..34fc4189ebf0 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -298,12 +298,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev 
*dev, int nvec, int type)
map_irq.entry_nr = nve

Re: [patch 0/2] Fix busy-spin of lapic_timer_int_injected with APIC-v

2015-02-02 Thread Paolo Bonzini


On 02/02/2015 18:44, Marcelo Tosatti wrote:
> > Nicer indeed.  It would be good to know what bug made you do this, though.
> 
> kvm-unit-test tscdeadline-latency shows negative delta.
> 
> Do you want me to resend with the modified changelog?

No, it's okay.

Thanks,

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 0/2] Fix busy-spin of lapic_timer_int_injected with APIC-v

2015-02-02 Thread Marcelo Tosatti
On Mon, Feb 02, 2015 at 06:37:08PM +0100, Paolo Bonzini wrote:
> 
> 
> On 02/02/2015 18:26, Marcelo Tosatti wrote:
> > See patches for details.
> 
> Nicer indeed.  It would be good to know what bug made you do this, though.
> 
> Paolo

kvm-unit-test tscdeadline-latency shows negative delta.

Do you want me to resend with the modified changelog?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 0/2] Fix busy-spin of lapic_timer_int_injected with APIC-v

2015-02-02 Thread Paolo Bonzini


On 02/02/2015 18:26, Marcelo Tosatti wrote:
> See patches for details.

Nicer indeed.  It would be good to know what bug made you do this, though.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 0/2] Fix busy-spin of lapic_timer_int_injected with APIC-v

2015-02-02 Thread Marcelo Tosatti
See patches for details.


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 1/2] KVM: x86: fix lapic_timer_int_injected with APIC-v

2015-02-02 Thread Marcelo Tosatti
With APICv, LAPIC timer interrupt is always delivered via IRR:
apic_find_highest_irr syncs PIR to IRR.

Signed-off-by: Marcelo Tosatti 

---
 arch/x86/kvm/lapic.c |   12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

Index: kvm/arch/x86/kvm/lapic.c
===
--- kvm.orig/arch/x86/kvm/lapic.c   2015-02-02 14:57:19.903066983 -0200
+++ kvm/arch/x86/kvm/lapic.c2015-02-02 15:25:40.197299923 -0200
@@ -1086,13 +1086,13 @@
 
if (kvm_apic_hw_enabled(apic)) {
int vec = reg & APIC_VECTOR_MASK;
+   void *bitmap = apic->regs + APIC_ISR;
 
-   if (kvm_x86_ops->test_posted_interrupt)
-   return kvm_x86_ops->test_posted_interrupt(vcpu, vec);
-   else {
-   if (apic_test_vector(vec, apic->regs + APIC_ISR))
-   return true;
-   }
+   if (kvm_x86_ops->deliver_posted_interrupt)
+   bitmap = apic->regs + APIC_IRR;
+
+   if (apic_test_vector(vec, bitmap))
+   return true;
}
return false;
 }


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 2/2] KVM: x86: revert "add method to test PIR bitmap vector"

2015-02-02 Thread Marcelo Tosatti
Revert 7c6a98dfa1ba9dc64a62e73624ecea9995736bbd, given 
that testing PIR is not necessary anymore.

Signed-off-by: Marcelo Tosatti 

---
 arch/x86/include/asm/kvm_host.h |1 -
 arch/x86/kvm/vmx.c  |   14 --
 2 files changed, 15 deletions(-)

Index: kvm/arch/x86/include/asm/kvm_host.h
===
--- kvm.orig/arch/x86/include/asm/kvm_host.h2015-02-02 15:19:06.361568508 
-0200
+++ kvm/arch/x86/include/asm/kvm_host.h 2015-02-02 15:19:46.376032645 -0200
@@ -767,7 +767,6 @@
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
-   bool (*test_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
Index: kvm/arch/x86/kvm/vmx.c
===
--- kvm.orig/arch/x86/kvm/vmx.c 2015-02-02 15:19:06.361568508 -0200
+++ kvm/arch/x86/kvm/vmx.c  2015-02-02 15:19:46.377032631 -0200
@@ -438,11 +438,6 @@
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 
-static int pi_test_pir(int vector, struct pi_desc *pi_desc)
-{
-   return test_bit(vector, (unsigned long *)pi_desc->pir);
-}
-
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
unsigned long host_rsp;
@@ -5908,7 +5903,6 @@
kvm_x86_ops->hwapic_irr_update = NULL;
kvm_x86_ops->hwapic_isr_update = NULL;
kvm_x86_ops->deliver_posted_interrupt = NULL;
-   kvm_x86_ops->test_posted_interrupt = NULL;
kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
}
 
@@ -6994,13 +6988,6 @@
return 1;
 }
 
-static bool vmx_test_pir(struct kvm_vcpu *vcpu, int vector)
-{
-   struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-   return pi_test_pir(vector, &vmx->pi_desc);
-}
-
 static int handle_pml_full(struct kvm_vcpu *vcpu)
 {
unsigned long exit_qualification;
@@ -9753,7 +9740,6 @@
.hwapic_isr_update = vmx_hwapic_isr_update,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
-   .test_posted_interrupt = vmx_test_pir,
 
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 6/6] KVM: nVMX: Enable nested posted interrupt processing

2015-02-02 Thread Paolo Bonzini


On 02/02/2015 16:33, Wincy Van wrote:
> static void vmx_accomp_nested_posted_intr(struct kvm_vcpu *vcpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> 
> if (is_guest_mode(vcpu) &&
> vmx->nested.posted_intr_nv != -1 &&
> pi_test_on(vmx->nested.pi_desc))
> kvm_apic_set_irr(vcpu,
> vmx->nested.posted_intr_nv);
> }
> 
> Then we will get an nested-vmexit in vmx_check_nested_events, that
> posted intr will be handled by L1 immediately.
> This mechanism will also emulate the hardware's behavior: If a posted
> intr was not accomplished by hardware, we will get an interrupt with
> POSTED_INTR_NV.

Yes.

> Would this be better?

I think you do not even need a new bit.  You can use KVM_REQ_EVENT and
(to complete my suggestion, which was not enough) do the above in
vmx_check_nested_events.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 6/6] KVM: nVMX: Enable nested posted interrupt processing

2015-02-02 Thread Wincy Van
On Mon, Feb 2, 2015 at 7:03 PM, Paolo Bonzini  wrote:
>
>
> On 28/01/2015 17:02, Wincy Van wrote:
>> +static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
>> +   int vector)
>> +{
>> +   if (is_guest_mode(vcpu) &&
>> +   vector == to_vmx(vcpu)->nested.posted_intr_nv &&
>> +   vcpu->mode == IN_GUEST_MODE) {
>> +   /* the PIR and ON have been set by L1. */
>
> What happens if there is a L2->L0->L2 exit on the target VCPU, and the
> guest exits before apic->send_IPI_mask sends the IPI?
>
> The L1 hypervisor might "know" somehow that there cannot be a concurrent
> L2->L1->L2 exit, and not do the equivalent of KVM's
>

In non-nested case, if a posted intr was not accomplished by hardware,
we will sync the pir to irr and set rvi to accomplish it.
The current implementation may cause some of the nested posted interrupts to be
delayed for a short time (waiting for a nested vmexit).

> kvm_make_request(KVM_REQ_EVENT, vcpu);
>
> after it sets ON.
>
> So I think you have to do something like
>
> static bool vmx_is_nested_posted_interrupt(struct kvm_vcpu *vcpu,
>int vector)
> {
> return (is_guest_mode(vcpu) &&
> vector == to_vmx(vcpu)->nested.posted_intr_nv);
> }
>
> and in vmx_deliver_posted_interrupt:
>
> r = 0;
> if (!vmx_is_nested_posted_interrupt(vcpu, vector)) {
> if (pi_test_and_set_pir(vector, &vmx->pi_desc))
> return;
>
> r = pi_test_and_set_on(&vmx->pi_desc);
> }
> kvm_make_request(KVM_REQ_EVENT, vcpu);
> #ifdef CONFIG_SMP
> if (!r && (vcpu->mode == IN_GUEST_MODE))
> apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
> POSTED_INTR_VECTOR);
> else
> #endif
> kvm_vcpu_kick(vcpu);
>
>
> What do you think?
>

I think that there would be a way to avoid that delay, but may hurt performance:
When doing nested posted intr, we can set a request bit:

   if (is_guest_mode(vcpu) &&
vector == to_vmx(vcpu)->nested.posted_intr_nv &&
vcpu->mode == IN_GUEST_MODE) {
/* the PIR and ON have been set by L1. */
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
+kvm_make_request(KVM_REQ_ACCOMP_POSTED_INTR, vcpu);
return 0;
}

If a posted intr was not accomplished by hardware,  we can check that
bit before checking KVM_REQ_EVENT, and if that bit is set, we can do:

static void vmx_accomp_nested_posted_intr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);

if (is_guest_mode(vcpu) &&
vmx->nested.posted_intr_nv != -1 &&
pi_test_on(vmx->nested.pi_desc))
kvm_apic_set_irr(vcpu,
vmx->nested.posted_intr_nv);
}

Then we will get an nested-vmexit in vmx_check_nested_events, that
posted intr will be handled by L1 immediately.
This mechanism will also emulate the hardware's behavior: If a posted
intr was not accomplished by hardware, we will get an interrupt with
POSTED_INTR_NV.

Would this be better?

Thanks,
Wincy
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 17/18] arm/arm64: add smp_boot_secondary

2015-02-02 Thread Andrew Jones
On Mon, Feb 02, 2015 at 01:29:59PM +0100, Andrew Jones wrote:
> Add a common entry point, present/online cpu masks, and
> smp_boot_secondary() to support booting secondary cpus.
> Adds a bit more PSCI API that we need too. We also
> adjust THREAD_START_SP for arm to make some room for
> exception stacks.
> 
> Signed-off-by: Andrew Jones 
> ---
> v3:
> - fix bug in cstart.S:secondary_entry, enable mmu before loading
>   secondary_data to set the stack and exception stacks base
> - remove unnecessary asm-offsets:SECONDARY_DATA_STACK, just require
>   stack to always be the first member of the struct
> v2:
> - Remove secondary_data.exception_stacks. Originally I didn't have
>   the exception stacks at the top of the stack, so this member was
>   useful. After choosing to just use the top of the stack for the
>   base, the member has become unnecessary, and can be removed.
> 
> - Fix bug for exceptions taken in svc mode on a secondary processor.
>   It was using cpu0's exception stacks region instead of its own.

I'm back. And you guessed it, I need a v4. What an embarrassment this
patch is... I actually introduced two bugs with v2, while fixing one.
One step forward, two steps back. I didn't see a problem while testing
this time, but it popped into my head shortly after hitting enter on
git-send-email. (git-send-email is a great trigger for my brain, that's
when all the thinking starts. Too bad it doesn't work with --dry-run.)

So, v2 fixed secondary processors using cpu0's exception stacks, but
broke cpu0's use in the process. My plan had been to leave cpu0 on
the linker setup exception_stacks, but now that the handler looks at
the top of the stack for exception stacks for all cpus, we need to setup
cpu0's exception stacks the same way, and we can remove the one from the
linker script. v4 coming... Hopefully when I hit send my mind will just
go blank this time - like it generally is. I've added a v4 branch to
github as well.

https://github.com/rhdrjones/kvm-unit-tests/tree/arm/smp-v4


> 
>  arm/cstart.S | 41 +-
>  arm/cstart64.S   | 25 +
>  config/config-arm-common.mak |  1 +
>  lib/arm/asm-offsets.c|  1 +
>  lib/arm/asm/psci.h   |  2 ++
>  lib/arm/asm/smp.h| 49 
>  lib/arm/asm/thread_info.h| 26 ++
>  lib/arm/psci.c   | 19 
>  lib/arm/setup.c  |  8 +--
>  lib/arm/smp.c| 53 
> 
>  lib/arm64/asm-offsets.c  |  1 +
>  lib/arm64/asm/psci.h |  2 ++
>  lib/arm64/asm/smp.h  |  1 +
>  13 files changed, 217 insertions(+), 12 deletions(-)
>  create mode 100644 lib/arm/asm/smp.h
>  create mode 100644 lib/arm/smp.c
>  create mode 100644 lib/arm64/asm/smp.h
> 
> diff --git a/arm/cstart.S b/arm/cstart.S
> index 08a0b3ecc61f6..3839549742fee 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -6,10 +6,13 @@
>   * This work is licensed under the terms of the GNU LGPL, version 2.
>   */
>  #define __ASSEMBLY__
> +#include 
>  #include 
>  #include 
>  #include 
>  
> +#define THREAD_START_SP ((THREAD_SIZE - S_FRAME_SIZE * 8) & ~7)
> +
>  .arm
>  
>  .section .init
> @@ -32,6 +35,7 @@ start:
>   push{r0-r1}
>  
>   /* set up vector table and mode stacks */
> + ldr r0, =exception_stacks
>   bl  exceptions_init
>  
>   /* complete setup */
> @@ -62,13 +66,12 @@ exceptions_init:
>   mcr p15, 0, r2, c12, c0, 0  @ write VBAR
>  
>   mrs r2, cpsr
> - ldr r1, =exception_stacks
>  
>   /* first frame reserved for svc mode */
> - set_mode_stack  UND_MODE, r1
> - set_mode_stack  ABT_MODE, r1
> - set_mode_stack  IRQ_MODE, r1
> - set_mode_stack  FIQ_MODE, r1
> + set_mode_stack  UND_MODE, r0
> + set_mode_stack  ABT_MODE, r0
> + set_mode_stack  IRQ_MODE, r0
> + set_mode_stack  FIQ_MODE, r0
>  
>   msr cpsr_cxsf, r2   @ back to svc mode
>   isb
> @@ -76,6 +79,30 @@ exceptions_init:
>  
>  .text
>  
> +.global secondary_entry
> +secondary_entry:
> + /* enable the MMU */
> + mov r1, #0
> + ldr r0, =mmu_idmap
> + ldr r0, [r0]
> + bl  asm_mmu_enable
> +
> + /*
> +  * Set the stack, and set up vector table
> +  * and exception stacks. Exception stacks
> +  * space starts at stack top and grows up.
> +  */
> + ldr r4, =secondary_data
> + ldr r0, [r4]
> + mov sp, r0
> + bl  exceptions_init
> +
> + /* finish init in C code */
> + bl  secondary_cinit
> +
> + /* r0 is now the entry function, run it */
> + mov pc, r0
> +
>  .globl halt
>  halt:
>  1:   wfi
> @@ -168,7 +195,9 @@ vector_svc:
>* and spsr_ (parent CPSR)
>*/
>   push{ r1 }
> - ldr r1, =exception_stacks
> 

[PATCH v4 17/18] arm/arm64: add smp_boot_secondary

2015-02-02 Thread Andrew Jones
Add a common entry point, present/online cpu masks, and
smp_boot_secondary() to support booting secondary cpus.
Adds a bit more PSCI API that we need too. We also
adjust THREAD_START_SP for arm to make some room for
exception stacks.

Signed-off-by: Andrew Jones 
---
v4:
- Fix yet another bug... v2 fixed secondary processors using cpu0's
  exception stacks, but broke cpu0 in the process. Until now,
  my plan was to just leave cpu0 on 'exception_stacks', set up by
  the linker, but v2's change means we can't do that. So set the
  initial stack up correctly instead.
- Also remove the smp.h includes from arm[64]/asm-offsets.c, they
  should have been removed with the 2nd change made for v3 of this
  patch.
v3:
- fix bug in cstart.S:secondary_entry, enable mmu before loading
  secondary_data to set the stack and exception stacks base
- remove unnecessary asm-offsets:SECONDARY_DATA_STACK, just require
  stack to always be the first member of the struct
v2:
- Remove secondary_data.exception_stacks. Originally I didn't have
  the exception stacks at the top of the stack, so this member was
  useful. After choosing to just use the top of the stack for the
  base, the member has become unnecessary, and can be removed.

- Fix bug for exceptions taken in svc mode on a secondary processor.
  It was using cpu0's exception stacks region instead of its own.

 arm/cstart.S | 59 ++--
 arm/cstart64.S   | 25 +++
 arm/flat.lds |  3 ---
 config/config-arm-common.mak |  1 +
 lib/arm/asm/psci.h   |  2 ++
 lib/arm/asm/smp.h| 49 
 lib/arm/asm/thread_info.h| 26 ---
 lib/arm/psci.c   | 19 ++
 lib/arm/setup.c  |  8 --
 lib/arm/smp.c| 53 +++
 lib/arm64/asm/psci.h |  2 ++
 lib/arm64/asm/smp.h  |  1 +
 12 files changed, 232 insertions(+), 16 deletions(-)
 create mode 100644 lib/arm/asm/smp.h
 create mode 100644 lib/arm/smp.c
 create mode 100644 lib/arm64/asm/smp.h

diff --git a/arm/cstart.S b/arm/cstart.S
index 08a0b3ecc61f6..2b7f8f3d200ef 100644
--- a/arm/cstart.S
+++ b/arm/cstart.S
@@ -6,10 +6,13 @@
  * This work is licensed under the terms of the GNU LGPL, version 2.
  */
 #define __ASSEMBLY__
+#include 
 #include 
 #include 
 #include 
 
+#define THREAD_START_SP ((THREAD_SIZE - S_FRAME_SIZE * 8) & ~7)
+
 .arm
 
 .section .init
@@ -17,6 +20,22 @@
 .globl start
 start:
/*
+* set stack, making room at top of stack for cpu0's
+* exception stacks. Must start with stackptr, not
+* stacktop, so the thread size masking (shifts) work.
+*/
+   ldr sp, =stackptr
+   lsr sp, #THREAD_SHIFT
+   lsl sp, #THREAD_SHIFT
+   add sp, #THREAD_START_SP
+
+   /*
+* save sp before pushing anything on the stack
+* lr makes a good temp register right now
+*/
+   mov lr, sp
+
+   /*
 * bootloader params are in r0-r2
 * See the kernel doc Documentation/arm/Booting
 *   r0 = 0
@@ -27,11 +46,12 @@ start:
 * put the dtb in r0. This allows setup to be consistent
 * with arm64.
 */
-   ldr sp, =stackptr
mov r0, r2
push{r0-r1}
 
/* set up vector table and mode stacks */
+   mov r0, lr  @ lr is stack top (see above),
+   @ which is the exception stacks base
bl  exceptions_init
 
/* complete setup */
@@ -62,13 +82,12 @@ exceptions_init:
mcr p15, 0, r2, c12, c0, 0  @ write VBAR
 
mrs r2, cpsr
-   ldr r1, =exception_stacks
 
/* first frame reserved for svc mode */
-   set_mode_stack  UND_MODE, r1
-   set_mode_stack  ABT_MODE, r1
-   set_mode_stack  IRQ_MODE, r1
-   set_mode_stack  FIQ_MODE, r1
+   set_mode_stack  UND_MODE, r0
+   set_mode_stack  ABT_MODE, r0
+   set_mode_stack  IRQ_MODE, r0
+   set_mode_stack  FIQ_MODE, r0
 
msr cpsr_cxsf, r2   @ back to svc mode
isb
@@ -76,6 +95,30 @@ exceptions_init:
 
 .text
 
+.global secondary_entry
+secondary_entry:
+   /* enable the MMU */
+   mov r1, #0
+   ldr r0, =mmu_idmap
+   ldr r0, [r0]
+   bl  asm_mmu_enable
+
+   /*
+* Set the stack, and set up vector table
+* and exception stacks. Exception stacks
+* space starts at stack top and grows up.
+*/
+   ldr r1, =secondary_data
+   ldr r0, [r1]
+   mov sp, r0
+   bl  exceptions_init
+
+   /* finish init in C code */
+   bl  secondary_cinit
+
+   /* r0 is now the entry function, run it */
+   mov pc, r0
+
 .globl halt
 halt:
 1:

Re: [PATCH 2/8] KVM: x86: cleanup kvm_apic_match_*()

2015-02-02 Thread Radim Krčmář
2015-02-02 15:28+0100, Paolo Bonzini:
> 
> 
> On 02/02/2015 15:26, Radim Krčmář wrote:
> >>> > > + return ((logical_id >> 4) == (mda >> 4))
> >>> > > +&& (logical_id & mda & 0xf);
> > was merged as
> > 
> > +   return ((logical_id >> 4) == (mda >> 4))
> > +  && (logical_id & mda & 0xf) != 0;
> > 
> > but it has to be parenthesized ('&&' has lower precedence than '!=').
> 
> Lower precedence means that the merged version is right (unless my brain
> went bonkers, which I cannot exclude).  "!=" has higher precedence and
> thus it is implicitly parenthesized.
> 
> In fact the first comparison could have its parentheses removed as well.

Yes, it could be,
  logical_id >> 4 == mda >> 4 && (logical_id & mda & 0xf) != 0

I missed the point and thought that you wanted to turn the whole
expression into bool, when it already was one.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/8] KVM: x86: cleanup kvm_apic_match_*()

2015-02-02 Thread Paolo Bonzini


On 02/02/2015 15:26, Radim Krčmář wrote:
>>> > > +   return ((logical_id >> 4) == (mda >> 4))
>>> > > +  && (logical_id & mda & 0xf);
> was merged as
> 
> + return ((logical_id >> 4) == (mda >> 4))
> +&& (logical_id & mda & 0xf) != 0;
> 
> but it has to be parenthesized ('&&' has lower precedence than '!=').

Lower precedence means that the merged version is right (unless my brain
went bonkers, which I cannot exclude).  "!=" has higher precedence and
thus it is implicitly parenthesized.

In fact the first comparison could have its parentheses removed as well.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/8] KVM: x86: cleanup kvm_apic_match_*()

2015-02-02 Thread Radim Krčmář
2015-02-02 15:26+0100, Radim Krčmář:
> 2015-01-30 09:52+0100, Paolo Bonzini:
> + return ((logical_id >> 4) == (mda >> 4))
> +&& (logical_id & mda & 0xf) != 0;
> 
> but it has to be parenthesized ('&&' has lower precedence than '!=').

No, my bad, I understood it now.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/8] KVM: x86: cleanup kvm_apic_match_*()

2015-02-02 Thread Radim Krčmář
2015-01-30 09:52+0100, Paolo Bonzini:
> On 29/01/2015 22:48, Radim Krčmář wrote:
> > The majority of this patch turns
> >   result = 0; if (CODE) result = 1; return result;
> > into
> >   return CODE;
> > because we return bool now.
> 
> Added a bunch of "!= 0" to avoid automagic conversion to bool.

(I haven't checked it earlier ... sorry.)

> > -   if (((logical_id >> 4) == (mda >> 0x4))
> > -   && (logical_id & mda & 0xf))
> > -   result = 1;
> > -   break;
> > +   return ((logical_id >> 4) == (mda >> 4))
> > +  && (logical_id & mda & 0xf);

was merged as

+   return ((logical_id >> 4) == (mda >> 4))
+  && (logical_id & mda & 0xf) != 0;

but it has to be parenthesized ('&&' has lower precedence than '!=').
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 17/18] arm/arm64: add smp_boot_secondary

2015-02-02 Thread Andrew Jones
Add a common entry point, present/online cpu masks, and
smp_boot_secondary() to support booting secondary cpus.
Adds a bit more PSCI API that we need too. We also
adjust THREAD_START_SP for arm to make some room for
exception stacks.

Signed-off-by: Andrew Jones 
---
v3:
- fix bug in cstart.S:secondary_entry, enable mmu before loading
  secondary_data to set the stack and exception stacks base
- remove unnecessary asm-offsets:SECONDARY_DATA_STACK, just require
  stack to always be the first member of the struct
v2:
- Remove secondary_data.exception_stacks. Originally I didn't have
  the exception stacks at the top of the stack, so this member was
  useful. After choosing to just use the top of the stack for the
  base, the member has become unnecessary, and can be removed.

- Fix bug for exceptions taken in svc mode on a secondary processor.
  It was using cpu0's exception stacks region instead of its own.

 arm/cstart.S | 41 +-
 arm/cstart64.S   | 25 +
 config/config-arm-common.mak |  1 +
 lib/arm/asm-offsets.c|  1 +
 lib/arm/asm/psci.h   |  2 ++
 lib/arm/asm/smp.h| 49 
 lib/arm/asm/thread_info.h| 26 ++
 lib/arm/psci.c   | 19 
 lib/arm/setup.c  |  8 +--
 lib/arm/smp.c| 53 
 lib/arm64/asm-offsets.c  |  1 +
 lib/arm64/asm/psci.h |  2 ++
 lib/arm64/asm/smp.h  |  1 +
 13 files changed, 217 insertions(+), 12 deletions(-)
 create mode 100644 lib/arm/asm/smp.h
 create mode 100644 lib/arm/smp.c
 create mode 100644 lib/arm64/asm/smp.h

diff --git a/arm/cstart.S b/arm/cstart.S
index 08a0b3ecc61f6..3839549742fee 100644
--- a/arm/cstart.S
+++ b/arm/cstart.S
@@ -6,10 +6,13 @@
  * This work is licensed under the terms of the GNU LGPL, version 2.
  */
 #define __ASSEMBLY__
+#include 
 #include 
 #include 
 #include 
 
+#define THREAD_START_SP ((THREAD_SIZE - S_FRAME_SIZE * 8) & ~7)
+
 .arm
 
 .section .init
@@ -32,6 +35,7 @@ start:
push{r0-r1}
 
/* set up vector table and mode stacks */
+   ldr r0, =exception_stacks
bl  exceptions_init
 
/* complete setup */
@@ -62,13 +66,12 @@ exceptions_init:
mcr p15, 0, r2, c12, c0, 0  @ write VBAR
 
mrs r2, cpsr
-   ldr r1, =exception_stacks
 
/* first frame reserved for svc mode */
-   set_mode_stack  UND_MODE, r1
-   set_mode_stack  ABT_MODE, r1
-   set_mode_stack  IRQ_MODE, r1
-   set_mode_stack  FIQ_MODE, r1
+   set_mode_stack  UND_MODE, r0
+   set_mode_stack  ABT_MODE, r0
+   set_mode_stack  IRQ_MODE, r0
+   set_mode_stack  FIQ_MODE, r0
 
msr cpsr_cxsf, r2   @ back to svc mode
isb
@@ -76,6 +79,30 @@ exceptions_init:
 
 .text
 
+.global secondary_entry
+secondary_entry:
+   /* enable the MMU */
+   mov r1, #0
+   ldr r0, =mmu_idmap
+   ldr r0, [r0]
+   bl  asm_mmu_enable
+
+   /*
+* Set the stack, and set up vector table
+* and exception stacks. Exception stacks
+* space starts at stack top and grows up.
+*/
+   ldr r4, =secondary_data
+   ldr r0, [r4]
+   mov sp, r0
+   bl  exceptions_init
+
+   /* finish init in C code */
+   bl  secondary_cinit
+
+   /* r0 is now the entry function, run it */
+   mov pc, r0
+
 .globl halt
 halt:
 1: wfi
@@ -168,7 +195,9 @@ vector_svc:
 * and spsr_ (parent CPSR)
 */
push{ r1 }
-   ldr r1, =exception_stacks
+   lsr r1, sp, #THREAD_SHIFT
+   lsl r1, #THREAD_SHIFT
+   add r1, #THREAD_START_SP
str r0, [r1, #S_R0]
pop { r0 }
str r0, [r1, #S_R1]
diff --git a/arm/cstart64.S b/arm/cstart64.S
index 58e4040cfb40f..cdda13c17af9e 100644
--- a/arm/cstart64.S
+++ b/arm/cstart64.S
@@ -55,6 +55,31 @@ exceptions_init:
 
 .text
 
+.globl secondary_entry
+secondary_entry:
+   /* Enable FP/ASIMD */
+   mov x0, #(3 << 20)
+   msr cpacr_el1, x0
+
+   /* set up exception handling */
+   bl  exceptions_init
+
+   /* enable the MMU */
+   adr x0, mmu_idmap
+   ldr x0, [x0]
+   bl  asm_mmu_enable
+
+   /* set the stack */
+   adr x1, secondary_data
+   ldr x0, [x1]
+   mov sp, x0
+
+   /* finish init in C code */
+   bl  secondary_cinit
+
+   /* x0 is now the entry function, run it */
+   br  x0
+
 .globl halt
 halt:
 1: wfi
diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak
index 13f5338a35a02..314261ef60cf7 100644
--- a/config/config-arm-common.mak
+++ b/config/config-arm-common.mak
@@ -36,6 +36,7 @@ cflatobjs += lib/arm/setup.o
 c

Re: [PATCH v2 17/18] arm/arm64: add smp_boot_secondary

2015-02-02 Thread Andrew Jones
On Mon, Feb 02, 2015 at 11:23:28AM +0100, Andrew Jones wrote:
> Add a common entry point, present/online cpu masks, and
> smp_boot_secondary() to support booting secondary cpus.
> Adds a bit more PSCI API that we need too. We also
> adjust THREAD_START_SP for arm to make some room for
> exception stacks.
> 
> Signed-off-by: Andrew Jones 
> ---
> v2:
> - Remove secondary_data.exception_stacks. Originally I didn't have
>   the exception stacks at the top of the stack, so this member was
>   useful. After choosing to just use the top of the stack for the
>   base, the member has become unnecessary, and can be removed.
> 
> - Fix bug for exceptions taken in svc mode on a secondary processor.
>   It was using cpu0's exception stacks region instead of its own.

Drat. Just tested this v2 on hardware and see I introduced a bug with
it. See below

> 
>  arm/cstart.S | 41 +-
>  arm/cstart64.S   | 25 +
>  config/config-arm-common.mak |  1 +
>  lib/arm/asm-offsets.c|  2 ++
>  lib/arm/asm/psci.h   |  2 ++
>  lib/arm/asm/smp.h| 49 
>  lib/arm/asm/thread_info.h| 26 ++
>  lib/arm/psci.c   | 19 
>  lib/arm/setup.c  |  8 +--
>  lib/arm/smp.c| 53 
> 
>  lib/arm64/asm-offsets.c  |  2 ++
>  lib/arm64/asm/psci.h |  2 ++
>  lib/arm64/asm/smp.h  |  1 +
>  13 files changed, 219 insertions(+), 12 deletions(-)
>  create mode 100644 lib/arm/asm/smp.h
>  create mode 100644 lib/arm/smp.c
>  create mode 100644 lib/arm64/asm/smp.h
> 
> diff --git a/arm/cstart.S b/arm/cstart.S
> index 08a0b3ecc61f6..5b0b8e62cec65 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -6,10 +6,13 @@
>   * This work is licensed under the terms of the GNU LGPL, version 2.
>   */
>  #define __ASSEMBLY__
> +#include 
>  #include 
>  #include 
>  #include 
>  
> +#define THREAD_START_SP ((THREAD_SIZE - S_FRAME_SIZE * 8) & ~7)
> +
>  .arm
>  
>  .section .init
> @@ -32,6 +35,7 @@ start:
>   push{r0-r1}
>  
>   /* set up vector table and mode stacks */
> + ldr r0, =exception_stacks
>   bl  exceptions_init
>  
>   /* complete setup */
> @@ -62,13 +66,12 @@ exceptions_init:
>   mcr p15, 0, r2, c12, c0, 0  @ write VBAR
>  
>   mrs r2, cpsr
> - ldr r1, =exception_stacks
>  
>   /* first frame reserved for svc mode */
> - set_mode_stack  UND_MODE, r1
> - set_mode_stack  ABT_MODE, r1
> - set_mode_stack  IRQ_MODE, r1
> - set_mode_stack  FIQ_MODE, r1
> + set_mode_stack  UND_MODE, r0
> + set_mode_stack  ABT_MODE, r0
> + set_mode_stack  IRQ_MODE, r0
> + set_mode_stack  FIQ_MODE, r0
>  
>   msr cpsr_cxsf, r2   @ back to svc mode
>   isb
> @@ -76,6 +79,30 @@ exceptions_init:
>  
>  .text
>  
> +.global secondary_entry
> +secondary_entry:
> + /*
> +  * Set the stack, and set up vector table
> +  * and exception stacks. Exception stacks
> +  * space starts at stack top and grows up.
> +  */
> + ldr r4, =secondary_data
> + ldr r0, [r4, #SECONDARY_DATA_STACK]
> + mov sp, r0

Moving the stack setup from just above the call to secondary_cinit
to here leads to sp := 0 on hardware.  I should only load secondary_data
after the mmu is setup.  Actually, I didn't notice that the exception
stacks base was getting set to zero in v1, as I hadn't tested it. The
fix for the stack will now fix them too though. I have a v3 ready to
send, which has been tested on hardware, and with exceptions tested on
secondaries. Sending...

> + bl  exceptions_init
> +
> + /* enable the MMU */
> + mov r1, #0
> + ldr r0, =mmu_idmap
> + ldr r0, [r0]
> + bl  asm_mmu_enable
> +
> + /* finish init in C code */
> + bl  secondary_cinit
> +
> + /* r0 is now the entry function, run it */
> + mov pc, r0
> +
>  .globl halt
>  halt:
>  1:   wfi
> @@ -168,7 +195,9 @@ vector_svc:
>* and spsr_ (parent CPSR)
>*/
>   push{ r1 }
> - ldr r1, =exception_stacks
> + lsr r1, sp, #THREAD_SHIFT
> + lsl r1, #THREAD_SHIFT
> + add r1, #THREAD_START_SP
>   str r0, [r1, #S_R0]
>   pop { r0 }
>   str r0, [r1, #S_R1]
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index 58e4040cfb40f..b4d7f1939793b 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -55,6 +55,31 @@ exceptions_init:
>  
>  .text
>  
> +.globl secondary_entry
> +secondary_entry:
> + /* Enable FP/ASIMD */
> + mov x0, #(3 << 20)
> + msr cpacr_el1, x0
> +
> + /* set up exception handling */
> + bl  exceptions_init
> +
> + /* enable the MMU */
> + adr x0, mmu_idmap
> + ldr x0, [x0]
> + bl  asm_mmu_ena

Re: [PATCH v4 6/6] KVM: nVMX: Enable nested posted interrupt processing

2015-02-02 Thread Paolo Bonzini


On 28/01/2015 17:02, Wincy Van wrote:
> +static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
> +   int vector)
> +{
> +   if (is_guest_mode(vcpu) &&
> +   vector == to_vmx(vcpu)->nested.posted_intr_nv &&
> +   vcpu->mode == IN_GUEST_MODE) {
> +   /* the PIR and ON have been set by L1. */

What happens if there is a L2->L0->L2 exit on the target VCPU, and the
guest exits before apic->send_IPI_mask sends the IPI?

The L1 hypervisor might "know" somehow that there cannot be a concurrent
L2->L1->L2 exit, and not do the equivalent of KVM's

kvm_make_request(KVM_REQ_EVENT, vcpu);

after it sets ON.

So I think you have to do something like

static bool vmx_is_nested_posted_interrupt(struct kvm_vcpu *vcpu,
   int vector)
{
return (is_guest_mode(vcpu) &&
vector == to_vmx(vcpu)->nested.posted_intr_nv);
}

and in vmx_deliver_posted_interrupt:

r = 0;
if (!vmx_is_nested_posted_interrupt(vcpu, vector)) {
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
return;

r = pi_test_and_set_on(&vmx->pi_desc);
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
#ifdef CONFIG_SMP
if (!r && (vcpu->mode == IN_GUEST_MODE))
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
else
#endif
kvm_vcpu_kick(vcpu);


What do you think?

Paolo

> +   apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
> +   POSTED_INTR_VECTOR);
> +   return 0;
> +   }
> +   return -1;
> +}
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 17/18] arm/arm64: add smp_boot_secondary

2015-02-02 Thread Andrew Jones
Add a common entry point, present/online cpu masks, and
smp_boot_secondary() to support booting secondary cpus.
Adds a bit more PSCI API that we need too. We also
adjust THREAD_START_SP for arm to make some room for
exception stacks.

Signed-off-by: Andrew Jones 
---
v2:
- Remove secondary_data.exception_stacks. Originally I didn't have
  the exception stacks at the top of the stack, so this member was
  useful. After choosing to just use the top of the stack for the
  base, the member has become unnecessary, and can be removed.

- Fix bug for exceptions taken in svc mode on a secondary processor.
  It was using cpu0's exception stacks region instead of its own.

 arm/cstart.S | 41 +-
 arm/cstart64.S   | 25 +
 config/config-arm-common.mak |  1 +
 lib/arm/asm-offsets.c|  2 ++
 lib/arm/asm/psci.h   |  2 ++
 lib/arm/asm/smp.h| 49 
 lib/arm/asm/thread_info.h| 26 ++
 lib/arm/psci.c   | 19 
 lib/arm/setup.c  |  8 +--
 lib/arm/smp.c| 53 
 lib/arm64/asm-offsets.c  |  2 ++
 lib/arm64/asm/psci.h |  2 ++
 lib/arm64/asm/smp.h  |  1 +
 13 files changed, 219 insertions(+), 12 deletions(-)
 create mode 100644 lib/arm/asm/smp.h
 create mode 100644 lib/arm/smp.c
 create mode 100644 lib/arm64/asm/smp.h

diff --git a/arm/cstart.S b/arm/cstart.S
index 08a0b3ecc61f6..5b0b8e62cec65 100644
--- a/arm/cstart.S
+++ b/arm/cstart.S
@@ -6,10 +6,13 @@
  * This work is licensed under the terms of the GNU LGPL, version 2.
  */
 #define __ASSEMBLY__
+#include 
 #include 
 #include 
 #include 
 
+#define THREAD_START_SP ((THREAD_SIZE - S_FRAME_SIZE * 8) & ~7)
+
 .arm
 
 .section .init
@@ -32,6 +35,7 @@ start:
push{r0-r1}
 
/* set up vector table and mode stacks */
+   ldr r0, =exception_stacks
bl  exceptions_init
 
/* complete setup */
@@ -62,13 +66,12 @@ exceptions_init:
mcr p15, 0, r2, c12, c0, 0  @ write VBAR
 
mrs r2, cpsr
-   ldr r1, =exception_stacks
 
/* first frame reserved for svc mode */
-   set_mode_stack  UND_MODE, r1
-   set_mode_stack  ABT_MODE, r1
-   set_mode_stack  IRQ_MODE, r1
-   set_mode_stack  FIQ_MODE, r1
+   set_mode_stack  UND_MODE, r0
+   set_mode_stack  ABT_MODE, r0
+   set_mode_stack  IRQ_MODE, r0
+   set_mode_stack  FIQ_MODE, r0
 
msr cpsr_cxsf, r2   @ back to svc mode
isb
@@ -76,6 +79,30 @@ exceptions_init:
 
 .text
 
+.global secondary_entry
+secondary_entry:
+   /*
+* Set the stack, and set up vector table
+* and exception stacks. Exception stacks
+* space starts at stack top and grows up.
+*/
+   ldr r4, =secondary_data
+   ldr r0, [r4, #SECONDARY_DATA_STACK]
+   mov sp, r0
+   bl  exceptions_init
+
+   /* enable the MMU */
+   mov r1, #0
+   ldr r0, =mmu_idmap
+   ldr r0, [r0]
+   bl  asm_mmu_enable
+
+   /* finish init in C code */
+   bl  secondary_cinit
+
+   /* r0 is now the entry function, run it */
+   mov pc, r0
+
 .globl halt
 halt:
 1: wfi
@@ -168,7 +195,9 @@ vector_svc:
 * and spsr_ (parent CPSR)
 */
push{ r1 }
-   ldr r1, =exception_stacks
+   lsr r1, sp, #THREAD_SHIFT
+   lsl r1, #THREAD_SHIFT
+   add r1, #THREAD_START_SP
str r0, [r1, #S_R0]
pop { r0 }
str r0, [r1, #S_R1]
diff --git a/arm/cstart64.S b/arm/cstart64.S
index 58e4040cfb40f..b4d7f1939793b 100644
--- a/arm/cstart64.S
+++ b/arm/cstart64.S
@@ -55,6 +55,31 @@ exceptions_init:
 
 .text
 
+.globl secondary_entry
+secondary_entry:
+   /* Enable FP/ASIMD */
+   mov x0, #(3 << 20)
+   msr cpacr_el1, x0
+
+   /* set up exception handling */
+   bl  exceptions_init
+
+   /* enable the MMU */
+   adr x0, mmu_idmap
+   ldr x0, [x0]
+   bl  asm_mmu_enable
+
+   /* set the stack */
+   adr x1, secondary_data
+   ldr x0, [x1, #SECONDARY_DATA_STACK]
+   mov sp, x0
+
+   /* finish init in C code */
+   bl  secondary_cinit
+
+   /* x0 is now the entry function, run it */
+   br  x0
+
 .globl halt
 halt:
 1: wfi
diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak
index 13f5338a35a02..314261ef60cf7 100644
--- a/config/config-arm-common.mak
+++ b/config/config-arm-common.mak
@@ -36,6 +36,7 @@ cflatobjs += lib/arm/setup.o
 cflatobjs += lib/arm/mmu.o
 cflatobjs += lib/arm/bitops.o
 cflatobjs += lib/arm/psci.o
+cflatobjs += lib/arm/smp.o
 
 libeabi = lib/arm/libeabi.a
 eabiobjs = lib/arm/eabi_compat.o
diff --git a/lib/arm/asm-offsets.c b/l

Re: [PATCH 1/4] stubs for xsavec support

2015-02-02 Thread Paolo Bonzini


On 02/02/2015 08:04, Jan Kiszka wrote:
>> > +#if X86_FEATURE_XSAVEOPT < 10 * 32
>> > +#undef X86_FEATURE_XSAVEOPT
>> > +#endif
>> > +#define X86_FEATURE_XSAVEOPT  (10*32+0) /* XSAVEOPT instruction */
> This causes redefinition warnings if the condition is not met. Was the
> plan to put the define before the #endif?

The plan was to match the kernel's definition, which however has a space:

#define X86_FEATURE_XSAVEOPT(10*32+ 0)

But putting the define before the #endif also works.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvmppc: Implement H_LOGICAL_CI_{LOAD,STORE} in KVM

2015-02-02 Thread David Gibson
On Mon, Feb 02, 2015 at 09:59:16AM +0100, Paolo Bonzini wrote:
> 
> 
> On 02/02/2015 08:45, David Gibson wrote:
> > +   case H_LOGICAL_CI_LOAD:
> > +   ret = kvmppc_h_logical_ci_load(vcpu);
> > +   if (ret == H_TOO_HARD) {
> > +   printk("Punting H_LOGICAL_CI_LOAD\n");
> > +   return RESUME_HOST;
> > +   }
> > +   break;
> > +   case H_LOGICAL_CI_STORE:
> > +   ret = kvmppc_h_logical_ci_store(vcpu);
> > +   if (ret == H_TOO_HARD) {
> > +   printk("Punting H_LOGICAL_CI_STORE\n");
> > +   return RESUME_HOST;
> > +   }
> > +   break;
> > case H_SET_MODE:
> > ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
> > kvmppc_get_gpr(vcpu, 5),
> 
> KERN_DEBUG I guess?  Or even no printk at all perhaps.

Oh, bugger, removed most of the debug code, but not all of it.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


pgpkeMd8TAcW1.pgp
Description: PGP signature


Re: [PATCH] kvmppc: Implement H_LOGICAL_CI_{LOAD,STORE} in KVM

2015-02-02 Thread Paolo Bonzini


On 02/02/2015 08:45, David Gibson wrote:
> + case H_LOGICAL_CI_LOAD:
> + ret = kvmppc_h_logical_ci_load(vcpu);
> + if (ret == H_TOO_HARD) {
> + printk("Punting H_LOGICAL_CI_LOAD\n");
> + return RESUME_HOST;
> + }
> + break;
> + case H_LOGICAL_CI_STORE:
> + ret = kvmppc_h_logical_ci_store(vcpu);
> + if (ret == H_TOO_HARD) {
> + printk("Punting H_LOGICAL_CI_STORE\n");
> + return RESUME_HOST;
> + }
> + break;
>   case H_SET_MODE:
>   ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
>   kvmppc_get_gpr(vcpu, 5),

KERN_DEBUG I guess?  Or even no printk at all perhaps.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/8] vhost/scsi: Add ANY_LAYOUT support

2015-02-02 Thread Christoph Hellwig
Hi Nic,

Al has been rewriting the vhost code to use iov_iter primitives,
can you please rebase it on top of that instead of using the obsolete
infrastructure?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 15/18] vhost: switch vhost get_indirect() to iov_iter, kill memcpy_fromiovec()

2015-02-02 Thread Al Viro
From: Al Viro 

Cc: Michael S. Tsirkin 
Cc: kvm@vger.kernel.org
Signed-off-by: Al Viro 
---
 drivers/vhost/vhost.c |  6 --
 include/linux/uio.h   |  1 -
 lib/iovec.c   | 25 -
 3 files changed, 4 insertions(+), 28 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index cb807d0..2ee2826 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1125,6 +1125,7 @@ static int get_indirect(struct vhost_virtqueue *vq,
struct vring_desc desc;
unsigned int i = 0, count, found = 0;
u32 len = vhost32_to_cpu(vq, indirect->len);
+   struct iov_iter from;
int ret;
 
/* Sanity check */
@@ -1142,6 +1143,7 @@ static int get_indirect(struct vhost_virtqueue *vq,
vq_err(vq, "Translation failure %d in indirect.\n", ret);
return ret;
}
+   iov_iter_init(&from, READ, vq->indirect, ret, len);
 
/* We will use the result as an address to read from, so most
 * architectures only need a compiler barrier here. */
@@ -1164,8 +1166,8 @@ static int get_indirect(struct vhost_virtqueue *vq,
   i, count);
return -EINVAL;
}
-   if (unlikely(memcpy_fromiovec((unsigned char *)&desc,
- vq->indirect, sizeof desc))) {
+   if (unlikely(copy_from_iter(&desc, sizeof(desc), &from) !=
+sizeof(desc))) {
vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
   i, (size_t)vhost64_to_cpu(vq, indirect->addr) + 
i * sizeof desc);
return -EINVAL;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 1c5e453..af3439f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -135,7 +135,6 @@ static inline void iov_iter_reexpand(struct iov_iter *i, 
size_t count)
 size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct 
iov_iter *i);
 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct 
iov_iter *i);
 
-int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
int offset, int len);
 int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
diff --git a/lib/iovec.c b/lib/iovec.c
index 2d99cb4..4a90875 100644
--- a/lib/iovec.c
+++ b/lib/iovec.c
@@ -3,31 +3,6 @@
 #include 
 
 /*
- * Copy iovec to kernel. Returns -EFAULT on error.
- *
- * Note: this modifies the original iovec.
- */
-
-int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
-{
-   while (len > 0) {
-   if (iov->iov_len) {
-   int copy = min_t(unsigned int, len, iov->iov_len);
-   if (copy_from_user(kdata, iov->iov_base, copy))
-   return -EFAULT;
-   len -= copy;
-   kdata += copy;
-   iov->iov_base += copy;
-   iov->iov_len -= copy;
-   }
-   iov++;
-   }
-
-   return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovec);
-
-/*
  * Copy kernel to iovec. Returns -EFAULT on error.
  */
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 18/18] vhost: vhost_scsi_handle_vq() should just use copy_from_user()

2015-02-02 Thread Al Viro
From: Al Viro 

it has just verified that it asks no more than the length of the
first segment of iovec.

And with that the last user of stuff in lib/iovec.c is gone.
RIP.

Cc: Michael S. Tsirkin 
Cc: Nicholas A. Bellinger 
Cc: kvm@vger.kernel.org
Signed-off-by: Al Viro 
---
 drivers/vhost/scsi.c |  2 +-
 include/linux/uio.h  |  2 --
 lib/Makefile |  2 +-
 lib/iovec.c  | 36 
 4 files changed, 2 insertions(+), 40 deletions(-)
 delete mode 100644 lib/iovec.c

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index d695b16..dc78d87 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1079,7 +1079,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct 
vhost_virtqueue *vq)
   req_size, vq->iov[0].iov_len);
break;
}
-   ret = memcpy_fromiovecend(req, &vq->iov[0], 0, req_size);
+   ret = copy_from_user(req, vq->iov[0].iov_base, req_size);
if (unlikely(ret)) {
vq_err(vq, "Faulted on virtio_scsi_cmd_req\n");
break;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 02bd8a9..3e0cb4e 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -135,6 +135,4 @@ static inline void iov_iter_reexpand(struct iov_iter *i, 
size_t count)
 size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct 
iov_iter *i);
 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct 
iov_iter *i);
 
-int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
-   int offset, int len);
 #endif
diff --git a/lib/Makefile b/lib/Makefile
index 3c3b30b..1071d06 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -24,7 +24,7 @@ obj-y += lockref.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
+gcd.o lcm.o list_sort.o uuid.o flex_array.o clz_ctz.o \
 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
diff --git a/lib/iovec.c b/lib/iovec.c
deleted file mode 100644
index d8f17a9..000
--- a/lib/iovec.c
+++ /dev/null
@@ -1,36 +0,0 @@
-#include 
-#include 
-#include 
-
-/*
- * Copy iovec to kernel. Returns -EFAULT on error.
- */
-
-int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
-   int offset, int len)
-{
-   /* No data? Done! */
-   if (len == 0)
-   return 0;
-
-   /* Skip over the finished iovecs */
-   while (offset >= iov->iov_len) {
-   offset -= iov->iov_len;
-   iov++;
-   }
-
-   while (len > 0) {
-   u8 __user *base = iov->iov_base + offset;
-   int copy = min_t(unsigned int, len, iov->iov_len - offset);
-
-   offset = 0;
-   if (copy_from_user(kdata, base, copy))
-   return -EFAULT;
-   len -= copy;
-   kdata += copy;
-   iov++;
-   }
-
-   return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovecend);
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 17/18] vhost: don't bother copying iovecs in handle_rx(), kill memcpy_toiovecend()

2015-02-02 Thread Al Viro
From: Al Viro 

Cc: Michael S. Tsirkin 
Cc: kvm@vger.kernel.org
Signed-off-by: Al Viro 
---
 drivers/vhost/net.c | 79 ++---
 include/linux/uio.h |  3 --
 lib/iovec.c | 26 --
 3 files changed, 20 insertions(+), 88 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index d86cc9b..73c0ebf 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -84,10 +84,6 @@ struct vhost_net_ubuf_ref {
 
 struct vhost_net_virtqueue {
struct vhost_virtqueue vq;
-   /* hdr is used to store the virtio header.
-* Since each iovec has >= 1 byte length, we never need more than
-* header length entries to store the header. */
-   struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
size_t vhost_hlen;
size_t sock_hlen;
/* vhost zerocopy support fields below: */
@@ -235,44 +231,6 @@ static bool vhost_sock_zcopy(struct socket *sock)
sock_flag(sock->sk, SOCK_ZEROCOPY);
 }
 
-/* Pop first len bytes from iovec. Return number of segments used. */
-static int move_iovec_hdr(struct iovec *from, struct iovec *to,
- size_t len, int iov_count)
-{
-   int seg = 0;
-   size_t size;
-
-   while (len && seg < iov_count) {
-   size = min(from->iov_len, len);
-   to->iov_base = from->iov_base;
-   to->iov_len = size;
-   from->iov_len -= size;
-   from->iov_base += size;
-   len -= size;
-   ++from;
-   ++to;
-   ++seg;
-   }
-   return seg;
-}
-/* Copy iovec entries for len bytes from iovec. */
-static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
-  size_t len, int iovcount)
-{
-   int seg = 0;
-   size_t size;
-
-   while (len && seg < iovcount) {
-   size = min(from->iov_len, len);
-   to->iov_base = from->iov_base;
-   to->iov_len = size;
-   len -= size;
-   ++from;
-   ++to;
-   ++seg;
-   }
-}
-
 /* In case of DMA done not in order in lower device driver for some reason.
  * upend_idx is used to track end of used idx, done_idx is used to track head
  * of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -570,9 +528,9 @@ static void handle_rx(struct vhost_net *net)
.msg_controllen = 0,
.msg_flags = MSG_DONTWAIT,
};
-   struct virtio_net_hdr_mrg_rxbuf hdr = {
-   .hdr.flags = 0,
-   .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
+   struct virtio_net_hdr hdr = {
+   .flags = 0,
+   .gso_type = VIRTIO_NET_HDR_GSO_NONE
};
size_t total_len = 0;
int err, mergeable;
@@ -580,6 +538,7 @@ static void handle_rx(struct vhost_net *net)
size_t vhost_hlen, sock_hlen;
size_t vhost_len, sock_len;
struct socket *sock;
+   struct iov_iter fixup;
 
mutex_lock(&vq->mutex);
sock = vq->private_data;
@@ -624,14 +583,17 @@ static void handle_rx(struct vhost_net *net)
break;
}
/* We don't need to be notified again. */
-   if (unlikely((vhost_hlen)))
-   /* Skip header. TODO: support TSO. */
-   move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in);
-   else
-   /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
-* needed because recvmsg can modify msg_iov. */
-   copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in);
-   iov_iter_init(&msg.msg_iter, READ, vq->iov, in, sock_len);
+   iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
+   fixup = msg.msg_iter;
+   if (unlikely((vhost_hlen))) {
+   /* We will supply the header ourselves
+* TODO: support TSO. */
+   iov_iter_advance(&msg.msg_iter, vhost_hlen);
+   } else {
+   /* It'll come from socket; we'll need to patch
+* ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF */
+   iov_iter_advance(&fixup, sizeof(hdr));
+   }
err = sock->ops->recvmsg(NULL, sock, &msg,
 sock_len, MSG_DONTWAIT | MSG_TRUNC);
/* Userspace might have consumed the packet meanwhile:
@@ -643,18 +605,17 @@ static void handle_rx(struct vhost_net *net)
vhost_discard_vq_desc(vq, headcount);
continue;
}
+   /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */
if (unlikely(vhost_hlen) &&
-   memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0,
- 

[PATCH v2 16/18] vhost: don't bother with copying iovec in handle_tx()

2015-02-02 Thread Al Viro
From: Al Viro 

just advance the msg.msg_iter and be done with that.

Cc: Michael S. Tsirkin 
Cc: kvm@vger.kernel.org
Signed-off-by: Al Viro 
---
 drivers/vhost/net.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 6906f76..d86cc9b 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -336,7 +336,7 @@ static void handle_tx(struct vhost_net *net)
 {
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
-   unsigned out, in, s;
+   unsigned out, in;
int head;
struct msghdr msg = {
.msg_name = NULL,
@@ -395,16 +395,17 @@ static void handle_tx(struct vhost_net *net)
break;
}
/* Skip header. TODO: support TSO. */
-   s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
len = iov_length(vq->iov, out);
iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
+   iov_iter_advance(&msg.msg_iter, hdr_size);
/* Sanity check */
-   if (!len) {
+   if (!iov_iter_count(&msg.msg_iter)) {
vq_err(vq, "Unexpected header len for TX: "
   "%zd expected %zd\n",
-  iov_length(nvq->hdr, s), hdr_size);
+  len, hdr_size);
break;
}
+   len = iov_iter_count(&msg.msg_iter);
 
zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
   && (nvq->upend_idx + 1) % UIO_MAXIOV !=
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html