[PATCH v4 2/6] virtio_ring: Support DMA APIs

2015-11-01 Thread Andy Lutomirski
virtio_ring currently sends the device (usually a hypervisor)
physical addresses of its I/O buffers.  This is okay when DMA
addresses and physical addresses are the same thing, but this isn't
always the case.  For example, this never works on Xen guests, and
it is likely to fail if a physical "virtio" device ever ends up
behind an IOMMU or swiotlb.

The immediate use case for me is to enable virtio on Xen guests.
For that to work, we need vring to support DMA address translation
as well as a corresponding change to virtio_pci or to another
driver.

With this patch, if enabled, virtfs survives kmemleak and
CONFIG_DMA_API_DEBUG.

Signed-off-by: Andy Lutomirski 
---
 drivers/virtio/Kconfig   |   2 +-
 drivers/virtio/virtio_ring.c | 190 +++
 tools/virtio/linux/dma-mapping.h |  17 
 3 files changed, 172 insertions(+), 37 deletions(-)
 create mode 100644 tools/virtio/linux/dma-mapping.h

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index cab9f3f63a38..77590320d44c 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -60,7 +60,7 @@ config VIRTIO_INPUT
 
  config VIRTIO_MMIO
tristate "Platform bus driver for memory mapped virtio devices"
-   depends on HAS_IOMEM
+   depends on HAS_IOMEM && HAS_DMA
select VIRTIO
---help---
 This drivers provides support for memory mapped virtio
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 096b857e7b75..a872eb89587f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/hrtimer.h>
 #include <linux/kmemleak.h>
+#include <linux/dma-mapping.h>
 
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
@@ -54,7 +55,14 @@
 #define END_USE(vq)
 #endif
 
-struct vring_virtqueue {
+struct vring_desc_state
+{
+   void *data; /* Data for callback. */
+   struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
+};
+
+struct vring_virtqueue
+{
struct virtqueue vq;
 
/* Actual memory layout for this queue */
@@ -92,12 +100,71 @@ struct vring_virtqueue {
ktime_t last_add_time;
 #endif
 
-   /* Tokens for callbacks. */
-   void *data[];
+   /* Per-descriptor state. */
+   struct vring_desc_state desc_state[];
 };
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+/*
+ * The DMA ops on various arches are rather gnarly right now, and
+ * making all of the arch DMA ops work on the vring device itself
+ * is a mess.  For now, we use the parent device for DMA ops.
+ */
+struct device *vring_dma_dev(const struct vring_virtqueue *vq)
+{
+   return vq->vq.vdev->dev.parent;
+}
+
+/* Map one sg entry. */
+static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
+  struct scatterlist *sg,
+  enum dma_data_direction direction)
+{
+   /*
+* We can't use dma_map_sg, because we don't use scatterlists in
+* the way it expects (we don't guarantee that the scatterlist
+* will exist for the lifetime of the mapping).
+*/
+   return dma_map_page(vring_dma_dev(vq),
+   sg_page(sg), sg->offset, sg->length,
+   direction);
+}
+
+static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
+  void *cpu_addr, size_t size,
+  enum dma_data_direction direction)
+{
+   return dma_map_single(vring_dma_dev(vq),
+ cpu_addr, size, direction);
+}
+
+static void vring_unmap_one(const struct vring_virtqueue *vq,
+   struct vring_desc *desc)
+{
+   u16 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+   if (flags & VRING_DESC_F_INDIRECT) {
+   dma_unmap_single(vring_dma_dev(vq),
+virtio64_to_cpu(vq->vq.vdev, desc->addr),
+virtio32_to_cpu(vq->vq.vdev, desc->len),
+(flags & VRING_DESC_F_WRITE) ?
+DMA_FROM_DEVICE : DMA_TO_DEVICE);
+   } else {
+   dma_unmap_page(vring_dma_dev(vq),
+  virtio64_to_cpu(vq->vq.vdev, desc->addr),
+  virtio32_to_cpu(vq->vq.vdev, desc->len),
+  (flags & VRING_DESC_F_WRITE) ?
+  DMA_FROM_DEVICE : DMA_TO_DEVICE);
+   }
+}
+
+static int vring_mapping_error(const struct vring_virtqueue *vq,
+  dma_addr_t addr)
+{
+   return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+
 static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
 unsigned int total_sg, gfp_t gfp)
 {
@@ -131,7 +198,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
struct vring_virtqueue *vq =

Re: [PATCH v4 2/6] virtio_ring: Support DMA APIs

2015-10-30 Thread Andy Lutomirski
On Fri, Oct 30, 2015 at 5:05 AM, Christian Borntraeger
 wrote:
> Am 30.10.2015 um 13:01 schrieb Cornelia Huck:
>> On Thu, 29 Oct 2015 18:09:47 -0700
>> Andy Lutomirski  wrote:
>>
>>> virtio_ring currently sends the device (usually a hypervisor)
>>> physical addresses of its I/O buffers.  This is okay when DMA
>>> addresses and physical addresses are the same thing, but this isn't
>>> always the case.  For example, this never works on Xen guests, and
>>> it is likely to fail if a physical "virtio" device ever ends up
>>> behind an IOMMU or swiotlb.
>>>
>>> The immediate use case for me is to enable virtio on Xen guests.
>>> For that to work, we need vring to support DMA address translation
>>> as well as a corresponding change to virtio_pci or to another
>>> driver.
>>>
>>> With this patch, if enabled, virtfs survives kmemleak and
>>> CONFIG_DMA_API_DEBUG.
>>>
>>> Signed-off-by: Andy Lutomirski 
>>> ---
>>>  drivers/virtio/Kconfig   |   2 +-
>>>  drivers/virtio/virtio_ring.c | 190 
>>> +++
>>>  tools/virtio/linux/dma-mapping.h |  17 
>>>  3 files changed, 172 insertions(+), 37 deletions(-)
>>>  create mode 100644 tools/virtio/linux/dma-mapping.h
>>
>>>  static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
>>>  {
>>> -unsigned int i;
>>> +unsigned int i, j;
>>> +u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
>>>
>>>  /* Clear data ptr. */
>>> -vq->data[head] = NULL;
>>> +vq->desc_state[head].data = NULL;
>>>
>>> -/* Put back on free list: find end */
>>> +/* Put back on free list: unmap first-level descriptors and find end */
>>>  i = head;
>>>
>>> -/* Free the indirect table */
>>> -if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, 
>>> VRING_DESC_F_INDIRECT))
>>> -kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, 
>>> vq->vring.desc[i].addr)));
>>> -
>>> -while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, 
>>> VRING_DESC_F_NEXT)) {
>>> +while (vq->vring.desc[i].flags & nextflag) {
>>> +vring_unmap_one(vq, &vq->vring.desc[i]);
>>>  i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
>>>  vq->vq.num_free++;
>>>  }
>>>
>>> +vring_unmap_one(vq, &vq->vring.desc[i]);
>>>  vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
>>>  vq->free_head = head;
>>> +
>>>  /* Plus final descriptor */
>>>  vq->vq.num_free++;
>>> +
>>> +/* Free the indirect table, if any, now that it's unmapped. */
>>> +if (vq->desc_state[head].indir_desc) {
>>> +struct vring_desc *indir_desc = 
>>> vq->desc_state[head].indir_desc;
>>> +u32 len = vq->vring.desc[head].len;
>>
>> This one needs to be virtio32_to_cpu(...) as well.
>
> Yes, just did the exact same change
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index f269e1c..f2249df 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -556,7 +556,7 @@ static void detach_buf(struct vring_virtqueue *vq, 
> unsigned int head)
> /* Free the indirect table, if any, now that it's unmapped. */
> if (vq->desc_state[head].indir_desc) {
> struct vring_desc *indir_desc = 
> vq->desc_state[head].indir_desc;
> -   u32 len = vq->vring.desc[head].len;
> +   u32 len = virtio32_to_cpu(vq->vq.vdev, 
> vq->vring.desc[head].len);
>
> BUG_ON(!(vq->vring.desc[head].flags &
>  cpu_to_virtio16(vq->vq.vdev, 
> VRING_DESC_F_INDIRECT)));
>
>
> now it boots.

Thanks!  I applied this to my tree.  I won't send a new version quite
yet, though, to reduce inbox load.

--Andy
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v4 2/6] virtio_ring: Support DMA APIs

2015-10-30 Thread Christian Borntraeger
Am 30.10.2015 um 13:01 schrieb Cornelia Huck:
> On Thu, 29 Oct 2015 18:09:47 -0700
> Andy Lutomirski  wrote:
> 
>> virtio_ring currently sends the device (usually a hypervisor)
>> physical addresses of its I/O buffers.  This is okay when DMA
>> addresses and physical addresses are the same thing, but this isn't
>> always the case.  For example, this never works on Xen guests, and
>> it is likely to fail if a physical "virtio" device ever ends up
>> behind an IOMMU or swiotlb.
>>
>> The immediate use case for me is to enable virtio on Xen guests.
>> For that to work, we need vring to support DMA address translation
>> as well as a corresponding change to virtio_pci or to another
>> driver.
>>
>> With this patch, if enabled, virtfs survives kmemleak and
>> CONFIG_DMA_API_DEBUG.
>>
>> Signed-off-by: Andy Lutomirski 
>> ---
>>  drivers/virtio/Kconfig   |   2 +-
>>  drivers/virtio/virtio_ring.c | 190 
>> +++
>>  tools/virtio/linux/dma-mapping.h |  17 
>>  3 files changed, 172 insertions(+), 37 deletions(-)
>>  create mode 100644 tools/virtio/linux/dma-mapping.h
> 
>>  static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
>>  {
>> -unsigned int i;
>> +unsigned int i, j;
>> +u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
>>
>>  /* Clear data ptr. */
>> -vq->data[head] = NULL;
>> +vq->desc_state[head].data = NULL;
>>
>> -/* Put back on free list: find end */
>> +/* Put back on free list: unmap first-level descriptors and find end */
>>  i = head;
>>
>> -/* Free the indirect table */
>> -if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, 
>> VRING_DESC_F_INDIRECT))
>> -kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, 
>> vq->vring.desc[i].addr)));
>> -
>> -while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, 
>> VRING_DESC_F_NEXT)) {
>> +while (vq->vring.desc[i].flags & nextflag) {
>> +vring_unmap_one(vq, &vq->vring.desc[i]);
>>  i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
>>  vq->vq.num_free++;
>>  }
>>
>> +vring_unmap_one(vq, &vq->vring.desc[i]);
>>  vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
>>  vq->free_head = head;
>> +
>>  /* Plus final descriptor */
>>  vq->vq.num_free++;
>> +
>> +/* Free the indirect table, if any, now that it's unmapped. */
>> +if (vq->desc_state[head].indir_desc) {
>> +struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
>> +u32 len = vq->vring.desc[head].len;
> 
> This one needs to be virtio32_to_cpu(...) as well.

Yes, just did the exact same change
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index f269e1c..f2249df 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -556,7 +556,7 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
/* Free the indirect table, if any, now that it's unmapped. */
if (vq->desc_state[head].indir_desc) {
struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
-   u32 len = vq->vring.desc[head].len;
+   u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
 
BUG_ON(!(vq->vring.desc[head].flags &
 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));


now it boots.
> 
>> +
>> +BUG_ON(!(vq->vring.desc[head].flags &
>> + cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
>> +BUG_ON(len == 0 || len % sizeof(struct vring_desc));
>> +
>> +for (j = 0; j < len / sizeof(struct vring_desc); j++)
>> +vring_unmap_one(vq, &indir_desc[j]);
>> +
>> +kfree(vq->desc_state[head].indir_desc);
>> +vq->desc_state[head].indir_desc = NULL;
>> +}
>>  }
> 
> With that change on top of your current branch, I can boot (root on
> virtio-blk, either virtio-1 or legacy virtio) on current qemu master
> with kvm enabled on s390. Haven't tried anything further.
> 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v4 2/6] virtio_ring: Support DMA APIs

2015-10-30 Thread Cornelia Huck
On Thu, 29 Oct 2015 18:09:47 -0700
Andy Lutomirski  wrote:

> virtio_ring currently sends the device (usually a hypervisor)
> physical addresses of its I/O buffers.  This is okay when DMA
> addresses and physical addresses are the same thing, but this isn't
> always the case.  For example, this never works on Xen guests, and
> it is likely to fail if a physical "virtio" device ever ends up
> behind an IOMMU or swiotlb.
> 
> The immediate use case for me is to enable virtio on Xen guests.
> For that to work, we need vring to support DMA address translation
> as well as a corresponding change to virtio_pci or to another
> driver.
> 
> With this patch, if enabled, virtfs survives kmemleak and
> CONFIG_DMA_API_DEBUG.
> 
> Signed-off-by: Andy Lutomirski 
> ---
>  drivers/virtio/Kconfig   |   2 +-
>  drivers/virtio/virtio_ring.c | 190 
> +++
>  tools/virtio/linux/dma-mapping.h |  17 
>  3 files changed, 172 insertions(+), 37 deletions(-)
>  create mode 100644 tools/virtio/linux/dma-mapping.h

>  static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
>  {
> - unsigned int i;
> + unsigned int i, j;
> + u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
> 
>   /* Clear data ptr. */
> - vq->data[head] = NULL;
> + vq->desc_state[head].data = NULL;
> 
> - /* Put back on free list: find end */
> + /* Put back on free list: unmap first-level descriptors and find end */
>   i = head;
> 
> - /* Free the indirect table */
> - if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, 
> VRING_DESC_F_INDIRECT))
> - kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, 
> vq->vring.desc[i].addr)));
> -
> - while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, 
> VRING_DESC_F_NEXT)) {
> + while (vq->vring.desc[i].flags & nextflag) {
> + vring_unmap_one(vq, &vq->vring.desc[i]);
>   i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
>   vq->vq.num_free++;
>   }
> 
> + vring_unmap_one(vq, &vq->vring.desc[i]);
>   vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
>   vq->free_head = head;
> +
>   /* Plus final descriptor */
>   vq->vq.num_free++;
> +
> + /* Free the indirect table, if any, now that it's unmapped. */
> + if (vq->desc_state[head].indir_desc) {
> + struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
> + u32 len = vq->vring.desc[head].len;

This one needs to be virtio32_to_cpu(...) as well.

> +
> + BUG_ON(!(vq->vring.desc[head].flags &
> +  cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
> + BUG_ON(len == 0 || len % sizeof(struct vring_desc));
> +
> + for (j = 0; j < len / sizeof(struct vring_desc); j++)
> + vring_unmap_one(vq, &indir_desc[j]);
> +
> + kfree(vq->desc_state[head].indir_desc);
> + vq->desc_state[head].indir_desc = NULL;
> + }
>  }

With that change on top of your current branch, I can boot (root on
virtio-blk, either virtio-1 or legacy virtio) on current qemu master
with kvm enabled on s390. Haven't tried anything further.

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization