Re: [Qemu-devel] [PATCH for-4.2 v10 03/15] virtio-iommu: Add skeleton

2019-08-30 Thread Auger Eric
Hi Peter,
On 8/30/19 3:26 AM, Peter Xu wrote:
> On Thu, Aug 29, 2019 at 02:18:42PM +0200, Auger Eric wrote:
>> Hi Peter,
>>
>> First of all, please forgive me for the delay.
>> On 8/15/19 3:54 PM, Peter Xu wrote:
>>> On Tue, Jul 30, 2019 at 07:21:25PM +0200, Eric Auger wrote:
 +static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
 +{
 +VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
 +struct virtio_iommu_req_head head;
 +struct virtio_iommu_req_tail tail;
>>>
>>> [1]
>>>
 +VirtQueueElement *elem;
 +unsigned int iov_cnt;
 +struct iovec *iov;
 +size_t sz;
 +
 +for (;;) {
 +elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 +if (!elem) {
 +return;
 +}
 +
 +if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
 +iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
 +virtio_error(vdev, "virtio-iommu bad head/tail size");
 +virtqueue_detach_element(vq, elem, 0);
 +g_free(elem);
 +break;
 +}
 +
 +iov_cnt = elem->out_num;
 +iov = g_memdup(elem->out_sg, sizeof(struct iovec) * 
 elem->out_num);
>>>
>>> Could I ask why memdup is needed here?
>> Indeed I don't think it is needed and besides iov is not freed!
>>
>> I got inspired from hw/net/virtio-net.c. To be honest I don't get why
>> the g_memdup is needed there either. The out_sg gets duplicated and
>> commands work on the duplicated data and not in place.
> 
> Oh true, I found that it's because of calling of iov_discard_front().
> Please have a look at 771b6ed37e3.  Though it seems to me that
> virtio-iommu does not truncate iovs so it should not be needed.

thanks for the sha1. indeed virtio-iommu does not use iov_discard_front
so I shouldn't need it.
> 
>>>
 +sz = iov_to_buf(iov, iov_cnt, 0, , sizeof(head));
 +if (unlikely(sz != sizeof(head))) {
 +tail.status = VIRTIO_IOMMU_S_DEVERR;
>>>
>>> Do you need to zero the reserved bits to make sure it won't contain
>>> garbage?  Same question to below uses of tail.
>> yes. I initialized tail.
>>>
 +goto out;
 +}
 +qemu_mutex_lock(>mutex);
 +switch (head.type) {
 +case VIRTIO_IOMMU_T_ATTACH:
 +tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
 +break;
 +case VIRTIO_IOMMU_T_DETACH:
 +tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
 +break;
 +case VIRTIO_IOMMU_T_MAP:
 +tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
 +break;
 +case VIRTIO_IOMMU_T_UNMAP:
 +tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
 +break;
 +default:
 +tail.status = VIRTIO_IOMMU_S_UNSUPP;
 +}
 +qemu_mutex_unlock(>mutex);
 +
 +out:
 +sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
 +  , sizeof(tail));
 +assert(sz == sizeof(tail));
 +
 +virtqueue_push(vq, elem, sizeof(tail));
>>>
>>> s/tail/head/ (though they are the same size)?
>> That's unclear to me. Similarly when checking against virtio-net.c, the
>> element is pushed back to the used ring and len is set to the size of
>> the status with:
>>
>> /*
>>  * Control virtqueue data structures
>>  *
>>  * The control virtqueue expects a header in the first sg entry
>>  * and an ack/status response in the last entry.  Data for the
>>  * command goes in between.
>>  */
> 
> I was referencing the balloon code when reading the patch, e.g.,
> virtio_balloon_handle_output().  Though after I read more carefully I
> see that other places are using it as you described.  Now I tend to
> agree with you, because virtqueue_push() who calls
> virtqueue_unmap_sg() used the len to unmap in_sg[] rather than
> out_sg[].  So please ignore my previous comment.

OK
> 
> (then I'm not sure whether the usage in the balloon code was correct
>  now...)
> 
>>>
 +virtio_notify(vdev, vq);
 +g_free(elem);
 +}
 +}
>>>
>>> [...]
>>>
 +static void virtio_iommu_set_features(VirtIODevice *vdev, uint64_t val)
 +{
 +VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
 +
 +dev->acked_features = val;
 +trace_virtio_iommu_set_features(dev->acked_features);
 +}
 +
 +static const VMStateDescription vmstate_virtio_iommu_device = {
 +.name = "virtio-iommu-device",
 +.unmigratable = 1,
>>>
>>> Curious, is there explicit reason to not support migration from the
>>> first version? :)
>> The state is made of red black trees, lists. For the former there is no
>> VMSTATE* ready. I am working on it but I think this should be handled
>> separately
> 
> Fair 

Re: [Qemu-devel] [PATCH for-4.2 v10 03/15] virtio-iommu: Add skeleton

2019-08-29 Thread Peter Xu
On Thu, Aug 29, 2019 at 02:18:42PM +0200, Auger Eric wrote:
> Hi Peter,
> 
> First of all, please forgive me for the delay.
> On 8/15/19 3:54 PM, Peter Xu wrote:
> > On Tue, Jul 30, 2019 at 07:21:25PM +0200, Eric Auger wrote:
> >> +static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
> >> +{
> >> +VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
> >> +struct virtio_iommu_req_head head;
> >> +struct virtio_iommu_req_tail tail;
> > 
> > [1]
> > 
> >> +VirtQueueElement *elem;
> >> +unsigned int iov_cnt;
> >> +struct iovec *iov;
> >> +size_t sz;
> >> +
> >> +for (;;) {
> >> +elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> >> +if (!elem) {
> >> +return;
> >> +}
> >> +
> >> +if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
> >> +iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
> >> +virtio_error(vdev, "virtio-iommu bad head/tail size");
> >> +virtqueue_detach_element(vq, elem, 0);
> >> +g_free(elem);
> >> +break;
> >> +}
> >> +
> >> +iov_cnt = elem->out_num;
> >> +iov = g_memdup(elem->out_sg, sizeof(struct iovec) * 
> >> elem->out_num);
> > 
> > Could I ask why memdup is needed here?
> Indeed I don't think it is needed and besides iov is not freed!
> 
> I got inspired from hw/net/virtio-net.c. To be honest I don't get why
> the g_memdup is needed there either. The out_sg gets duplicated and
> commands work on the duplicated data and not in place.

Oh true, I found that it's because of calling of iov_discard_front().
Please have a look at 771b6ed37e3.  Though it seems to me that
virtio-iommu does not truncate iovs so it should not be needed.

> > 
> >> +sz = iov_to_buf(iov, iov_cnt, 0, , sizeof(head));
> >> +if (unlikely(sz != sizeof(head))) {
> >> +tail.status = VIRTIO_IOMMU_S_DEVERR;
> > 
> > Do you need to zero the reserved bits to make sure it won't contain
> > garbage?  Same question to below uses of tail.
> yes. I initialized tail.
> > 
> >> +goto out;
> >> +}
> >> +qemu_mutex_lock(>mutex);
> >> +switch (head.type) {
> >> +case VIRTIO_IOMMU_T_ATTACH:
> >> +tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
> >> +break;
> >> +case VIRTIO_IOMMU_T_DETACH:
> >> +tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
> >> +break;
> >> +case VIRTIO_IOMMU_T_MAP:
> >> +tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
> >> +break;
> >> +case VIRTIO_IOMMU_T_UNMAP:
> >> +tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
> >> +break;
> >> +default:
> >> +tail.status = VIRTIO_IOMMU_S_UNSUPP;
> >> +}
> >> +qemu_mutex_unlock(>mutex);
> >> +
> >> +out:
> >> +sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
> >> +  , sizeof(tail));
> >> +assert(sz == sizeof(tail));
> >> +
> >> +virtqueue_push(vq, elem, sizeof(tail));
> > 
> > s/tail/head/ (though they are the same size)?
> That's unclear to me. Similarly when checking against virtio-net.c, the
> element is pushed back to the used ring and len is set to the size of
> the status with:
> 
> /*
>  * Control virtqueue data structures
>  *
>  * The control virtqueue expects a header in the first sg entry
>  * and an ack/status response in the last entry.  Data for the
>  * command goes in between.
>  */

I was referencing the balloon code when reading the patch, e.g.,
virtio_balloon_handle_output().  Though after I read more carefully I
see that other places are using it as you described.  Now I tend to
agree with you, because virtqueue_push() who calls
virtqueue_unmap_sg() used the len to unmap in_sg[] rather than
out_sg[].  So please ignore my previous comment.

(then I'm not sure whether the usage in the balloon code was correct
 now...)

> > 
> >> +virtio_notify(vdev, vq);
> >> +g_free(elem);
> >> +}
> >> +}
> > 
> > [...]
> > 
> >> +static void virtio_iommu_set_features(VirtIODevice *vdev, uint64_t val)
> >> +{
> >> +VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
> >> +
> >> +dev->acked_features = val;
> >> +trace_virtio_iommu_set_features(dev->acked_features);
> >> +}
> >> +
> >> +static const VMStateDescription vmstate_virtio_iommu_device = {
> >> +.name = "virtio-iommu-device",
> >> +.unmigratable = 1,
> > 
> > Curious, is there explicit reason to not support migration from the
> > first version? :)
> The state is made of red black trees, lists. For the former there is no
> VMSTATE* ready. I am working on it but I think this should be handled
> separately

Fair enough.  Would you mind to add a similar comment above
unmigratable?

Thanks,

-- 
Peter Xu



Re: [Qemu-devel] [PATCH for-4.2 v10 03/15] virtio-iommu: Add skeleton

2019-08-29 Thread Auger Eric
Hi Peter,

First of all, please forgive me for the delay.
On 8/15/19 3:54 PM, Peter Xu wrote:
> On Tue, Jul 30, 2019 at 07:21:25PM +0200, Eric Auger wrote:
>> +static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
>> +{
>> +VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
>> +struct virtio_iommu_req_head head;
>> +struct virtio_iommu_req_tail tail;
> 
> [1]
> 
>> +VirtQueueElement *elem;
>> +unsigned int iov_cnt;
>> +struct iovec *iov;
>> +size_t sz;
>> +
>> +for (;;) {
>> +elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
>> +if (!elem) {
>> +return;
>> +}
>> +
>> +if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
>> +iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
>> +virtio_error(vdev, "virtio-iommu bad head/tail size");
>> +virtqueue_detach_element(vq, elem, 0);
>> +g_free(elem);
>> +break;
>> +}
>> +
>> +iov_cnt = elem->out_num;
>> +iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
> 
> Could I ask why memdup is needed here?
Indeed I don't think it is needed and besides iov is not freed!

I got inspired from hw/net/virtio-net.c. To be honest I don't get why
the g_memdup is needed there either. The out_sg gets duplicated and
commands work on the duplicated data and not in place.
> 
>> +sz = iov_to_buf(iov, iov_cnt, 0, , sizeof(head));
>> +if (unlikely(sz != sizeof(head))) {
>> +tail.status = VIRTIO_IOMMU_S_DEVERR;
> 
> Do you need to zero the reserved bits to make sure it won't contain
> garbage?  Same question to below uses of tail.
yes. I initialized tail.
> 
>> +goto out;
>> +}
>> +qemu_mutex_lock(>mutex);
>> +switch (head.type) {
>> +case VIRTIO_IOMMU_T_ATTACH:
>> +tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
>> +break;
>> +case VIRTIO_IOMMU_T_DETACH:
>> +tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
>> +break;
>> +case VIRTIO_IOMMU_T_MAP:
>> +tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
>> +break;
>> +case VIRTIO_IOMMU_T_UNMAP:
>> +tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
>> +break;
>> +default:
>> +tail.status = VIRTIO_IOMMU_S_UNSUPP;
>> +}
>> +qemu_mutex_unlock(>mutex);
>> +
>> +out:
>> +sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
>> +  , sizeof(tail));
>> +assert(sz == sizeof(tail));
>> +
>> +virtqueue_push(vq, elem, sizeof(tail));
> 
> s/tail/head/ (though they are the same size)?
That's unclear to me. Similarly when checking against virtio-net.c, the
element is pushed back to the used ring and len is set to the size of
the status with:

/*
 * Control virtqueue data structures
 *
 * The control virtqueue expects a header in the first sg entry
 * and an ack/status response in the last entry.  Data for the
 * command goes in between.
 */
> 
>> +virtio_notify(vdev, vq);
>> +g_free(elem);
>> +}
>> +}
> 
> [...]
> 
>> +static void virtio_iommu_set_features(VirtIODevice *vdev, uint64_t val)
>> +{
>> +VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
>> +
>> +dev->acked_features = val;
>> +trace_virtio_iommu_set_features(dev->acked_features);
>> +}
>> +
>> +static const VMStateDescription vmstate_virtio_iommu_device = {
>> +.name = "virtio-iommu-device",
>> +.unmigratable = 1,
> 
> Curious, is there explicit reason to not support migration from the
> first version? :)
The state is made of red black trees, lists. For the former there is no
VMSTATE* ready. I am working on it but I think this should be handled
separately
> 
>> +};
>> +
>> +static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>> +{
>> +VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> +VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
>> +
>> +virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
>> +sizeof(struct virtio_iommu_config));
>> +
>> +s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
>> + virtio_iommu_handle_command);
>> +s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
>> +
>> +s->config.page_size_mask = TARGET_PAGE_MASK;
>> +s->config.input_range.end = -1UL;
>> +s->config.domain_range.start = 0;
> 
> Zero input_range.start = 0?  After all domain_range.start is zeroed.
virtio_init does:
if (vdev->config_len) {
vdev->config = g_malloc0(config_size);

but I should be homogeneous and then remove s->config.domain_range.start
= 0;
> 
>> +s->config.domain_range.end = 32;
>> +
>> +virtio_add_feature(>features, VIRTIO_RING_F_EVENT_IDX);
>> +virtio_add_feature(>features, VIRTIO_RING_F_INDIRECT_DESC);
>> +virtio_add_feature(>features, 

Re: [Qemu-devel] [PATCH for-4.2 v10 03/15] virtio-iommu: Add skeleton

2019-08-15 Thread Peter Xu
On Tue, Jul 30, 2019 at 07:21:25PM +0200, Eric Auger wrote:
> +static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
> +struct virtio_iommu_req_head head;
> +struct virtio_iommu_req_tail tail;

[1]

> +VirtQueueElement *elem;
> +unsigned int iov_cnt;
> +struct iovec *iov;
> +size_t sz;
> +
> +for (;;) {
> +elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> +if (!elem) {
> +return;
> +}
> +
> +if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
> +iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
> +virtio_error(vdev, "virtio-iommu bad head/tail size");
> +virtqueue_detach_element(vq, elem, 0);
> +g_free(elem);
> +break;
> +}
> +
> +iov_cnt = elem->out_num;
> +iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);

Could I ask why memdup is needed here?

> +sz = iov_to_buf(iov, iov_cnt, 0, , sizeof(head));
> +if (unlikely(sz != sizeof(head))) {
> +tail.status = VIRTIO_IOMMU_S_DEVERR;

Do you need to zero the reserved bits to make sure it won't contain
garbage?  Same question to below uses of tail.

> +goto out;
> +}
> +qemu_mutex_lock(>mutex);
> +switch (head.type) {
> +case VIRTIO_IOMMU_T_ATTACH:
> +tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
> +break;
> +case VIRTIO_IOMMU_T_DETACH:
> +tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
> +break;
> +case VIRTIO_IOMMU_T_MAP:
> +tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
> +break;
> +case VIRTIO_IOMMU_T_UNMAP:
> +tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
> +break;
> +default:
> +tail.status = VIRTIO_IOMMU_S_UNSUPP;
> +}
> +qemu_mutex_unlock(>mutex);
> +
> +out:
> +sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
> +  , sizeof(tail));
> +assert(sz == sizeof(tail));
> +
> +virtqueue_push(vq, elem, sizeof(tail));

s/tail/head/ (though they are the same size)?

> +virtio_notify(vdev, vq);
> +g_free(elem);
> +}
> +}

[...]

> +static void virtio_iommu_set_features(VirtIODevice *vdev, uint64_t val)
> +{
> +VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
> +
> +dev->acked_features = val;
> +trace_virtio_iommu_set_features(dev->acked_features);
> +}
> +
> +static const VMStateDescription vmstate_virtio_iommu_device = {
> +.name = "virtio-iommu-device",
> +.unmigratable = 1,

Curious, is there explicit reason to not support migration from the
first version? :)

> +};
> +
> +static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
> +{
> +VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
> +
> +virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
> +sizeof(struct virtio_iommu_config));
> +
> +s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
> + virtio_iommu_handle_command);
> +s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
> +
> +s->config.page_size_mask = TARGET_PAGE_MASK;
> +s->config.input_range.end = -1UL;
> +s->config.domain_range.start = 0;

Zero input_range.start = 0?  After all domain_range.start is zeroed.

> +s->config.domain_range.end = 32;
> +
> +virtio_add_feature(>features, VIRTIO_RING_F_EVENT_IDX);
> +virtio_add_feature(>features, VIRTIO_RING_F_INDIRECT_DESC);
> +virtio_add_feature(>features, VIRTIO_F_VERSION_1);
> +virtio_add_feature(>features, VIRTIO_IOMMU_F_INPUT_RANGE);
> +virtio_add_feature(>features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
> +virtio_add_feature(>features, VIRTIO_IOMMU_F_MAP_UNMAP);
> +virtio_add_feature(>features, VIRTIO_IOMMU_F_BYPASS);
> +virtio_add_feature(>features, VIRTIO_IOMMU_F_MMIO);
> +}

Regards,

-- 
Peter Xu



[Qemu-devel] [PATCH for-4.2 v10 03/15] virtio-iommu: Add skeleton

2019-07-30 Thread Eric Auger
This patchs adds the skeleton for the virtio-iommu device.

Signed-off-by: Eric Auger 

---

v9 -> v10:
- expose VIRTIO_IOMMU_F_MMIO feature
- s/domain_bits/domain_range struct
- change error codes
- enforce unmigratable
- Kconfig

v7 -> v8:
- expose VIRTIO_IOMMU_F_BYPASS and VIRTIO_F_VERSION_1
  features
- set_config dummy implementation + tracing
- add trace in get_features
- set the features on realize() and store the acked ones
- remove inclusion of linux/virtio_iommu.h

v6 -> v7:
- removed qapi-event.h include
- add primary_bus and associated property

v4 -> v5:
- use the new v0.5 terminology (domain, endpoint)
- add the event virtqueue

v3 -> v4:
- use page_size_mask instead of page_sizes
- added set_features()
- added some traces (reset, set_status, set_features)
- empty virtio_iommu_set_config() as the driver MUST NOT
  write to device configuration fields
- add get_config trace

v2 -> v3:
- rebase on 2.10-rc0, ie. use IOMMUMemoryRegion and remove
  iommu_ops.
- advertise VIRTIO_IOMMU_F_MAP_UNMAP feature
- page_sizes set to TARGET_PAGE_SIZE

Conflicts:
hw/virtio/trace-events
---
 hw/virtio/Kconfig|   5 +
 hw/virtio/Makefile.objs  |   1 +
 hw/virtio/trace-events   |   8 +
 hw/virtio/virtio-iommu.c | 267 +++
 include/hw/virtio/virtio-iommu.h |  62 +++
 5 files changed, 343 insertions(+)
 create mode 100644 hw/virtio/virtio-iommu.c
 create mode 100644 include/hw/virtio/virtio-iommu.h

diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
index 3724ff8bac..a30107b439 100644
--- a/hw/virtio/Kconfig
+++ b/hw/virtio/Kconfig
@@ -6,6 +6,11 @@ config VIRTIO_RNG
 default y
 depends on VIRTIO
 
+config VIRTIO_IOMMU
+bool
+default y
+depends on VIRTIO
+
 config VIRTIO_PCI
 bool
 default y if PCI_DEVICES
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index 964ce78607..f42e4dd94f 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -14,6 +14,7 @@ obj-$(CONFIG_VIRTIO_CRYPTO) += virtio-crypto.o
 obj-$(call land,$(CONFIG_VIRTIO_CRYPTO),$(CONFIG_VIRTIO_PCI)) += 
virtio-crypto-pci.o
 obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o
 common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += 
virtio-pmem-pci.o
+obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
 obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o
 
 ifeq ($(CONFIG_VIRTIO_PCI),y)
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index e28ba48da6..f7dac39213 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -53,3 +53,11 @@ virtio_mmio_write_offset(uint64_t offset, uint64_t value) 
"virtio_mmio_write off
 virtio_mmio_guest_page(uint64_t size, int shift) "guest page size 0x%" PRIx64 
" shift %d"
 virtio_mmio_queue_write(uint64_t value, int max_size) "mmio_queue write 0x%" 
PRIx64 " max %d"
 virtio_mmio_setting_irq(int level) "virtio_mmio setting IRQ %d"
+
+# hw/virtio/virtio-iommu.c
+virtio_iommu_device_reset(void) "reset!"
+virtio_iommu_get_features(uint64_t features) "device supports 
features=0x%"PRIx64
+virtio_iommu_set_features(uint64_t features) "features accepted by the driver 
=0x%"PRIx64
+virtio_iommu_device_status(uint8_t status) "driver status = %d"
+virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, 
uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" 
start=0x%"PRIx64" end=0x%"PRIx64" domain_range=%d probe_size=0x%x"
+virtio_iommu_set_config(uint64_t page_size_mask, uint64_t start, uint64_t end, 
uint32_t domain_range, uint32_t probe_size) "page_size_mask=0x%"PRIx64" 
start=0x%"PRIx64" end=0x%"PRIx64" domain_bits=%d probe_size=0x%x"
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
new file mode 100644
index 00..f239954396
--- /dev/null
+++ b/hw/virtio/virtio-iommu.c
@@ -0,0 +1,267 @@
+/*
+ * virtio-iommu device
+ *
+ * Copyright (c) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see .
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "qemu-common.h"
+#include "hw/virtio/virtio.h"
+#include "sysemu/kvm.h"
+#include "trace.h"
+
+#include "standard-headers/linux/virtio_ids.h"
+
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+#include "hw/virtio/virtio-iommu.h"
+
+/* Max size */
+#define VIOMMU_DEFAULT_QUEUE_SIZE 256
+
+static int virtio_iommu_handle_attach(VirtIOIOMMU *s,
+