The new flag allows passing a connected socket instead of an
eventfd to be notified of writes or reads to the specified memory region.

Instead of signaling an event:
On write - the value written to the memory region is written to the socket.
On read - a notification of the read is sent to the host, and a response
is expected with the value to be 'read'.

Using a socket instead of an eventfd is useful when any value can be
written to the memory region but we're interested in receiving the
actual value instead of just a notification.

A simple example of practical use is the serial port. We are not
interested in an exit every time a char is written to the port, but
we do need to know what was written so we could handle it on the guest.

Cc: Avi Kivity <a...@redhat.com>
Cc: Ingo Molnar <mi...@elte.hu>
Cc: Marcelo Tosatti <mtosa...@redhat.com>
Cc: Michael S. Tsirkin <m...@redhat.com>
Cc: Pekka Enberg <penb...@kernel.org>
Signed-off-by: Sasha Levin <levinsasha...@gmail.com>
---
 Documentation/virtual/kvm/api.txt |   18 ++++-
 include/linux/kvm.h               |    9 ++
 virt/kvm/eventfd.c                |  153 ++++++++++++++++++++++++++++++++-----
 3 files changed, 161 insertions(+), 19 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 317d86a..74f0946 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1330,7 +1330,7 @@ Returns: 0 on success, !0 on error
 
 This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
 within the guest.  A guest write in the registered address will signal the
-provided event instead of triggering an exit.
+provided event or write to the provided socket instead of triggering an exit.
 
 struct kvm_ioeventfd {
        __u64 datamatch;
@@ -1341,6 +1341,13 @@ struct kvm_ioeventfd {
        __u8  pad[36];
 };
 
+struct kvm_ioeventfd_data {
+       __u64 data;
+       __u64 addr;
+       __u32 len;
+       __u8  is_write;
+};
+
 The following flags are defined:
 
 #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
@@ -1348,6 +1355,7 @@ The following flags are defined:
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
 #define KVM_IOEVENTFD_FLAG_READ      (1 << kvm_ioeventfd_flag_nr_read)
 #define KVM_IOEVENTFD_FLAG_NOWRITE   (1 << kvm_ioeventfd_flag_nr_nowrite)
+#define KVM_IOEVENTFD_FLAG_SOCKET    (1 << kvm_ioeventfd_flag_nr_socket)
 
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
@@ -1359,6 +1367,14 @@ passed in datamatch.
 If the nowrite flag is set, the event won't be signaled when the specified 
address
 is being written to.
 
+If the socket flag is set, fd is expected to be a connected AF_UNIX
+SOCK_SEQPACKET socket. When a guest write to the registered address is
+detected, a struct kvm_ioeventfd_data which describes the write will be
+written to the socket.
+On read, a struct kvm_ioeventfd_data with 'is_write = 0' is written to the
+socket, and KVM waits for a response struct kvm_ioeventfd_data containing
+the value which should be 'read' by the guest.
+
 
 5. The kvm_run structure
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8a12711..ff3d808 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -389,6 +389,7 @@ enum {
        kvm_ioeventfd_flag_nr_deassign,
        kvm_ioeventfd_flag_nr_read,
        kvm_ioeventfd_flag_nr_nowrite,
+       kvm_ioeventfd_flag_nr_socket,
        kvm_ioeventfd_flag_nr_max,
 };
 
@@ -397,6 +398,7 @@ enum {
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
 #define KVM_IOEVENTFD_FLAG_READ      (1 << kvm_ioeventfd_flag_nr_read)
 #define KVM_IOEVENTFD_FLAG_NOWRITE   (1 << kvm_ioeventfd_flag_nr_nowrite)
+#define KVM_IOEVENTFD_FLAG_SOCKET    (1 << kvm_ioeventfd_flag_nr_socket)
 
 #define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1 << kvm_ioeventfd_flag_nr_max) - 1)
 
@@ -409,6 +411,13 @@ struct kvm_ioeventfd {
        __u8  pad[36];
 };
 
+struct kvm_ioeventfd_data {
+       __u64 data;
+       __u64 addr;
+       __u32 len;
+       __u8  is_write;
+};
+
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {
        /* in */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 5f2d203..d1d63b3 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -32,6 +32,7 @@
 #include <linux/eventfd.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/net.h>
 
 #include "iodev.h"
 
@@ -413,10 +414,11 @@ module_exit(irqfd_module_exit);
 
 /*
  * --------------------------------------------------------------------
- * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
+ * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal or
+ *            a socket write.
  *
- * userspace can register a PIO/MMIO address with an eventfd for receiving
- * notification when the memory has been touched.
+ * userspace can register a PIO/MMIO address with an eventfd or a
+ * socket for receiving notification when the memory has been touched.
  * --------------------------------------------------------------------
  */
 
@@ -424,7 +426,10 @@ struct _ioeventfd {
        struct list_head     list;
        u64                  addr;
        int                  length;
-       struct eventfd_ctx  *eventfd;
+       union {
+               struct socket       *sock;
+               struct eventfd_ctx  *eventfd;
+       };
        u64                  datamatch;
        struct kvm_io_device dev;
        bool                 wildcard;
@@ -441,7 +446,11 @@ to_ioeventfd(struct kvm_io_device *dev)
 static void
 ioeventfd_release(struct _ioeventfd *p)
 {
-       eventfd_ctx_put(p->eventfd);
+       if (p->eventfd)
+               eventfd_ctx_put(p->eventfd);
+       else
+               sockfd_put(p->sock);
+
        list_del(&p->list);
        kfree(p);
 }
@@ -510,12 +519,65 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int 
len, const void *val)
        return _val == p->datamatch ? true : false;
 }
 
+static ssize_t socket_write(struct socket *sock, const void *buf, size_t count)
+{
+       mm_segment_t old_fs;
+       ssize_t res;
+       struct msghdr msg;
+       struct iovec iov;
+
+       iov = (struct iovec) {
+               .iov_base = (void *)buf,
+               .iov_len  = count,
+       };
+
+       msg = (struct msghdr) {
+               .msg_iov = &iov,
+               .msg_iovlen = 1,
+       };
+
+       old_fs = get_fs();
+       set_fs(get_ds());
+       /* The cast to a user pointer is valid due to the set_fs() */
+       res = sock_sendmsg(sock, &msg, count);
+       set_fs(old_fs);
+
+       return res;
+}
+
+static ssize_t socket_read(struct socket *sock, void *buf, size_t count)
+{
+       mm_segment_t old_fs;
+       ssize_t res;
+       struct msghdr msg;
+       struct iovec iov;
+
+       iov = (struct iovec) {
+               .iov_base = (void *)buf,
+               .iov_len  = count,
+       };
+
+       msg = (struct msghdr) {
+               .msg_iov = &iov,
+               .msg_iovlen = 1,
+       };
+
+       old_fs = get_fs();
+       set_fs(get_ds());
+       /* The cast to a user pointer is valid due to the set_fs() */
+       res = sock_recvmsg(sock, &msg, count, 0);
+       set_fs(old_fs);
+
+       return res;
+}
+
 /* MMIO/PIO writes trigger an event if the addr/val match */
 static int
 ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
                const void *val)
 {
        struct _ioeventfd *p = to_ioeventfd(this);
+       struct kvm_ioeventfd_data data;
 
        /* Exit if signaling on writes isn't requested */
        if (!p->track_writes)
@@ -524,7 +586,18 @@ ioeventfd_write(struct kvm_io_device *this, gpa_t addr, 
int len,
        if (!ioeventfd_in_range(p, addr, len, val))
                return -EOPNOTSUPP;
 
-       eventfd_signal(p->eventfd, 1);
+       data = (struct kvm_ioeventfd_data) {
+               .data = get_val(val, len),
+               .addr = addr,
+               .len = len,
+               .is_write = 1,
+       };
+
+       if (p->sock)
+               socket_write(p->sock, &data, sizeof(data));
+       else
+               eventfd_signal(p->eventfd, 1);
+
        return 0;
 }
 
@@ -534,6 +607,7 @@ ioeventfd_read(struct kvm_io_device *this, gpa_t addr, int 
len,
                void *val)
 {
        struct _ioeventfd *p = to_ioeventfd(this);
+       struct kvm_ioeventfd_data data;
 
        /* Exit if signaling on reads isn't requested */
        if (!p->track_reads)
@@ -542,7 +616,21 @@ ioeventfd_read(struct kvm_io_device *this, gpa_t addr, int 
len,
        if (!ioeventfd_in_range(p, addr, len, val))
                return -EOPNOTSUPP;
 
-       eventfd_signal(p->eventfd, 1);
+       data = (struct kvm_ioeventfd_data) {
+               .addr = addr,
+               .len = len,
+               .is_write = 0,
+       };
+
+       if (p->sock) {
+               socket_write(p->sock, &data, sizeof(data));
+               socket_read(p->sock, &data, sizeof(data));
+               set_val(val, len, data.data);
+       } else {
+               set_val(val, len, p->datamatch);
+               eventfd_signal(p->eventfd, 1);
+       }
+
        return 0;
 }
 
@@ -585,7 +673,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd 
*args)
        int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
        enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
        struct _ioeventfd        *p;
-       struct eventfd_ctx       *eventfd;
+       struct eventfd_ctx       *eventfd = NULL;
        int                       ret;
 
        /* check for range overflow */
@@ -596,10 +684,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd 
*args)
        if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
                return -EINVAL;
 
-       eventfd = eventfd_ctx_fdget(args->fd);
-       if (IS_ERR(eventfd))
-               return PTR_ERR(eventfd);
-
        p = kzalloc(sizeof(*p), GFP_KERNEL);
        if (!p) {
                ret = -ENOMEM;
@@ -611,6 +695,20 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd 
*args)
        p->length  = args->len;
        p->eventfd = eventfd;
 
+       if (args->flags & KVM_IOEVENTFD_FLAG_SOCKET) {
+               ret = 0;
+               p->sock = sockfd_lookup(args->fd, &ret);
+               if (ret)
+                       goto fail;
+       } else {
+               ret = -EINVAL;
+               eventfd = eventfd_ctx_fdget(args->fd);
+               if (IS_ERR(eventfd))
+                       goto fail;
+
+               p->eventfd = eventfd;
+       }
+
        /* The datamatch feature is optional, otherwise this is a wildcard */
        if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
                p->datamatch = args->datamatch;
@@ -649,8 +747,14 @@ unlock_fail:
        mutex_unlock(&kvm->slots_lock);
 
 fail:
+       if (eventfd)
+               eventfd_ctx_put(eventfd);
+
+       if (p->sock)
+               sockfd_put(p->sock);
+
+
        kfree(p);
-       eventfd_ctx_put(eventfd);
 
        return ret;
 }
@@ -661,12 +765,21 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct 
kvm_ioeventfd *args)
        int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
        enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
        struct _ioeventfd        *p, *tmp;
-       struct eventfd_ctx       *eventfd;
+       struct eventfd_ctx       *eventfd = NULL;
+       struct socket            *sock = NULL;
        int                       ret = -ENOENT;
 
-       eventfd = eventfd_ctx_fdget(args->fd);
-       if (IS_ERR(eventfd))
-               return PTR_ERR(eventfd);
+       if (args->flags & KVM_IOEVENTFD_FLAG_SOCKET) {
+               ret = 0;
+               sock = sockfd_lookup(args->fd, &ret);
+               if (ret)
+                       return PTR_ERR(sock);
+       } else {
+               ret = -EINVAL;
+               eventfd = eventfd_ctx_fdget(args->fd);
+               if (IS_ERR(eventfd))
+                       return PTR_ERR(eventfd);
+       }
 
        mutex_lock(&kvm->slots_lock);
 
@@ -674,6 +787,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct 
kvm_ioeventfd *args)
                bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
 
                if (p->eventfd != eventfd  ||
+                   p->sock != sock        ||
                    p->addr != args->addr  ||
                    p->length != args->len ||
                    p->wildcard != wildcard)
@@ -690,7 +804,10 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct 
kvm_ioeventfd *args)
 
        mutex_unlock(&kvm->slots_lock);
 
-       eventfd_ctx_put(eventfd);
+       if (eventfd)
+               eventfd_ctx_put(eventfd);
+       if (sock)
+               sockfd_put(sock);
 
        return ret;
 }
-- 
1.7.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to