Re: [PATCH V3 2/2] xen: privcmd: Add support for irqfd

2023-07-25 Thread Juergen Gross

On 25.07.23 08:47, Viresh Kumar wrote:

Xen provides support for injecting interrupts to the guests via the
HYPERVISOR_dm_op() hypercall. The same is used by the Virtio based
device backend implementations, in an inefficient manner currently.

Generally, the Virtio backends are implemented to work with the Eventfd
based mechanism. In order to make such backends work with Xen, another
software layer needs to poll the Eventfds and raise an interrupt to the
guest using the Xen based mechanism. This results in an extra context
switch.

This is not a new problem in Linux though. It is present with other
hypervisors like KVM, etc. as well. The generic solution implemented in
the kernel for them is to provide an IOCTL call to pass the interrupt
details and eventfd, which lets the kernel take care of polling the
eventfd and raising of the interrupt, instead of handling this in user
space (which involves an extra context switch).

This patch adds support for injecting a specific interrupt into the guest
using the eventfd mechanism, thereby avoiding the extra context switch.

Inspired by existing implementations for KVM, etc.

Signed-off-by: Viresh Kumar 
---
V2.1->V3
- No changes

V2->V2.1
- Select EVENTFD from Kconfig

V1->V2:
- Improve error handling.
- Remove the unnecessary usage of list_for_each_entry_safe().
- Restrict the use of XEN_DMOP_set_irq_level to only ARM64.

  drivers/xen/Kconfig        |   1 +
  drivers/xen/privcmd.c      | 276 ++++++++++++++++++++++++++++++++++++-
  include/uapi/xen/privcmd.h |  14 ++
  3 files changed, 289 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index d5d7c402b651..7967393c55a4 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -261,6 +261,7 @@ config XEN_SCSI_BACKEND
  config XEN_PRIVCMD
tristate "Xen hypercall passthrough driver"
depends on XEN
+   select EVENTFD


I don't like this. Can we maybe add another bool config item depending on
XEN_PRIVCMD, EVENTFD and XEN_VIRTIO, which can then be used to guard the
code additions to privcmd.c?

This would avoid adding additional code for everyone.


Juergen


OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature


[PATCH V3 2/2] xen: privcmd: Add support for irqfd

2023-07-24 Thread Viresh Kumar
Xen provides support for injecting interrupts to the guests via the
HYPERVISOR_dm_op() hypercall. The same is used by the Virtio based
device backend implementations, in an inefficient manner currently.

Generally, the Virtio backends are implemented to work with the Eventfd
based mechanism. In order to make such backends work with Xen, another
software layer needs to poll the Eventfds and raise an interrupt to the
guest using the Xen based mechanism. This results in an extra context
switch.

This is not a new problem in Linux though. It is present with other
hypervisors like KVM, etc. as well. The generic solution implemented in
the kernel for them is to provide an IOCTL call to pass the interrupt
details and eventfd, which lets the kernel take care of polling the
eventfd and raising of the interrupt, instead of handling this in user
space (which involves an extra context switch).

This patch adds support for injecting a specific interrupt into the guest
using the eventfd mechanism, thereby avoiding the extra context switch.

Inspired by existing implementations for KVM, etc.

Signed-off-by: Viresh Kumar 
---
V2.1->V3
- No changes

V2->V2.1
- Select EVENTFD from Kconfig

V1->V2:
- Improve error handling.
- Remove the unnecessary usage of list_for_each_entry_safe().
- Restrict the use of XEN_DMOP_set_irq_level to only ARM64.

 drivers/xen/Kconfig        |   1 +
 drivers/xen/privcmd.c      | 276 ++++++++++++++++++++++++++++++++++++-
 include/uapi/xen/privcmd.h |  14 ++
 3 files changed, 289 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index d5d7c402b651..7967393c55a4 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -261,6 +261,7 @@ config XEN_SCSI_BACKEND
 config XEN_PRIVCMD
tristate "Xen hypercall passthrough driver"
depends on XEN
+   select EVENTFD
default m
help
  The hypercall passthrough driver allows privileged user programs to
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index e2f580e30a86..0debc5482253 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -9,11 +9,16 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
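+#include <linux/eventfd.h>
+#include <linux/file.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/workqueue.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/mman.h>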
@@ -833,6 +838,257 @@ static long privcmd_ioctl_mmap_resource(struct file *file,
return rc;
 }
 
+/* Irqfd support */
+static struct workqueue_struct *irqfd_cleanup_wq;
+static DEFINE_MUTEX(irqfds_lock);
+static LIST_HEAD(irqfds_list);
+
+struct privcmd_kernel_irqfd {
+   domid_t dom;
+   u8 level;
+   bool error;
+   u32 irq;
+   struct eventfd_ctx *eventfd;
+   struct work_struct shutdown;
+   wait_queue_entry_t wait;
+   struct list_head list;
+   poll_table pt;
+};
+
+static void irqfd_deactivate(struct privcmd_kernel_irqfd *kirqfd)
+{
+   lockdep_assert_held(&irqfds_lock);
+
+   list_del_init(&kirqfd->list);
+   queue_work(irqfd_cleanup_wq, &kirqfd->shutdown);
+}
+
+static void irqfd_shutdown(struct work_struct *work)
+{
+   struct privcmd_kernel_irqfd *kirqfd =
+   container_of(work, struct privcmd_kernel_irqfd, shutdown);
+   u64 cnt;
+
+   eventfd_ctx_remove_wait_queue(kirqfd->eventfd, &kirqfd->wait, &cnt);
+   eventfd_ctx_put(kirqfd->eventfd);
+   kfree(kirqfd);
+}
+
+static void irqfd_inject(struct privcmd_kernel_irqfd *kirqfd)
+{
+   /* Different architectures support this differently */
+   struct xen_dm_op dm_op = {
+#ifdef CONFIG_ARM64
+   .op = XEN_DMOP_set_irq_level,
+   .u.set_irq_level.irq = kirqfd->irq,
+   .u.set_irq_level.level = kirqfd->level,
+#endif
+   };
+   struct xen_dm_op_buf xbufs = {
+   .size = sizeof(dm_op),
+   };
+   u64 cnt;
+   long rc;
+
+   eventfd_ctx_do_read(kirqfd->eventfd, &cnt);
+   set_xen_guest_handle(xbufs.h, &dm_op);
+
+   xen_preemptible_hcall_begin();
+   rc = HYPERVISOR_dm_op(kirqfd->dom, 1, &xbufs);
+   xen_preemptible_hcall_end();
+
+   /* Don't repeat the error message for consecutive failures */
+   if (rc && !kirqfd->error) {
+   pr_err("Failed to configure irq: %d to level: %d for guest domain: %d\n",
+  kirqfd->irq, kirqfd->level, kirqfd->dom);
+   }
+
+   kirqfd->error = !!rc;
+}
+
+static int
+irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
+{
+   struct privcmd_kernel_irqfd *kirqfd =
+   container_of(wait, struct privcmd_kernel_irqfd, wait);
+   __poll_t flags = key_to_poll(key);
+
+   if (flags & EPOLLIN)
+   irqfd_inject(kirqfd);
+
+   if (flags & EPOLLHUP) {
+   mutex_lock(&irqfds_lock);
+   irqfd_deactivate(kirqfd);
+   mutex_unlock(&irqfds_lock);
+   }
+
+   return 0;
+}
+
+static void
+irqfd_poll_func(struct file *file,