From: Nir Peleg <[EMAIL PROTECTED]>
From: Or Sagi <[EMAIL PROTECTED]>

When using the --no-kvm-irqchip option, this irqhook module injects
interrupts into the guests for assigned devices.

This module is not well-supported and only exists for debugging and
for legacy / non-x86 support.

Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
---
 Makefile                  |   10 ++-
 irqhook/Kbuild            |    3 +
 irqhook/Makefile          |   25 +++++
 irqhook/irqhook_main.c    |  215 +++++++++++++++++++++++++++++++++++++++++++++
 qemu/hw/apic.c            |    4 +
 qemu/hw/pci-passthrough.c |  171 ++++++++++++++++++++++++++++++++++--
 qemu/hw/pci-passthrough.h |    1 +
 qemu/vl.c                 |    4 +-
 8 files changed, 421 insertions(+), 12 deletions(-)
 create mode 100644 irqhook/Kbuild
 create mode 100644 irqhook/Makefile
 create mode 100644 irqhook/irqhook_main.c

diff --git a/Makefile b/Makefile
index 48a8dff..d4246fd 100644
--- a/Makefile
+++ b/Makefile
@@ -7,16 +7,16 @@ rpmrelease = devel
 
 sane-arch = $(subst i386,x86,$(subst x86_64,x86,$(ARCH)))
 
-.PHONY: kernel user libkvm qemu bios vgabios extboot clean libfdt
+.PHONY: kernel irqhook user libkvm qemu bios vgabios extboot clean libfdt
 
 all: libkvm qemu
 ifneq '$(filter $(ARCH), x86_64 i386 ia64)' ''
-    all: $(if $(WANT_MODULE), kernel) user
+    all: $(if $(WANT_MODULE), kernel irqhook) user
 endif
 
 kcmd = $(if $(WANT_MODULE),,@\#)
 
-qemu kernel user libkvm:
+qemu kernel user irqhook libkvm:
        $(MAKE) -C $@
 
 qemu: libkvm
@@ -77,6 +77,7 @@ install-rpm:
 
 install:
        $(kcmd)make -C kernel DESTDIR="$(DESTDIR)" install
+       $(kcmd)make -C irqhook DESTDIR="$(DESTDIR)" install
        make -C libkvm DESTDIR="$(DESTDIR)" install
        make -C qemu DESTDIR="$(DESTDIR)" install
 
@@ -97,6 +98,7 @@ srpm:
        tar czf $(RPMTOPDIR)/SOURCES/user.tar.gz user
        tar czf $(RPMTOPDIR)/SOURCES/libkvm.tar.gz libkvm
        tar czf $(RPMTOPDIR)/SOURCES/kernel.tar.gz kernel
+       tar czf $(RPMTOPDIR)/SOURCES/irqhook.tar.gz irqhook
        tar czf $(RPMTOPDIR)/SOURCES/scripts.tar.gz scripts
        tar czf $(RPMTOPDIR)/SOURCES/extboot.tar.gz extboot
        cp Makefile configure kvm_stat $(RPMTOPDIR)/SOURCES
@@ -104,7 +106,7 @@ srpm:
        $(RM) $(tmpspec)
 
 clean:
-       for i in $(if $(WANT_MODULE), kernel) user libkvm qemu libfdt; do \
+       for i in $(if $(WANT_MODULE), kernel irqhook) user libkvm qemu libfdt; 
do \
                make -C $$i clean; \
        done
 
diff --git a/irqhook/Kbuild b/irqhook/Kbuild
new file mode 100644
index 0000000..9af75a4
--- /dev/null
+++ b/irqhook/Kbuild
@@ -0,0 +1,3 @@
+EXTRA_CFLAGS := -I$(src)/include
+obj-m := irqhook.o
+irqhook-objs := irqhook_main.o
diff --git a/irqhook/Makefile b/irqhook/Makefile
new file mode 100644
index 0000000..3b1d851
--- /dev/null
+++ b/irqhook/Makefile
@@ -0,0 +1,25 @@
+include ../config.mak
+
+KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR))
+
+DESTDIR=
+
+INSTALLDIR = $(patsubst %/build,%/extra,$(KERNELDIR))
+
+rpmrelease = devel
+
+LINUX = ../linux-2.6
+
+all::
+       $(MAKE) -C $(KERNELDIR) M=`pwd` "$$@"
+
+#sync:
+#      rsync --exclude='*.mod.c' "$(LINUX)"/drivers/irqhook/*.[ch] .
+
+install:
+       mkdir -p $(DESTDIR)/$(INSTALLDIR)
+       cp *.ko $(DESTDIR)/$(INSTALLDIR)
+       /sbin/depmod -a
+
+clean:
+       $(MAKE) -C $(KERNELDIR) M=`pwd` $@
diff --git a/irqhook/irqhook_main.c b/irqhook/irqhook_main.c
new file mode 100644
index 0000000..0f93d17
--- /dev/null
+++ b/irqhook/irqhook_main.c
@@ -0,0 +1,215 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/bitmap.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/miscdevice.h>
+#include <linux/pci.h>
+
+#include <asm/uaccess.h>
+
+#define irqh_VERSION "0.0.1"
+#define irqh_MODULE_NAME "irqhook"
+#define irqh_DRIVER_NAME   irqh_MODULE_NAME " HW IRQ hook " irqh_VERSION
+
+// based on earlier proprietary Tutis code; this modified version goes under GPL
+MODULE_AUTHOR("Nir Peleg - Tutis");
+MODULE_DESCRIPTION("IRQ hook driver");
+MODULE_LICENSE("GPL");
+
+//#define irqh_DEBUG /* define to enable copious debugging info */
+
+#ifdef irqh_DEBUG
+#define DPRINTK(fmt, args...) printk("<1>" "%s: " fmt, __FUNCTION__ , ## args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+#define ERROR(fmt, args...) printk("<1>" "%s: " fmt, __FUNCTION__ , ## args)
+
+static spinlock_t irqh_lock; /* taken around 'pending' updates in the handler and in read() */
+static wait_queue_head_t irqh_proc_list; /* readers sleep here; the interrupt handler wakes it */
+
+static DECLARE_BITMAP(pending, NR_IRQS); /* set by the handler, cleared when read() reports the IRQ */
+static DECLARE_BITMAP(handled, NR_IRQS); /* IRQs bound through the '+' write command */
+
+#define irqh_on(which, bit)    test_bit(bit, which) /* is 'bit' set in bitmap 'which'? */
+#define irqh_set(which, bit)   set_bit(bit, which)
+#define irqh_clear(which, bit) clear_bit(bit, which)
+#define irqh_ffs(which)                find_first_bit(which, NR_IRQS) /* callers treat a result >= NR_IRQS as "no bit set" */
+
+/*
+ * Interrupt handler installed by the '+' write command.
+ *
+ * For an IRQ marked in 'handled' it records the IRQ in 'pending', wakes any
+ * reader sleeping on irqh_proc_list and masks the line with
+ * disable_irq_nosync().  The line is unmasked again when user space writes
+ * the bare IRQ number back (see the enable_irq() call in irqh_dev_write).
+ *
+ * Returns IRQ_NONE when the IRQ is not marked in 'handled': the handler is
+ * registered with IRQF_SHARED, so the interrupt may belong to another driver
+ * and must not be claimed.  Returns IRQ_HANDLED otherwise.
+ */
+static irqreturn_t
+irqh_interrupt(int irq, void *p)
+{
+       unsigned long flags;
+
+       DPRINTK("interrupt: %d\n", irq);
+       if (!irqh_on(handled, irq))
+               return IRQ_NONE;
+       spin_lock_irqsave(&irqh_lock, flags);
+       irqh_set(pending, irq);
+       wake_up_interruptible(&irqh_proc_list);
+       spin_unlock_irqrestore(&irqh_lock, flags);
+       disable_irq_nosync(irq);
+       return IRQ_HANDLED;
+}
+
+/*
+ * Control interface of the irqhook device.  One command per write():
+ *
+ *   "+<irq>"  or "+<bus>:<dev>.<fn>"   bind the IRQ to irqh_interrupt
+ *   "-<irq>"  or "-<bus>:<dev>.<fn>"   release a bound IRQ
+ *   "<irq>"   or "<bus>:<dev>.<fn>"    re-enable a bound IRQ (acknowledge)
+ *
+ * In the PCI form the IRQ number is taken from the matching pci_dev.
+ *
+ * Returns 'size' on success, or:
+ *   -EINVAL  file not writable, oversized or malformed command, IRQ out of
+ *            range, or release/acknowledge of an IRQ that is not bound
+ *   -EFAULT  command does not fit with its terminating NUL, or the user
+ *            buffer could not be read
+ *   -ENOENT  no PCI device with that address
+ *   -EBUSY   IRQ is already bound
+ *   -EIO     request_irq() failed
+ */
+static ssize_t
+irqh_dev_write(struct file *fp, const char *buf, size_t size, loff_t *offp)
+{
+       int n, device, func, devfn;
+       ssize_t rc = size;
+       char arg[32], *cp, *cp1;
+       struct pci_dev *pdp = NULL;
+
+       DPRINTK("ENTER\n");
+       if ((fp->f_mode & FMODE_WRITE) == 0 || size > sizeof arg)
+               return -EINVAL;
+
+       /* size == sizeof arg would leave no room for the terminating NUL */
+       if (size >= sizeof arg || copy_from_user(arg, buf, size))
+               return -EFAULT;
+       arg[size] = 0;
+       cp = arg + (arg[0] == '+' || arg[0] == '-');
+       n = simple_strtol(cp, &cp1, 0);
+       if (*cp1 == ':') {
+               device = simple_strtol(cp1+1, &cp1, 0);
+               /* without a '.' the next parse would start past the NUL */
+               if (*cp1 != '.')
+                       return -EINVAL;
+               func = simple_strtol(cp1+1, NULL, 0);
+               DPRINTK("PCI dev %d:%d.%d\n", n, device, func);
+               devfn = PCI_DEVFN(device, func);
+               for_each_pci_dev(pdp) {
+                       /* breaking out keeps the iterator's reference on pdp */
+                       if (pdp->bus->number == n && pdp->devfn == devfn)
+                               break;
+               }
+               if (!pdp) {
+                       ERROR("PCI device not found\n");
+                       return -ENOENT;
+               }
+               n = pdp->irq;
+       }
+       DPRINTK("IRQ %d\n", n);
+       if (arg[0] == '+') {
+               if (pdp) {
+                       if (pci_enable_device(pdp))
+                               ERROR("device not enabled\n");
+                       /* re-read the IRQ after the enable attempt */
+                       n = pdp->irq;
+               }
+               /* applies to raw IRQ numbers too: 'handled' has NR_IRQS bits */
+               if ((unsigned)n >= NR_IRQS) {
+                       if (pdp)
+                               ERROR("device has invalid IRQ set\n");
+                       rc = -EINVAL;
+                       goto done;
+               }
+               if (irqh_on(handled, n)) {
+                       rc = -EBUSY;
+                       goto done;
+               }
+               if (request_irq(n, irqh_interrupt, IRQF_SHARED, irqh_MODULE_NAME, (void *)irqh_interrupt)) {
+                       ERROR("request_irq failed\n");
+                       rc = -EIO;
+                       goto done;
+               }
+               printk("Bound machine irq %d\n", n);
+               irqh_set(handled, n);
+               goto done;
+       }
+       /* release and acknowledge only make sense for an IRQ we have bound */
+       if ((unsigned)n >= NR_IRQS || !irqh_on(handled, n)) {
+               rc = -EINVAL;
+               goto done;
+       }
+       if (arg[0] == '-') {
+               if (pdp)
+                       pci_disable_device(pdp);
+               free_irq(n, (void *)irqh_interrupt);
+               irqh_clear(handled, n);
+       } else
+               enable_irq(n);
+
+    done:
+       /* drop the reference left by the device lookup; NULL is accepted */
+       pci_dev_put(pdp);
+       DPRINTK("DONE\n");
+       return rc;
+}
+
+/*
+ * Report the lowest-numbered pending IRQ to user space as a NUL-terminated
+ * decimal string (truncated to 'size' bytes) and clear its pending bit.
+ *
+ * Blocks until an IRQ is pending unless the file is O_NONBLOCK.
+ *
+ * Returns the number of bytes copied, or:
+ *   -EINVAL       file not opened for reading
+ *   -EWOULDBLOCK  nothing pending and O_NONBLOCK is set
+ *   -ERESTARTSYS  interrupted by a signal while waiting
+ *   -EFAULT       user buffer not writable (the IRQ stays pending)
+ */
+static ssize_t
+irqh_dev_read(struct file *fp, char *buf, size_t size, loff_t *offp)
+{
+       char b[20];
+       size_t len;
+       int n;
+
+       DPRINTK("ENTER\n");
+       if ((fp->f_mode & FMODE_READ) == 0)
+               return -EINVAL;
+
+       for (;;) {
+               /* claim the lowest pending IRQ while holding the lock */
+               spin_lock_irq(&irqh_lock);
+               n = irqh_ffs(pending);
+               if (n < NR_IRQS)
+                       irqh_clear(pending, n);
+               spin_unlock_irq(&irqh_lock);
+               if (n < NR_IRQS)
+                       break;
+
+               if (fp->f_flags & O_NONBLOCK)
+                       return -EWOULDBLOCK;
+               if (wait_event_interruptible(irqh_proc_list,
+                                            irqh_ffs(pending) < NR_IRQS))
+                       return -ERESTARTSYS;
+       }
+
+       len = snprintf(b, sizeof b, "%d", n) + 1;
+       if (len > size)
+               len = size;
+       /* copy_to_user() may fault and sleep, so it must run unlocked */
+       if (copy_to_user(buf, b, len)) {
+               /* nothing was delivered: make the IRQ visible to readers again */
+               irqh_set(pending, n);
+               wake_up_interruptible(&irqh_proc_list);
+               return -EFAULT;
+       }
+       return len;
+}
+
+/* File operations of the irqhook device: only read and write are provided. */
+static struct file_operations irqh_chrdev_ops = {
+       .owner = THIS_MODULE,
+       .read  = irqh_dev_read,
+       .write = irqh_dev_write,
+};
+
+#define        irqh_MISCDEV_MINOR      MISC_DYNAMIC_MINOR
+
+/* Misc device registered by irqh_init(); requests MISC_DYNAMIC_MINOR. */
+static struct miscdevice irqh_miscdev = {
+       .minor = irqh_MISCDEV_MINOR,
+       .name  = irqh_MODULE_NAME,
+       .fops  = &irqh_chrdev_ops,
+};
+
+/*
+ * Module entry point: set up the wait queue and lock, then register the
+ * misc device.
+ *
+ * The synchronisation objects are initialised first because the device's
+ * read/write handlers use them and can be invoked as soon as
+ * misc_register() has made the device available.
+ *
+ * Returns 0 on success or the error code from misc_register().
+ */
+static int __init
+irqh_init(void)
+{
+       int rc;
+
+       DPRINTK("ENTER\n");
+
+       init_waitqueue_head(&irqh_proc_list);
+       spin_lock_init(&irqh_lock);
+
+       if ((rc = misc_register(&irqh_miscdev))) {
+               printk(KERN_ERR irqh_MODULE_NAME ": " "cannot register misc device\n");
+               DPRINTK("EXIT, returning %d\n", rc);
+               return rc;
+       }
+
+       printk(KERN_INFO irqh_DRIVER_NAME " loaded\n");
+
+       DPRINTK("EXIT, returning 0\n");
+       return 0;
+}
+
+/*
+ * Module exit: release every IRQ still marked in 'handled', then remove the
+ * misc device.
+ */
+static void __exit
+irqh_cleanup(void)
+{
+       int irq;
+
+       DPRINTK("ENTER\n");
+
+       /* each pass clears the lowest set bit, so the scan restarts from 0 */
+       for (irq = irqh_ffs(handled); irq < NR_IRQS; irq = irqh_ffs(handled)) {
+               irqh_clear(handled, irq);
+               free_irq(irq, (void *)irqh_interrupt);
+       }
+       misc_deregister(&irqh_miscdev);
+
+       DPRINTK("EXIT\n");
+}
+
+module_init (irqh_init);
+module_exit (irqh_cleanup);
diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c
index 4ebf1ff..7d45385 100644
--- a/qemu/hw/apic.c
+++ b/qemu/hw/apic.c
@@ -23,6 +23,8 @@
 
 #include "qemu-kvm.h"
 
+#include "pci-passthrough.h"
+
 //#define DEBUG_APIC
 //#define DEBUG_IOAPIC
 
@@ -389,6 +391,7 @@ static void apic_eoi(APICState *s)
     /* XXX: send the EOI packet to the APIC bus to allow the I/O APIC to
             set the remote IRR bit for level triggered interrupts. */
     apic_update_irq(s);
+    pt_ack_mirq(isrv);
 }
 
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
@@ -1144,6 +1147,7 @@ static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t va
                     } else {
                         s->ioredtbl[index] &= ~0xffffffffULL;
                         s->ioredtbl[index] |= val;
+                        pt_set_vector(index, (val << 24) >> 24);
                     }
                     ioapic_service(s);
                 }
diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c
index 250d7ef..1cf1d0f 100644
--- a/qemu/hw/pci-passthrough.c
+++ b/qemu/hw/pci-passthrough.c
@@ -398,9 +398,11 @@ again:
        return 0;
 }
 
+static int pt_bind_mirq(int bus, int dev, int fn);
+
 static pt_dev_t *register_real_device(PCIBus *e_bus, const char *e_dev_name,
                                      int e_devfn, uint8_t r_bus, uint8_t r_dev,
-                                     uint8_t r_func)
+                                     uint8_t r_func, uint32_t machine_irq)
 {
        int rc;
        pt_dev_t *pci_dev;
@@ -435,10 +437,24 @@ static pt_dev_t *register_real_device(PCIBus *e_bus, const char *e_dev_name,
        e_intx = pci_dev->dev.config[0x3d] - 1;
        pci_dev->intpin = e_intx;
        pci_dev->run = 0;
+       pci_dev->mirq = machine_irq;
        pci_dev->girq = 0;
        pci_dev->h_busnr = r_bus;
        pci_dev->h_devfn = PCI_DEVFN(r_dev, r_func);
 
+       /* bind machine_irq to device */
+       if (machine_irq && (!kvm_enabled() || !qemu_kvm_irqchip_in_kernel())) {
+               DEBUG(logfile, "Binding mirq %u to device=0x%x intpin=0x%x\n",
+                       machine_irq, e_device, pci_dev->intpin);
+               /*
+                * sirq is the text later written to the irqhook device to
+                * acknowledge the interrupt.  It is a small fixed-size buffer,
+                * so reject IRQ numbers whose decimal form does not fit
+                * instead of writing past its end.
+                */
+               rc = snprintf(pci_dev->sirq, sizeof(pci_dev->sirq), "%d",
+                             pci_dev->mirq);
+               if (rc < 0 || rc >= (int)sizeof(pci_dev->sirq)) {
+                       fprintf(stderr, "machine irq %u is out of range\n",
+                               machine_irq);
+                       return NULL;
+               }
+               rc = pt_bind_mirq(r_bus, r_dev, r_func);
+               if (rc) {
+                       fprintf(stderr, "pt_bind %d failed rc=%d\n",
+                               pci_dev->mirq, rc);
+                       return NULL;
+               }
+       }
+
+
 #ifdef KVM_CAP_PCI_PASSTHROUGH
        if (kvm_enabled()) {
                struct kvm_pci_passthrough_dev pci_pt_dev;
@@ -464,9 +480,9 @@ static pt_dev_t *register_real_device(PCIBus *e_bus, const char *e_dev_name,
        }
 #endif
 
-       fprintf(logfile, "Registered host PCI device %02x:%02x.%1x "
+       fprintf(logfile, "Registered host PCI device %02x:%02x.%1x-%u "
                "as guest device %02x:%02x.%1x\n",
-               r_bus, r_dev, r_func,
+               r_bus, r_dev, r_func, machine_irq,
                pci_bus_num(e_bus), e_device, r_func);
 
        return pci_dev;
@@ -478,6 +494,7 @@ struct {
        int bus;
        int dev;
        int func;
+       int irq;
        pt_dev_t *ptdev;
 } ptdevs[MAX_PTDEVS];
 
@@ -518,6 +535,62 @@ void pci_pt_update_irq(PCIDevice *d)
 }
 #endif
 
+/* Bottom half (pt_bh) scheduled by the IRQ thread to raise guest interrupts */
+static QEMUBH *ptbh;
+/* File descriptor of IRQHOOK_DEV, opened in pt_init_system() */
+static int irqfd;
+/*
+ * Per-vector, NULL-terminated lists of passthrough devices: filled in by
+ * pt_set_vector() and walked by pt_ack_mirq().  The array has 0xff slots so
+ * that 0xfe, the highest vector pt_set_vector() accepts, is a valid index.
+ */
+static pt_dev_t **apicv[0xff]; /* 0x10 - 0xfe according to intel IOAPIC spec */
+#define IRQHOOK_DEV "/dev/irqhook"
+/* Thread running pt_irq(), created from pt_init_device() */
+static pthread_t irqthread;
+
+/*
+ * Body of the IRQ thread.
+ *
+ * Blocks every signal in this thread, then loops reading IRQ numbers (as
+ * text) from the irqhook device.  For each one it flags every passthrough
+ * device whose mirq matches and schedules the pt_bh bottom half.
+ *
+ * 'arg' is unused.  Returns NULL once read() fails with anything other than
+ * EINTR; terminates the process if the irqhook device is not open.
+ */
+static void *pt_irq(void *arg)
+{
+       char buf[20];
+       ssize_t len;
+       int irq;
+       int i;
+       pt_dev_t *dev;
+       sigset_t signals;
+
+       /* pthread_sigmask is the per-thread form of sigprocmask */
+       sigfillset(&signals);
+       pthread_sigmask(SIG_BLOCK, &signals, NULL);
+
+       /* 0 means never opened, a negative value means open() failed */
+       if (irqfd <= 0) {
+               fprintf(stderr, "pt_irq: irqfd %d, exiting\n", irqfd);
+               exit(-1);
+       }
+
+       for (;;) {
+               /* leave room for the terminator atoi() relies on */
+               len = read(irqfd, buf, sizeof(buf) - 1);
+               if (len == -1) {
+                       if (errno == EINTR)
+                               continue;
+                       perror("irq read");
+                       break;
+               }
+               buf[len] = '\0';
+
+               irq = atoi(buf);
+               DEBUG("read irq %d\n", irq);
+               if (!irq)
+                       continue;
+
+               for (i = 0; i < nptdevs; i++)
+                       if ((dev = ptdevs[i].ptdev) && dev->mirq == irq)
+                               dev->run = 1;
+               qemu_bh_schedule(ptbh);
+       }
+       return NULL;
+}
+
+/*
+ * Bottom half: for every passthrough device whose 'run' flag is set, assert
+ * its guest interrupt pin, clear the flag and, when cpu_single_env is set,
+ * request a CPU exit.  'p' is unused.
+ */
+static void pt_bh(void *p)
+{
+       int idx;
+
+       for (idx = 0; idx < nptdevs; idx++) {
+               pt_dev_t *pt = ptdevs[idx].ptdev;
+
+               if (!pt || !pt->run)
+                       continue;
+
+               qemu_set_irq(pt->dev.irq[pt->intpin], 1);
+               pt->run = 0;
+               if (cpu_single_env)
+                       cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+       }
+}
+
 int pt_init_system(void)
 {
        /* Do we have any devices to be assigned? */
@@ -526,6 +599,17 @@ int pt_init_system(void)
 
        iopl(3);
 
+       /* without an in-kernel irqchip, interrupts come via the irqhook device */
+       if (!kvm_enabled() || !qemu_kvm_irqchip_in_kernel()) {
+               if (!(ptbh = qemu_bh_new(pt_bh, 0))) {
+                       fprintf(stderr, "Couldn't register PT callback\n");
+                       return -1;
+               }
+               /* open() reports failure with -1, never with 0 */
+               if ((irqfd = open(IRQHOOK_DEV, O_RDWR)) < 0) {
+                       fprintf(stderr, "Couldn't open PT irqhook dev, make "
+                               "sure the irqhook module is loaded\n");
+                       return -1;
+               }
+       }
        return 0;
 }
 
@@ -544,7 +628,7 @@ int pt_init_device(PCIBus *bus, int *index)
 
        dev = register_real_device(bus, ptdevs[i].name, -1,
                                   ptdevs[i].bus, ptdevs[i].dev,
-                                  ptdevs[i].func);
+                                  ptdevs[i].func, ptdevs[i].irq);
        if (dev == NULL) {
                fprintf(stderr, "Error: Couldn't register device %s\n",
                        ptdevs[i].name);
@@ -552,13 +636,23 @@ int pt_init_device(PCIBus *bus, int *index)
        }
        ptdevs[i].ptdev = dev;
 
+       if (!*index && kvm_enabled() && !qemu_kvm_irqchip_in_kernel()) {
+               if (ptdevs[i].irq == 0) {
+                       fprintf(stderr, "Please specify the irq for the 
device\n");
+                       return -1;
+               }
+               if (pthread_create(&irqthread, 0, pt_irq, dev)) {
+                       fprintf(stderr, "Couldn't create IRQ thread\n");
+                       return -1;
+               }
+       }
        --*index;
        return ret;
 }
 
 void add_pci_passthrough_device(const char *arg)
 {
-       /* name/bus:dev.func */
+       /* name/bus:dev.func-intr */
        char *cp, *cp1;
 
        if (nptdevs >= MAX_PTDEVS) {
@@ -583,12 +677,75 @@ void add_pci_passthrough_device(const char *arg)
        cp = cp1 + 1;
 
        ptdevs[nptdevs].func = strtoul(cp, &cp1, 16);
-       if (*cp1 != 0)
+
+       /* In case of irqchip_in_kernel, we don't want the next param */
+       if (*cp1 == 0) {
+               ptdevs[nptdevs].irq = 0;
+               goto skip_irq;
+       }
+       if (*cp1 != '-')
                goto bad;
+       cp = cp1 + 1;
 
+       ptdevs[nptdevs].irq = strtoul(cp, &cp1, 0);
+       if (*cp1 != 0)
+               goto bad;
+skip_irq:
        nptdevs++;
        return;
 bad:
        fprintf(stderr, "passthrough arg (%s) not in the form of "
-               "name/bus:dev.func\n", arg);
+               "name/bus:dev.func-intr\n", arg);
+}
+
+/*
+ * Called on guest EOI for 'vector': for every device bound to that vector,
+ * write its IRQ string to the irqhook device (which re-enables the host
+ * IRQ) and lower the guest interrupt pin.
+ *
+ * Vectors outside the apicv table and vectors with no bound device are
+ * ignored.  A failed write is reported with perror() and does not stop the
+ * remaining devices from being processed.
+ */
+void pt_ack_mirq(int vector)
+{
+       pt_dev_t **p;
+       size_t len;
+
+       if (vector < 0 || vector >= (int)(sizeof apicv / sizeof apicv[0]))
+               return;
+       p = apicv[vector];
+       if (!p)
+               return;
+
+       for (; *p; p++) {
+               len = strlen((*p)->sirq);
+               /* an empty sirq means no host IRQ string was recorded */
+               if (len && write(irqfd, (*p)->sirq, len) != (ssize_t)len)
+                       perror("pt_ack_mirq");
+               qemu_set_irq((*p)->dev.irq[(*p)->intpin], 0);
+       }
+}
+
+/*
+ * Send a "+bus:dev.fn" command to the irqhook device so that it binds the
+ * host IRQ of that PCI device.  Returns 0 on success; a short or failed
+ * write terminates the process.
+ */
+static int pt_bind_mirq(int bus, int dev, int fn)
+{
+       char cmd[64];
+       size_t cmd_len;
+
+       sprintf(cmd, "+%d:%d.%d", bus, dev, fn);
+       cmd_len = strlen(cmd);
+
+       if (write(irqfd, cmd, cmd_len) != cmd_len) {
+               perror("pt_bind_mirq");
+               fprintf(stderr, "Make sure the irqhook module is loaded\n");
+               exit(-1);
+       }
+
+       return 0;
+}
+
+int piix3_get_pin(int pic_irq);
+
+/*
+ * Record that guest IOAPIC input 'irq' is now delivered as 'vector'.
+ *
+ * Every passthrough device that is not yet bound and whose interrupt pin
+ * maps to the same PIIX3 pin as 'irq' is appended to the NULL-terminated
+ * list apicv[vector] and marked bound.
+ *
+ * Vectors outside the apicv table are ignored.  If the list cannot be
+ * grown, the existing list is left intact and the function returns early.
+ */
+void pt_set_vector(int irq, int vector)
+{
+       int i;
+       size_t n;
+       int pin = piix3_get_pin(irq);
+       pt_dev_t *pt, **list;
+
+       DEBUG("irq %d vector %d\n", irq, vector);
+       if (vector < 0 || vector >= (int)(sizeof apicv / sizeof apicv[0]))
+               return;
+       for (i = 0; i < nptdevs; i++) {
+               pt = ptdevs[i].ptdev;
+               if (!pt || pt->bound)
+                       continue;
+               if (pci_map_irq(&pt->dev, pt->intpin) != pin)
+                       continue;
+
+               /* count the entries already on the list (it may not exist) */
+               n = 0;
+               if (apicv[vector])
+                       while (apicv[vector][n])
+                               n++;
+
+               /* room for the new entry plus the NULL terminator */
+               list = realloc(apicv[vector], (n + 2) * sizeof *list);
+               if (!list) {
+                       fprintf(stderr, "pt_set_vector: out of memory\n");
+                       return;
+               }
+               list[n] = pt;
+               list[n + 1] = NULL;
+               apicv[vector] = list;
+               pt->bound = 1;
+       }
+       DEBUG("done\n");
+}
diff --git a/qemu/hw/pci-passthrough.h b/qemu/hw/pci-passthrough.h
index 60df017..cd63482 100644
--- a/qemu/hw/pci-passthrough.h
+++ b/qemu/hw/pci-passthrough.h
@@ -75,6 +75,7 @@ typedef struct pt_dev_s {
        pt_region_t v_addrs[PCI_NUM_REGIONS];
        pci_dev_t real_device;
        int run;
+       int mirq;
        int girq;
        char sirq[4];
        unsigned char h_busnr;
diff --git a/qemu/vl.c b/qemu/vl.c
index 4946e9a..33decf5 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -7788,9 +7788,11 @@ static void help(int exitcode)
           "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n"
           "-no-kvm-pit     disable KVM kernel mode PIT\n"
 #if defined(TARGET_I386) || defined(TARGET_X86_64)
-          "-pcidevice name/bus:dev.func\n"
+          "-pcidevice name/bus:dev.func[-intr] \n"
           "                expose a PCI device to the guest OS.\n"
           "                'name' is just used for debug logs.\n"
+          "                [-intr] is the interrupt (from the lspci -v 
output),\n"
+          "                in case you use the irqhook module for interrupt 
routing.\n"
 #endif
 #endif
 #ifdef TARGET_I386
-- 
1.5.4.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to