Epoll-based interrupt controller.

IMPROVES: IO loop performance - no per-fd lookups, giving a 15%
IO speedup in a minimal config and going to 100s of % with many
devices - the O(N^2) lookup is now replaced by an O(log N) one.

ADDS: True Write IRQ functionality
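
For example, with the new IRQ_WRITE type a driver can ask to be told
when its descriptor becomes writable. A minimal sketch (the IRQ number,
handler and name below are illustrative only, not part of this patch):

        err = um_request_irq(MYDEV_WRITE_IRQ, fd, IRQ_WRITE,
                             mydev_write_interrupt, IRQF_SHARED,
                             "mydev_write", dev_id);
        if (err)
                return err;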

OBSOLETES: The need to call reactivate_fd() in any driver which
has only read IRQ semantics. Write IRQs work, but their users will
need to be updated to take full advantage of this.
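
With epoll keeping the descriptor armed, a read-only IRQ handler now
reduces to its actual work, as in this sketch (the mydev_* names are
illustrative; the pattern matches the mconsole/net changes below):

        static irqreturn_t mydev_interrupt(int irq, void *dev_id)
        {
                mydev_handle_input(dev_id);
                /* no reactivate_fd(fd, MYDEV_IRQ) needed any more */
                return IRQ_HANDLED;
        }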

Potentially (with a change to the API) this will allow both edge and
level IRQ semantics.
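
Currently os_add_epoll_fd() always ORs EPOLLET into the event mask,
i.e. edge semantics. A level-triggered registration would only have to
omit that flag; a hypothetical sketch of such a helper (not part of
this patch, the name and its placement in os-Linux/irq.c are assumptions):

        int os_add_epoll_fd_level(int events, int fd, void *data)
        {
                struct epoll_event event;

                event.data.ptr = data;
                /* no EPOLLET -> level triggered */
                event.events = events;
                return epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
        }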

Prerequisite for using packet mmap and multi-packet read/write,
which do not get along with poll() very well.

Signed-off-by: Anton Ivanov <aiva...@brocade.com>
---
 arch/um/drivers/line.c            |   5 +-
 arch/um/drivers/mconsole_kern.c   |   2 -
 arch/um/drivers/net_kern.c        |   1 -
 arch/um/drivers/port_kern.c       |   1 -
 arch/um/drivers/random.c          |   1 -
 arch/um/drivers/ubd_kern.c        |   1 -
 arch/um/include/shared/irq_user.h |  24 ++-
 arch/um/include/shared/os.h       |  13 +-
 arch/um/kernel/irq.c              | 412 ++++++++++++++++++++++----------------
 arch/um/os-Linux/irq.c            | 145 +++++---------
 10 files changed, 321 insertions(+), 284 deletions(-)

diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 6208702..84384c8 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
@@ -283,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
        if (err)
                return err;
        if (output)
-               err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+               err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
                                     line_write_interrupt, IRQF_SHARED,
                                     driver->write_irq_name, data);
        return err;
@@ -666,8 +667,6 @@ static irqreturn_t winch_interrupt(int irq, void *data)
                tty_kref_put(tty);
        }
  out:
-       if (winch->fd != -1)
-               reactivate_fd(winch->fd, WINCH_IRQ);
        return IRQ_HANDLED;
 }
 
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 29880c9..5e8881c 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -95,7 +95,6 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id)
        }
        if (!list_empty(&mc_requests))
                schedule_work(&mconsole_work);
-       reactivate_fd(fd, MCONSOLE_IRQ);
        return IRQ_HANDLED;
 }
 
@@ -243,7 +242,6 @@ void mconsole_stop(struct mc_request *req)
                (*req->cmd->handler)(req);
        }
        os_set_fd_block(req->originating_fd, 0);
-       reactivate_fd(req->originating_fd, MCONSOLE_IRQ);
        mconsole_reply(req, "", 0, 0);
 }
 
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index f70dd54..82ea3a2 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -137,7 +137,6 @@ static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
                schedule_work(&lp->work);
                goto out;
        }
-       reactivate_fd(lp->fd, UM_ETH_IRQ);
 
 out:
        spin_unlock(&lp->lock);
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index 40ca5cc..b0e9ff3 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -137,7 +137,6 @@ static void port_work_proc(struct work_struct *unused)
                if (!port->has_connection)
                        continue;
 
-               reactivate_fd(port->fd, ACCEPT_IRQ);
                while (port_accept(port))
                        ;
                port->has_connection = 0;
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index dd16c90..a392828 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -72,7 +72,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
                                return ret ? : -EAGAIN;
 
                        atomic_inc(&host_sleep_count);
-                       reactivate_fd(random_fd, RANDOM_IRQ);
                        add_sigio_fd(random_fd);
 
                        add_wait_queue(&host_read_wait, &wait);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index e8ab93c..731982c 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -466,7 +466,6 @@ static void ubd_handler(void)
                blk_end_request(req->req, 0, req->length);
                kfree(req);
        }
-       reactivate_fd(thread_fd, UBD_IRQ);
 
        list_for_each_safe(list, next_ele, &restart){
                ubd = container_of(list, struct ubd, restart);
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index df56330..0eca64c 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
@@ -9,16 +10,23 @@
 #include <sysdep/ptrace.h>
 
 struct irq_fd {
-       struct irq_fd *next;
-       void *id;
-       int fd;
-       int type;
-       int irq;
-       int events;
-       int current_events;
+        void *id;
+        int irq;
+        int events;
+};
+
+
+#define IRQ_READ  0
+#define IRQ_WRITE 1 
+#define IRQ_NONE 2
+#define MAX_IRQ_TYPE (IRQ_NONE + 1)
+
+struct irq_entry {
+        struct irq_entry *next;
+        int fd;
+       struct irq_fd * irq_array[MAX_IRQ_TYPE + 1];
 };
 
-enum { IRQ_READ, IRQ_WRITE };
 
 struct siginfo;
 extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 21d704b..3fe1249 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2015 Thomas Meyer (tho...@m3y3r.de)
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
@@ -284,15 +285,17 @@ extern void halt_skas(void);
 extern void reboot_skas(void);
 
 /* irq.c */
-extern int os_waiting_for_events(struct irq_fd *active_fds);
-extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds);
+
+extern int os_setup_epoll(int maxevents);
+extern int os_waiting_for_events_epoll(void *kernel_events, int maxevents);
+extern int os_add_epoll_fd (int events, int fd, void * data);
+extern int os_mod_epoll_fd (int events, int fd, void * data);
+extern int os_del_epoll_fd (int fd);
+
 extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
                struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2);
 extern void os_free_irq_later(struct irq_fd *active_fds,
                int irq, void *dev_id);
-extern int os_get_pollfd(int i);
-extern void os_set_pollfd(int i, int fd);
-extern void os_set_ioignore(void);
 
 /* sigio.c */
 extern int add_sigio_fd(int fd);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 23cb935..516b13b 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,4 +1,7 @@
 /*
+ * Copyright (C) 2015 Brocade Communications Ltd
+ *     Author: Anton Ivanov aivanov@{brocade.com,kot-begemot.co.uk}
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
@@ -18,6 +21,61 @@
 #include <os.h>
 
 /*
+*      We are on the "kernel side" so we cannot pick up sys/epoll.h;
+*      instead we lift the applicable key definitions out of it.
+*/
+
+
+enum EPOLL_EVENTS
+  {
+       EPOLLIN = 0x001,
+#define EPOLLIN EPOLLIN
+       EPOLLPRI = 0x002,
+#define EPOLLPRI EPOLLPRI
+       EPOLLOUT = 0x004,
+#define EPOLLOUT EPOLLOUT
+       EPOLLRDNORM = 0x040,
+#define EPOLLRDNORM EPOLLRDNORM
+       EPOLLRDBAND = 0x080,
+#define EPOLLRDBAND EPOLLRDBAND
+       EPOLLWRNORM = 0x100,
+#define EPOLLWRNORM EPOLLWRNORM
+       EPOLLWRBAND = 0x200,
+#define EPOLLWRBAND EPOLLWRBAND
+       EPOLLMSG = 0x400,
+#define EPOLLMSG EPOLLMSG
+       EPOLLERR = 0x008,
+#define EPOLLERR EPOLLERR
+       EPOLLHUP = 0x010,
+#define EPOLLHUP EPOLLHUP
+       EPOLLRDHUP = 0x2000,
+#define EPOLLRDHUP EPOLLRDHUP
+       EPOLLONESHOT = (1 << 30),
+#define EPOLLONESHOT EPOLLONESHOT
+       EPOLLET = (1 << 31)
+#define EPOLLET EPOLLET
+  };
+
+
+typedef union epoll_data
+{
+       void *ptr;
+       int fd;
+       uint32_t u32;
+       uint64_t u64;
+} epoll_data_t;
+
+struct epoll_event
+{
+       uint32_t events;        /* Epoll events */
+       epoll_data_t data;      /* User data variable */
+} __attribute__ ((__packed__));
+
+#define MAX_EPOLL_EVENTS 16
+
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
+
+/*
  * This list is accessed under irq_lock, except in sigio_handler,
  * where it is safe from being modified.  IRQ handlers won't change it -
  * if an IRQ source has vanished, it will be freed by free_irqs just
@@ -25,44 +83,91 @@
  * list of irqs to free, with its own locking, coming back here to
  * remove list elements, taking the irq_lock to do so.
  */
-static struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
+static struct irq_entry *active_fds = NULL;
 
 extern void free_irqs(void);
 
+
+static DEFINE_SPINLOCK(irq_lock);
+
+
+/*
+ * Principles of Operation:
+ * Each epoll event carries a pointer back to an irq_entry, which holds
+ * the irq_fd entries for read, write and none and their matching event
+ * masks.
+ * This removes the need to look up "who talked" on every event.
+ * We no longer need to enable/disable any polls while we process them;
+ * epoll takes care of that. The exception to this (for now) is
+ * character devices, because of their own internal buffering, which
+ * needs to be updated to leverage the new write IRQ semantics.
+ * We can now support both read and write IRQs and have separate IRQs
+ * for read and write ops.
+ */
+
+
 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
        struct irq_fd *irq_fd;
-       int n;
+       struct irq_entry *irq_entry;
+       unsigned long flags;
+
+       int n, i, j;
 
        while (1) {
-               n = os_waiting_for_events(active_fds);
-               if (n <= 0) {
-                       if (n == -EINTR)
-                               continue;
-                       else break;
-               }
 
-               for (irq_fd = active_fds; irq_fd != NULL;
-                    irq_fd = irq_fd->next) {
-                       if (irq_fd->current_events != 0) {
-                               irq_fd->current_events = 0;
-                               do_IRQ(irq_fd->irq, regs);
-                       }
+               spin_lock_irqsave(&irq_lock, flags);
+
+               n = os_waiting_for_events_epoll(
+                       &epoll_events, MAX_EPOLL_EVENTS
+               );
+
+
+               if (n <= 0) {
+                       spin_unlock_irqrestore(&irq_lock, flags);
+                       if (n == -EINTR) { continue; } else { break; }
                }
+
+
+               for (i = 0; i < n ; i++) {
+                       /* start from the data ptr, walk the tree branch */
+                       irq_entry = (struct irq_entry *) epoll_events[i].data.ptr;
+                       for (j = 0; j < MAX_IRQ_TYPE ; j ++ ) {
+                               irq_fd = irq_entry->irq_array[j];
+                               if (irq_fd != NULL) {
+                                       if (epoll_events[i].events & irq_fd->events) {
+                                               do_IRQ(irq_fd->irq, regs);
+                                       }
+                               }
+                       }
+               }
+               spin_unlock_irqrestore(&irq_lock, flags);
        }
 
        free_irqs();
 }
 
-static DEFINE_SPINLOCK(irq_lock);
+static int update_events(struct irq_entry * irq_entry) {
+       int i;
+       int events = 0;
+       struct irq_fd * irq_fd;
+       for (i = 0; i < MAX_IRQ_TYPE ; i ++ ) {
+               irq_fd = irq_entry->irq_array[i];
+               if (irq_fd != NULL) {
+                       events = irq_fd->events | events;
+               }
+       }
+       /* os_add_epoll_fd() falls back to os_mod_epoll_fd() if fd is already registered */
+       return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+}
+
 
 static int activate_fd(int irq, int fd, int type, void *dev_id)
 {
-       struct pollfd *tmp_pfd;
-       struct irq_fd *new_fd, *irq_fd;
+       struct irq_fd *new_fd;
+       struct irq_entry * irq_entry;
        unsigned long flags;
-       int events, err, n;
+       int i, err, events = 0;
 
        err = os_set_fd_async(fd);
        if (err < 0)
@@ -74,186 +179,150 @@ static int activate_fd(int irq, int fd, int type, void *dev_id)
                goto out;
 
        if (type == IRQ_READ)
-               events = UM_POLLIN | UM_POLLPRI;
-       else events = UM_POLLOUT;
-       *new_fd = ((struct irq_fd) { .next              = NULL,
-                                    .id                = dev_id,
-                                    .fd                = fd,
-                                    .type              = type,
-                                    .irq               = irq,
-                                    .events            = events,
-                                    .current_events    = 0 } );
-
-       err = -EBUSY;
-       spin_lock_irqsave(&irq_lock, flags);
-       for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
-               if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
-                       printk(KERN_ERR "Registering fd %d twice\n", fd);
-                       printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
-                       printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
-                              dev_id);
-                       goto out_unlock;
-               }
-       }
-
+               events |= EPOLLIN | EPOLLPRI;
        if (type == IRQ_WRITE)
-               fd = -1;
+               events |= EPOLLOUT;
 
-       tmp_pfd = NULL;
-       n = 0;
+       *new_fd = ((struct irq_fd) {
+               .id             = dev_id,
+               .irq            = irq,
+               .events         = events
+       });
 
-       while (1) {
-               n = os_create_pollfd(fd, events, tmp_pfd, n);
-               if (n == 0)
-                       break;
+       err = -EBUSY;
 
-               /*
-                * n > 0
-                * It means we couldn't put new pollfd to current pollfds
-                * and tmp_fds is NULL or too small for new pollfds array.
-                * Needed size is equal to n as minimum.
-                *
-                * Here we have to drop the lock in order to call
-                * kmalloc, which might sleep.
-                * If something else came in and changed the pollfds array
-                * so we will not be able to put new pollfd struct to pollfds
-                * then we free the buffer tmp_fds and try again.
-                */
-               spin_unlock_irqrestore(&irq_lock, flags);
-               kfree(tmp_pfd);
+       spin_lock_irqsave(&irq_lock, flags);
 
-               tmp_pfd = kmalloc(n, GFP_KERNEL);
-               if (tmp_pfd == NULL)
-                       goto out_kfree;
+       for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+               if (irq_entry->fd == fd) break;
+       }
 
-               spin_lock_irqsave(&irq_lock, flags);
+       if (irq_entry == NULL) {
+               irq_entry = kmalloc(sizeof(struct irq_entry), GFP_KERNEL);
+               if (irq_entry == NULL) {
+                       printk(KERN_ERR
+                               "Failed to allocate new IRQ entry\n");
+                       kfree(new_fd);
+                       goto out;
+               }
+               irq_entry->fd = fd;
+               for (i = 0; i < MAX_IRQ_TYPE; i++) {
+                       irq_entry->irq_array[i] = NULL;
+               }
+               irq_entry->next = active_fds;
+               active_fds = irq_entry;
        }
 
-       *last_irq_ptr = new_fd;
-       last_irq_ptr = &new_fd->next;
+       if (irq_entry->irq_array[type] != NULL) {
+               printk(KERN_ERR
+                       "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
+                       irq, fd, type, dev_id
+               );
+               goto out_unlock;
+       } else {
+               irq_entry->irq_array[type] = new_fd;
+       }
 
+       update_events(irq_entry);
+       
        spin_unlock_irqrestore(&irq_lock, flags);
 
-       /*
-        * This calls activate_fd, so it has to be outside the critical
-        * section.
-        */
-       maybe_sigio_broken(fd, (type == IRQ_READ));
+       maybe_sigio_broken(fd, (type != IRQ_NONE));
 
        return 0;
 
  out_unlock:
        spin_unlock_irqrestore(&irq_lock, flags);
- out_kfree:
        kfree(new_fd);
  out:
        return err;
 }
 
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&irq_lock, flags);
-       os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
-       spin_unlock_irqrestore(&irq_lock, flags);
-}
-
-struct irq_and_dev {
-       int irq;
-       void *dev;
-};
 
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
+static void do_free_by_irq_and_dev(
+       struct irq_entry* irq_entry,
+       unsigned int irq,
+       void * dev
+)
 {
-       struct irq_and_dev *data = d;
-
-       return ((irq->irq == data->irq) && (irq->id == data->dev));
+       int i;
+       struct irq_fd * to_free;
+       for (i = 0; i < MAX_IRQ_TYPE ; i ++ ) {
+               if (irq_entry->irq_array[i] != NULL) {
+                       if (
+                               (irq_entry->irq_array[i]->irq == irq) &&
+                               (irq_entry->irq_array[i]->id == dev)
+                       ) {
+                               to_free = irq_entry->irq_array[i];
+                               irq_entry->irq_array[i] = NULL;
+                               update_events(irq_entry);
+                               kfree(to_free);
+                       }
+               }
+       }
 }
 
-static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
-{
-       struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
-                                                         .dev  = dev });
+void free_irq_by_fd(int fd) {
 
-       free_irq_by_cb(same_irq_and_dev, &data);
-}
+       struct irq_entry *irq_entry, *prev = NULL;
+       unsigned long flags;
+       int i;
 
-static int same_fd(struct irq_fd *irq, void *fd)
-{
-       return (irq->fd == *((int *)fd));
+       spin_lock_irqsave(&irq_lock, flags);
+       for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+               if (irq_entry->fd == fd) {
+                       os_del_epoll_fd(fd);   /* ignore err, just do it */
+                       for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+                               if (irq_entry->irq_array[i] != NULL) {
+                                       kfree(irq_entry->irq_array[i]);
+                               }
+                       }
+                       if (prev == NULL) {
+                               active_fds = irq_entry->next;
+                       } else {
+                               prev->next = irq_entry->next;
+                       }
+                       kfree(irq_entry);
+               } else {
+                       prev = irq_entry;
+               }
+       }
+       spin_unlock_irqrestore(&irq_lock, flags);
+       
 }
 
-void free_irq_by_fd(int fd)
-{
-       free_irq_by_cb(same_fd, &fd);
-}
 
-/* Must be called with irq_lock held */
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
-{
-       struct irq_fd *irq;
-       int i = 0;
-       int fdi;
-
-       for (irq = active_fds; irq != NULL; irq = irq->next) {
-               if ((irq->fd == fd) && (irq->irq == irqnum))
-                       break;
-               i++;
-       }
-       if (irq == NULL) {
-               printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
-                      fd);
-               goto out;
-       }
-       fdi = os_get_pollfd(i);
-       if ((fdi != -1) && (fdi != fd)) {
-               printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
-                      "and pollfds, fd %d vs %d, need %d\n", irq->fd,
-                      fdi, fd);
-               irq = NULL;
-               goto out;
-       }
-       *index_out = i;
- out:
-       return irq;
-}
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) {
 
-void reactivate_fd(int fd, int irqnum)
-{
-       struct irq_fd *irq;
+       struct irq_entry *irq_entry;
        unsigned long flags;
-       int i;
 
        spin_lock_irqsave(&irq_lock, flags);
-       irq = find_irq_by_fd(fd, irqnum, &i);
-       if (irq == NULL) {
-               spin_unlock_irqrestore(&irq_lock, flags);
-               return;
+       for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+               do_free_by_irq_and_dev(irq_entry, irq, dev);
        }
-       os_set_pollfd(i, irq->fd);
        spin_unlock_irqrestore(&irq_lock, flags);
-
-       add_sigio_fd(fd);
+       
 }
 
-void deactivate_fd(int fd, int irqnum)
+
+void reactivate_fd(int fd, int irqnum)
 {
-       struct irq_fd *irq;
+       struct irq_entry *irq_entry;
        unsigned long flags;
-       int i;
-
        spin_lock_irqsave(&irq_lock, flags);
-       irq = find_irq_by_fd(fd, irqnum, &i);
-       if (irq == NULL) {
-               spin_unlock_irqrestore(&irq_lock, flags);
-               return;
+       for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+               if (irq_entry->fd == fd) {
+                       update_events(irq_entry);
+               }
        }
-
-       os_set_pollfd(i, -1);
        spin_unlock_irqrestore(&irq_lock, flags);
+       
+}
 
-       ignore_sigio_fd(fd);
+void deactivate_fd(int fd, int irqnum)
+{
+       os_del_epoll_fd(fd);   /* ignore err, just do it */
 }
 EXPORT_SYMBOL(deactivate_fd);
 
@@ -265,17 +334,16 @@ EXPORT_SYMBOL(deactivate_fd);
  */
 int deactivate_all_fds(void)
 {
-       struct irq_fd *irq;
+       struct irq_entry * irq_entry;
        int err;
 
-       for (irq = active_fds; irq != NULL; irq = irq->next) {
-               err = os_clear_fd_async(irq->fd);
-               if (err)
-                       return err;
+       for (irq_entry = active_fds; irq_entry != NULL; irq_entry = irq_entry->next) {
+               os_del_epoll_fd(irq_entry->fd);   /* ignore err, just do it */
+               err = os_clear_fd_async(irq_entry->fd);
+               if (err) {
+                       printk(KERN_ERR "Clear FD async failed with %d", err);
+               }
        }
-       /* If there is a signal already queued, after unblocking ignore it */
-       os_set_ioignore();
-
        return 0;
 }
 
@@ -308,13 +376,13 @@ int um_request_irq(unsigned int irq, int fd, int type,
 {
        int err;
 
-       if (fd != -1) {
+       err = request_irq(irq, handler, irqflags, devname, dev_id);
+
+       if ((!err) && (fd != -1)) {
                err = activate_fd(irq, fd, type, dev_id);
-               if (err)
-                       return err;
        }
 
-       return request_irq(irq, handler, irqflags, devname, dev_id);
+       return err;
 }
 
 EXPORT_SYMBOL(um_request_irq);
@@ -352,9 +420,9 @@ void __init init_IRQ(void)
        int i;
 
        irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
-
-       for (i = 1; i < NR_IRQS; i++)
+       for (i = 1; i < NR_IRQS - 1 ; i++)
                irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
+       os_setup_epoll(MAX_EPOLL_EVENTS);
 }
 
 /*
@@ -382,11 +450,11 @@ void __init init_IRQ(void)
  * thread_info.
  *
  * There are three cases -
- *     The first interrupt on the stack - sets up the thread_info and
+ *      The first interrupt on the stack - sets up the thread_info and
  * handles the interrupt
- *     A nested interrupt interrupting the copying of the thread_info -
+ *      A nested interrupt interrupting the copying of the thread_info -
  * can't handle the interrupt, as the stack is in an unknown state
- *     A nested interrupt not interrupting the copying of the
+ *      A nested interrupt not interrupting the copying of the
  * thread_info - doesn't do any setup, just handles the interrupt
  *
  * The first job is to figure out whether we interrupted stack setup.
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index b9afb74..837aa68 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2012 - 2014 Cisco Systems
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
@@ -6,6 +7,7 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <poll.h>
+#include <sys/epoll.h>
 #include <signal.h>
 #include <string.h>
 #include <irq_user.h>
@@ -16,117 +18,80 @@
  * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
  * and os_free_irq_by_cb, which are called under irq_lock.
  */
-static struct pollfd *pollfds = NULL;
-static int pollfds_num = 0;
-static int pollfds_size = 0;
 
-int os_waiting_for_events(struct irq_fd *active_fds)
+/* epoll support */
+
+
+static int epollfd = -1;
+
+int os_setup_epoll(int maxevents) {
+       epollfd = epoll_create(maxevents);
+       return epollfd;
+}
+
+int os_waiting_for_events_epoll(void *kernel_events, int maxevents)
 {
-       struct irq_fd *irq_fd;
-       int i, n, err;
+       int n, err;
 
-       n = poll(pollfds, pollfds_num, 0);
+       n = epoll_wait(epollfd,
+               (struct epoll_event *) kernel_events, maxevents, 0);
        if (n < 0) {
                err = -errno;
                if (errno != EINTR)
-                       printk(UM_KERN_ERR "os_waiting_for_events:"
-                              " poll returned %d, errno = %d\n", n, errno);
+                       printk(
+                               UM_KERN_ERR "os_waiting_for_events:"
+                               " poll returned %d, error = %s\n", n,
+                               strerror(errno)
+                       );
                return err;
        }
 
-       if (n == 0)
-               return 0;
-
-       irq_fd = active_fds;
-
-       for (i = 0; i < pollfds_num; i++) {
-               if (pollfds[i].revents != 0) {
-                       irq_fd->current_events = pollfds[i].revents;
-                       pollfds[i].fd = -1;
-               }
-               irq_fd = irq_fd->next;
-       }
        return n;
 }
 
-int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
-{
-       if (pollfds_num == pollfds_size) {
-               if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
-                       /* return min size needed for new pollfds area */
-                       return (pollfds_size + 1) * sizeof(pollfds[0]);
-               }
-
-               if (pollfds != NULL) {
-                       memcpy(tmp_pfd, pollfds,
-                              sizeof(pollfds[0]) * pollfds_size);
-                       /* remove old pollfds */
-                       kfree(pollfds);
-               }
-               pollfds = tmp_pfd;
-               pollfds_size++;
-       } else
-               kfree(tmp_pfd); /* remove not used tmp_pfd */
+int os_add_epoll_fd (int events, int fd, void * data) {
+       struct epoll_event event;
+       int result;
 
-       pollfds[pollfds_num] = ((struct pollfd) { .fd           = fd,
-                                                 .events       = events,
-                                                 .revents      = 0 });
-       pollfds_num++;
-
-       return 0;
+       event.data.ptr = data;
+       event.events = events | EPOLLET;
+       result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+       if ((result) && (errno == EEXIST)) {
+               result = os_mod_epoll_fd (events, fd, data);
+       }
+       if (result) {
+               printk("epollctl add err fd %d, %s\n", fd, strerror(errno));
+       }
+       return result;
 }
 
-void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-               struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
-{
-       struct irq_fd **prev;
-       int i = 0;
-
-       prev = &active_fds;
-       while (*prev != NULL) {
-               if ((*test)(*prev, arg)) {
-                       struct irq_fd *old_fd = *prev;
-                       if ((pollfds[i].fd != -1) &&
-                           (pollfds[i].fd != (*prev)->fd)) {
-                               printk(UM_KERN_ERR "os_free_irq_by_cb - "
-                                      "mismatch between active_fds and "
-                                      "pollfds, fd %d vs %d\n",
-                                      (*prev)->fd, pollfds[i].fd);
-                               goto out;
-                       }
-
-                       pollfds_num--;
-
-                       /*
-                        * This moves the *whole* array after pollfds[i]
-                        * (though it doesn't spot as such)!
-                        */
-                       memmove(&pollfds[i], &pollfds[i + 1],
-                              (pollfds_num - i) * sizeof(pollfds[0]));
-                       if (*last_irq_ptr2 == &old_fd->next)
-                               *last_irq_ptr2 = prev;
-
-                       *prev = (*prev)->next;
-                       if (old_fd->type == IRQ_WRITE)
-                               ignore_sigio_fd(old_fd->fd);
-                       kfree(old_fd);
-                       continue;
-               }
-               prev = &(*prev)->next;
-               i++;
+int os_mod_epoll_fd (int events, int fd, void * data) {
+       struct epoll_event event;
+       int result;
+       event.data.ptr = data;
+       event.events = events;
+       result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
+       if (result) {
+               printk("epollctl mod err fd %d, %s\n", fd, strerror(errno));
        }
- out:
-       return;
+       return result;
 }
 
-int os_get_pollfd(int i)
-{
-       return pollfds[i].fd;
+int os_del_epoll_fd (int fd) {
+       struct epoll_event event;
+       int result;
+       result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
+       if (result) {
+               printk("epollctl del err %s\n", strerror(errno));
+       }
+       return result;
 }
 
-void os_set_pollfd(int i, int fd)
+void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
+               struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
 {
-       pollfds[i].fd = fd;
+       printk("Someone invoking obsolete deactivate_by_CB!!!\n");
+       return;
 }
 
 void os_set_ioignore(void)
-- 
2.1.4

