I'd like to break the comm code out into per-commtype files, much like
squid-3.

Here's my first cut. The new files compile and run, but I haven't done much
testing on them. There's plenty more tidying up that can be done
after this is in - mostly tidying up commSetSelect() and
making the commDeferFD/commResumeFD calls part of the net API rather
than just for epoll.

I'd like to get this into squid-2.6 now so I or someone else can bring
across kqueue/solaris epoll before 2.6 is released.

I also discovered some recent change to the select() code stopped it
compiling; I've fixed it with what a cursory glance shows should be
right. I'd appreciate someone verifying this.




Adrian


Index: src/comm_poll.c
===================================================================
RCS file: src/comm_poll.c
diff -N src/comm_poll.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ src/comm_poll.c     24 May 2006 14:31:51 -0000
@@ -0,0 +1,689 @@
+
+/*
+ * $Id: comm_select.c,v 1.66 2006/05/22 19:01:32 serassio Exp $
+ *
+ * DEBUG: section 5     Socket Functions
+ *
+ * SQUID Web Proxy Cache          http://www.squid-cache.org/
+ * ----------------------------------------------------------
+ *
+ *  Squid is the result of efforts by numerous individuals from
+ *  the Internet community; see the CONTRIBUTORS file for full
+ *  details.   Many organizations have provided support for Squid's
+ *  development; see the SPONSORS file for full details.  Squid is
+ *  Copyrighted (C) 2001 by the Regents of the University of
+ *  California; see the COPYRIGHT file for full details.  Squid
+ *  incorporates software developed and/or copyrighted by other
+ *  sources; see the CREDITS file for full details.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "squid.h"
+
+#if USE_POLL
+
+static int MAX_POLL_TIME = 1000;       /* see also comm_quick_poll_required() */
+
+
+#ifndef        howmany
+#define howmany(x, y)   (((x)+((y)-1))/(y))
+#endif
+#ifndef        NBBY
+#define        NBBY    8
+#endif
+#define FD_MASK_BYTES sizeof(fd_mask)
+#define FD_MASK_BITS (FD_MASK_BYTES*NBBY)
+
+/* STATIC */
+static int fdIsHttp(int fd);
+static int fdIsIcp(int fd);
+static int fdIsDns(int fd);
+static int commDeferRead(int fd);
+static void checkTimeouts(void);
+static OBJH commIncomingStats;
+static int comm_check_incoming_poll_handlers(int nfds, int *fds);
+static void comm_poll_dns_incoming(void);
+
+static fd_set global_readfds;
+static fd_set global_writefds;
+static int nreadfds;
+static int nwritefds;
+
+/*
+ * Automatic tuning for incoming requests:
+ *
+ * INCOMING sockets are the ICP and HTTP ports.  We need to check these
+ * fairly regularly, but how often?  When the load increases, we
+ * want to check the incoming sockets more often.  If we have a lot
+ * of incoming ICP, then we need to check these sockets more than
+ * if we just have HTTP.
+ *
+ * The variables 'incoming_icp_interval' and 'incoming_http_interval' 
+ * determine how many normal I/O events to process before checking
+ * incoming sockets again.  Note we store the incoming_interval
+ * multiplied by a factor of (2^INCOMING_FACTOR) to have some
+ * pseudo-floating point precision.
+ *
+ * The variables 'icp_io_events' and 'http_io_events' count how many normal
+ * I/O events have been processed since the last check on the incoming
+ * sockets.  When io_events > incoming_interval, it's time to check incoming
+ * sockets.
+ *
+ * Every time we check incoming sockets, we count how many new messages
+ * or connections were processed.  This is used to adjust the
+ * incoming_interval for the next iteration.  The new incoming_interval
+ * is calculated as the current incoming_interval plus what we would
+ * like to see as an average number of events minus the number of
+ * events just processed.
+ *
+ *  incoming_interval = incoming_interval + target_average - number_of_events_processed
+ *
+ * There are separate incoming_interval counters for both HTTP and ICP events
+ * 
+ * You can see the current values of the incoming_interval's, as well as
+ * a histogram of 'incoming_events' by asking the cache manager
+ * for 'comm_incoming', e.g.:
+ *
+ *      % ./client mgr:comm_incoming
+ *
+ * Caveats:
+ *
+ *      - We have MAX_INCOMING_INTEGER as a magic upper limit on
+ *        incoming_interval for both types of sockets.  At the
+ *        largest value the cache will effectively be idling.
+ *
+ *      - The higher the INCOMING_FACTOR, the slower the algorithm will
+ *        respond to load spikes/increases/decreases in demand. A value
+ *        between 3 and 8 is recommended.
+ */
+
+#define MAX_INCOMING_INTEGER 256
+#define INCOMING_FACTOR 5
+#define MAX_INCOMING_INTERVAL (MAX_INCOMING_INTEGER << INCOMING_FACTOR)
+static int icp_io_events = 0;
+static int dns_io_events = 0;
+static int http_io_events = 0;
+static int incoming_icp_interval = 16 << INCOMING_FACTOR;
+static int incoming_dns_interval = 16 << INCOMING_FACTOR;
+static int incoming_http_interval = 16 << INCOMING_FACTOR;
+#define commCheckICPIncoming (++icp_io_events > (incoming_icp_interval>> INCOMING_FACTOR))
+#define commCheckDNSIncoming (++dns_io_events > (incoming_dns_interval>> INCOMING_FACTOR))
+#define commCheckHTTPIncoming (++http_io_events > (incoming_http_interval>> INCOMING_FACTOR))
+
+static int
+fdIsIcp(int fd)
+{
+    if (fd == theInIcpConnection)
+       return 1;
+    if (fd == theOutIcpConnection)
+       return 1;
+    return 0;
+}
+
+static int
+fdIsDns(int fd)
+{
+    if (fd == DnsSocket)
+       return 1;
+    return 0;
+}
+
+static int
+fdIsHttp(int fd)
+{
+    int j;
+    for (j = 0; j < NHttpSockets; j++) {
+       if (fd == HttpSockets[j])
+           return 1;
+    }
+    return 0;
+}
+
+#if DELAY_POOLS
+static int slowfdcnt = 0;
+static int slowfdarr[SQUID_MAXFD];
+
+static void
+commAddSlowFd(int fd)
+{
+    assert(slowfdcnt < SQUID_MAXFD);
+    slowfdarr[slowfdcnt++] = fd;
+}
+
+static int
+commGetSlowFd(void)
+{
+    int whichfd, retfd;
+
+    if (!slowfdcnt)
+       return -1;
+    whichfd = squid_random() % slowfdcnt;
+    retfd = slowfdarr[whichfd];
+    slowfdarr[whichfd] = slowfdarr[--slowfdcnt];
+    return retfd;
+}
+#endif
+
+static int
+comm_check_incoming_poll_handlers(int nfds, int *fds)
+{
+    int i;
+    int fd;
+    PF *hdl = NULL;
+    int npfds;
+    struct pollfd pfds[3 + MAXHTTPPORTS];
+    incoming_sockets_accepted = 0;
+    for (i = npfds = 0; i < nfds; i++) {
+       int events;
+       fd = fds[i];
+       events = 0;
+       if (fd_table[fd].read_handler)
+           events |= POLLRDNORM;
+       if (fd_table[fd].write_handler)
+           events |= POLLWRNORM;
+       if (events) {
+           pfds[npfds].fd = fd;
+           pfds[npfds].events = events;
+           pfds[npfds].revents = 0;
+           npfds++;
+       }
+    }
+    if (!nfds)
+       return -1;
+#if !ALARM_UPDATES_TIME
+    getCurrentTime();
+#endif
+    statCounter.syscalls.polls++;
+    if (poll(pfds, npfds, 0) < 1)
+       return incoming_sockets_accepted;
+    for (i = 0; i < npfds; i++) {
+       int revents;
+       if (((revents = pfds[i].revents) == 0) || ((fd = pfds[i].fd) == -1))
+           continue;
+       if (revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) {
+           if ((hdl = fd_table[fd].read_handler)) {
+               fd_table[fd].read_handler = NULL;
+               hdl(fd, fd_table[fd].read_data);
+           } else if (pfds[i].events & POLLRDNORM)
+               debug(5, 1) ("comm_poll_incoming: FD %d NULL read handler\n",
+                   fd);
+       }
+       if (revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) {
+           if ((hdl = fd_table[fd].write_handler)) {
+               fd_table[fd].write_handler = NULL;
+               hdl(fd, fd_table[fd].write_data);
+           } else if (pfds[i].events & POLLWRNORM)
+               debug(5, 1) ("comm_poll_incoming: FD %d NULL write_handler\n",
+                   fd);
+       }
+    }
+    return incoming_sockets_accepted;
+}
+
+static void
+comm_poll_icp_incoming(void)
+{
+    int nfds = 0;
+    int fds[2];
+    int nevents;
+    icp_io_events = 0;
+    if (theInIcpConnection >= 0)
+       fds[nfds++] = theInIcpConnection;
+    if (theInIcpConnection != theOutIcpConnection)
+       if (theOutIcpConnection >= 0)
+           fds[nfds++] = theOutIcpConnection;
+    if (nfds == 0)
+       return;
+    nevents = comm_check_incoming_poll_handlers(nfds, fds);
+    incoming_icp_interval += Config.comm_incoming.icp_average - nevents;
+    if (incoming_icp_interval < Config.comm_incoming.icp_min_poll)
+       incoming_icp_interval = Config.comm_incoming.icp_min_poll;
+    if (incoming_icp_interval > MAX_INCOMING_INTERVAL)
+       incoming_icp_interval = MAX_INCOMING_INTERVAL;
+    if (nevents > INCOMING_ICP_MAX)
+       nevents = INCOMING_ICP_MAX;
+    statHistCount(&statCounter.comm_icp_incoming, nevents);
+}
+
+static void
+comm_poll_http_incoming(void)
+{
+    int nfds = 0;
+    int fds[MAXHTTPPORTS];
+    int j;
+    int nevents;
+    http_io_events = 0;
+    for (j = 0; j < NHttpSockets; j++) {
+       if (HttpSockets[j] < 0)
+           continue;
+       if (commDeferRead(HttpSockets[j]))
+           continue;
+       fds[nfds++] = HttpSockets[j];
+    }
+    nevents = comm_check_incoming_poll_handlers(nfds, fds);
+    incoming_http_interval = incoming_http_interval
+       + Config.comm_incoming.http_average - nevents;
+    if (incoming_http_interval < Config.comm_incoming.http_min_poll)
+       incoming_http_interval = Config.comm_incoming.http_min_poll;
+    if (incoming_http_interval > MAX_INCOMING_INTERVAL)
+       incoming_http_interval = MAX_INCOMING_INTERVAL;
+    if (nevents > INCOMING_HTTP_MAX)
+       nevents = INCOMING_HTTP_MAX;
+    statHistCount(&statCounter.comm_http_incoming, nevents);
+}
+
+/* poll all sockets; call handlers for those that are ready. */
+int
+comm_poll(int msec)
+{
+    struct pollfd pfds[SQUID_MAXFD];
+#if DELAY_POOLS
+    fd_set slowfds;
+#endif
+    int fd;
+    unsigned int i;
+    unsigned int maxfd;
+    unsigned int nfds;
+    unsigned int npending;
+    int num;
+    int callicp = 0, callhttp = 0;
+    int calldns = 0;
+    static time_t last_timeout = 0;
+    double timeout = current_dtime + (msec / 1000.0);
+    do {
+#if !ALARM_UPDATES_TIME
+       double start;
+       getCurrentTime();
+       start = current_dtime;
+#endif
+       /* Handle any fs callbacks that need doing */
+       storeDirCallback();
+#if DELAY_POOLS
+       FD_ZERO(&slowfds);
+#endif
+       if (commCheckICPIncoming)
+           comm_poll_icp_incoming();
+       if (commCheckDNSIncoming)
+           comm_poll_dns_incoming();
+       if (commCheckHTTPIncoming)
+           comm_poll_http_incoming();
+       callicp = calldns = callhttp = 0;
+       nfds = 0;
+       npending = 0;
+       maxfd = Biggest_FD + 1;
+       for (i = 0; i < maxfd; i++) {
+           int events;
+           events = 0;
+           /* Check each open socket for a handler. */
+           if (fd_table[i].read_handler) {
+               int dopoll = 1;
+               switch (commDeferRead(i)) {
+               case 0:
+                   break;
+               case 1:
+                   dopoll = 0;
+                   break;
+#if DELAY_POOLS
+               case -1:
+                   FD_SET(i, &slowfds);
+                   break;
+#endif
+               default:
+                   fatalf("bad return value from commDeferRead(FD %d)\n", i);
+                   /* NOTREACHED */
+               }
+               if (dopoll) {
+                   switch (fd_table[i].read_pending) {
+                   case COMM_PENDING_NORMAL:
+                       events |= POLLRDNORM;
+                       break;
+                   case COMM_PENDING_WANTS_WRITE:
+                       events |= POLLWRNORM;
+                       break;
+                   case COMM_PENDING_WANTS_READ:
+                       events |= POLLRDNORM;
+                       break;
+                   case COMM_PENDING_NOW:
+                       events |= POLLRDNORM;
+                       npending++;
+                       break;
+                   }
+               }
+           }
+           if (fd_table[i].write_handler) {
+               switch (fd_table[i].write_pending) {
+               case COMM_PENDING_NORMAL:
+                   events |= POLLWRNORM;
+                   break;
+               case COMM_PENDING_WANTS_WRITE:
+                   events |= POLLWRNORM;
+                   break;
+               case COMM_PENDING_WANTS_READ:
+                   events |= POLLRDNORM;
+                   break;
+               case COMM_PENDING_NOW:
+                   events |= POLLWRNORM;
+                   npending++;
+                   break;
+               }
+           }
+           if (events) {
+               pfds[nfds].fd = i;
+               pfds[nfds].events = events;
+               pfds[nfds].revents = 0;
+               nfds++;
+           }
+       }
+       if (nfds == 0) {
+           assert(shutting_down);
+           return COMM_SHUTDOWN;
+       }
+       if (npending)
+           msec = 0;
+       if (msec > MAX_POLL_TIME)
+           msec = MAX_POLL_TIME;
+       statCounter.syscalls.polls++;
+       num = poll(pfds, nfds, msec);
+       statCounter.select_loops++;
+       if (num < 0 && !ignoreErrno(errno)) {
+           debug(5, 0) ("comm_poll: poll failure: %s\n", xstrerror());
+           assert(errno != EINVAL);
+           return COMM_ERROR;
+           /* NOTREACHED */
+       }
+       debug(5, num ? 5 : 8) ("comm_poll: %d+%u FDs ready\n", num, npending);
+       statHistCount(&statCounter.select_fds_hist, num);
+       /* Check timeout handlers ONCE each second. */
+       if (squid_curtime > last_timeout) {
+           last_timeout = squid_curtime;
+           checkTimeouts();
+       }
+       if (num <= 0 && npending == 0)
+           continue;
+       /* scan each socket but the accept socket. Poll this 
+        * more frequently to minimize losses due to the 5 connect 
+        * limit in SunOS */
+       for (i = 0; i < nfds; i++) {
+           fde *F;
+           int revents = pfds[i].revents;
+           fd = pfds[i].fd;
+           if (fd == -1)
+               continue;
+           switch (fd_table[fd].read_pending) {
+           case COMM_PENDING_NORMAL:
+           case COMM_PENDING_WANTS_READ:
+               break;
+           case COMM_PENDING_WANTS_WRITE:
+               if (pfds[i].revents & (POLLOUT | POLLWRNORM))
+                   revents |= POLLIN;
+               break;
+           case COMM_PENDING_NOW:
+               revents |= POLLIN;
+               break;
+           }
+           switch (fd_table[fd].write_pending) {
+           case COMM_PENDING_NORMAL:
+           case COMM_PENDING_WANTS_WRITE:
+               break;
+           case COMM_PENDING_WANTS_READ:
+               if (pfds[i].revents & (POLLIN | POLLRDNORM))
+                   revents |= POLLOUT;
+               break;
+           case COMM_PENDING_NOW:
+               revents |= POLLOUT;
+               break;
+           }
+           if (revents == 0)
+               continue;
+           if (fdIsIcp(fd)) {
+               callicp = 1;
+               continue;
+           }
+           if (fdIsDns(fd)) {
+               calldns = 1;
+               continue;
+           }
+           if (fdIsHttp(fd)) {
+               callhttp = 1;
+               continue;
+           }
+           F = &fd_table[fd];
+           if (revents & (POLLRDNORM | POLLIN | POLLHUP | POLLERR)) {
+               PF *hdl = F->read_handler;
+               debug(5, 6) ("comm_poll: FD %d ready for reading\n", fd);
+               if (hdl == NULL)
+                   (void) 0;   /* Nothing to do */
+#if DELAY_POOLS
+               else if (FD_ISSET(fd, &slowfds))
+                   commAddSlowFd(fd);
+#endif
+               else {
+                   F->read_handler = NULL;
+                   F->read_pending = COMM_PENDING_NORMAL;
+                   hdl(fd, F->read_data);
+                   statCounter.select_fds++;
+                   if (commCheckICPIncoming)
+                       comm_poll_icp_incoming();
+                   if (commCheckDNSIncoming)
+                       comm_poll_dns_incoming();
+                   if (commCheckHTTPIncoming)
+                       comm_poll_http_incoming();
+               }
+           }
+           if (revents & (POLLWRNORM | POLLOUT | POLLHUP | POLLERR)) {
+               PF *hdl = F->write_handler;
+               debug(5, 5) ("comm_poll: FD %d ready for writing\n", fd);
+               if (hdl != NULL) {
+                   F->write_handler = NULL;
+                   F->write_pending = COMM_PENDING_NORMAL;
+                   hdl(fd, F->write_data);
+                   statCounter.select_fds++;
+                   if (commCheckICPIncoming)
+                       comm_poll_icp_incoming();
+                   if (commCheckDNSIncoming)
+                       comm_poll_dns_incoming();
+                   if (commCheckHTTPIncoming)
+                       comm_poll_http_incoming();
+               }
+           }
+           if (revents & POLLNVAL) {
+               close_handler *ch;
+               debug(5, 0) ("WARNING: FD %d has handlers, but it's invalid.\n", fd);
+               debug(5, 0) ("FD %d is a %s\n", fd, fdTypeStr[F->type]);
+               debug(5, 0) ("--> %s\n", F->desc);
+               debug(5, 0) ("tmout:%p read:%p write:%p\n",
+                   F->timeout_handler,
+                   F->read_handler,
+                   F->write_handler);
+               for (ch = F->close_handler; ch; ch = ch->next)
+                   debug(5, 0) (" close handler: %p\n", ch->handler);
+               if (F->close_handler) {
+                   commCallCloseHandlers(fd);
+               } else if (F->timeout_handler) {
+                   debug(5, 0) ("comm_poll: Calling Timeout Handler\n");
+                   F->timeout_handler(fd, F->timeout_data);
+               }
+               F->close_handler = NULL;
+               F->timeout_handler = NULL;
+               F->read_handler = NULL;
+               F->write_handler = NULL;
+               if (F->flags.open)
+                   fd_close(fd);
+           }
+       }
+       if (callicp)
+           comm_poll_icp_incoming();
+       if (calldns)
+           comm_poll_dns_incoming();
+       if (callhttp)
+           comm_poll_http_incoming();
+#if DELAY_POOLS
+       while ((fd = commGetSlowFd()) != -1) {
+           fde *F = &fd_table[fd];
+           PF *hdl = F->read_handler;
+           debug(5, 6) ("comm_select: slow FD %d selected for reading\n", fd);
+           if (hdl != NULL) {
+               F->read_handler = NULL;
+               F->read_pending = COMM_PENDING_NORMAL;
+               hdl(fd, F->read_data);
+               statCounter.select_fds++;
+               if (commCheckICPIncoming)
+                   comm_poll_icp_incoming();
+               if (commCheckDNSIncoming)
+                   comm_poll_dns_incoming();
+               if (commCheckHTTPIncoming)
+                   comm_poll_http_incoming();
+           }
+       }
+#endif
+#if !ALARM_UPDATES_TIME
+       getCurrentTime();
+       statCounter.select_time += (current_dtime - start);
+#endif
+       return COMM_OK;
+    }
+    while (timeout > current_dtime);
+    debug(5, 8) ("comm_poll: time out: %ld.\n", (long int) squid_curtime);
+    return COMM_TIMEOUT;
+}
+
+
+static void
+comm_poll_dns_incoming(void)
+{
+    int nfds = 0;
+    int fds[2];
+    int nevents;
+    dns_io_events = 0;
+    if (DnsSocket < 0)
+       return;
+    fds[nfds++] = DnsSocket;
+    nevents = comm_check_incoming_poll_handlers(nfds, fds);
+    if (nevents < 0)
+       return;
+    incoming_dns_interval += Config.comm_incoming.dns_average - nevents;
+    if (incoming_dns_interval < Config.comm_incoming.dns_min_poll)
+       incoming_dns_interval = Config.comm_incoming.dns_min_poll;
+    if (incoming_dns_interval > MAX_INCOMING_INTERVAL)
+       incoming_dns_interval = MAX_INCOMING_INTERVAL;
+    if (nevents > INCOMING_DNS_MAX)
+       nevents = INCOMING_DNS_MAX;
+    statHistCount(&statCounter.comm_dns_incoming, nevents);
+}
+
+void
+comm_select_init(void)
+{
+    cachemgrRegister("comm_incoming",
+       "comm_incoming() stats",
+       commIncomingStats, 0, 1);
+    FD_ZERO(&global_readfds);
+    FD_ZERO(&global_writefds);
+    nreadfds = nwritefds = 0;
+}
+
+
+static void
+commIncomingStats(StoreEntry * sentry)
+{
+    StatCounters *f = &statCounter;
+    storeAppendPrintf(sentry, "Current incoming_icp_interval: %d\n",
+       incoming_icp_interval >> INCOMING_FACTOR);
+    storeAppendPrintf(sentry, "Current incoming_dns_interval: %d\n",
+       incoming_dns_interval >> INCOMING_FACTOR);
+    storeAppendPrintf(sentry, "Current incoming_http_interval: %d\n",
+       incoming_http_interval >> INCOMING_FACTOR);
+    storeAppendPrintf(sentry, "\n");
+    storeAppendPrintf(sentry, "Histogram of events per incoming socket type\n");
+    storeAppendPrintf(sentry, "ICP Messages handled per comm_poll_icp_incoming() call:\n");
+    statHistDump(&f->comm_icp_incoming, sentry, statHistIntDumper);
+    storeAppendPrintf(sentry, "DNS Messages handled per comm_poll_dns_incoming() call:\n");
+    statHistDump(&f->comm_dns_incoming, sentry, statHistIntDumper);
+    storeAppendPrintf(sentry, "HTTP Messages handled per comm_poll_http_incoming() call:\n");
+    statHistDump(&f->comm_http_incoming, sentry, statHistIntDumper);
+}
+
+void
+commUpdateReadBits(int fd, PF * handler)
+{
+    if (handler && !FD_ISSET(fd, &global_readfds)) {
+       FD_SET(fd, &global_readfds);
+       nreadfds++;
+    } else if (!handler && FD_ISSET(fd, &global_readfds)) {
+       FD_CLR(fd, &global_readfds);
+       nreadfds--;
+    }
+}
+
+void
+commUpdateWriteBits(int fd, PF * handler)
+{
+    if (handler && !FD_ISSET(fd, &global_writefds)) {
+       FD_SET(fd, &global_writefds);
+       nwritefds++;
+    } else if (!handler && FD_ISSET(fd, &global_writefds)) {
+       FD_CLR(fd, &global_writefds);
+       nwritefds--;
+    }
+}
+
+
+static int
+commDeferRead(int fd)
+{
+    fde *F = &fd_table[fd];
+    if (F->defer_check == NULL)
+       return 0;
+    return F->defer_check(fd, F->defer_data);
+}
+
+static void
+checkTimeouts(void)
+{
+    int fd;
+    fde *F = NULL;
+    PF *callback;
+    for (fd = 0; fd <= Biggest_FD; fd++) {
+       F = &fd_table[fd];
+       if (!F->flags.open)
+           continue;
+       if (F->timeout == 0)
+           continue;
+       if (F->timeout > squid_curtime)
+           continue;
+       debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd);
+       if (F->timeout_handler) {
+           debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd);
+           callback = F->timeout_handler;
+           F->timeout_handler = NULL;
+           callback(fd, F->timeout_data);
+       } else {
+           debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd);
+           comm_close(fd);
+       }
+    }
+}
+
+
+/* Called by async-io or diskd to speed up the polling */
+void
+comm_quick_poll_required(void)
+{
+    MAX_POLL_TIME = 10;
+}
+
+#endif
Index: src/comm_epoll.c
===================================================================
RCS file: src/comm_epoll.c
diff -N src/comm_epoll.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ src/comm_epoll.c    24 May 2006 14:31:51 -0000
@@ -0,0 +1,459 @@
+
+/*
+ * $Id: comm_select.c,v 1.66 2006/05/22 19:01:32 serassio Exp $
+ *
+ * DEBUG: section 5     Socket Functions
+ *
+ * SQUID Web Proxy Cache          http://www.squid-cache.org/
+ * ----------------------------------------------------------
+ *
+ *  Squid is the result of efforts by numerous individuals from
+ *  the Internet community; see the CONTRIBUTORS file for full
+ *  details.   Many organizations have provided support for Squid's
+ *  development; see the SPONSORS file for full details.  Squid is
+ *  Copyrighted (C) 2001 by the Regents of the University of
+ *  California; see the COPYRIGHT file for full details.  Squid
+ *  incorporates software developed and/or copyrighted by other
+ *  sources; see the CREDITS file for full details.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "squid.h"
+
+#if USE_EPOLL
+
+static int MAX_POLL_TIME = 1000;       /* see also comm_quick_poll_required() */
+
+/* epoll structs */
+static int kdpfd;
+static struct epoll_event *pevents;
+
+/* Array to keep track of backed off filedescriptors */
+static int backoff_fds[FD_SETSIZE];
+
+static void checkTimeouts(void);
+static int commDeferRead(int fd);
+
+static const char *
+epolltype_atoi(int x)
+{
+    switch (x) {
+
+    case EPOLL_CTL_ADD:
+       return "EPOLL_CTL_ADD";
+
+    case EPOLL_CTL_DEL:
+       return "EPOLL_CTL_DEL";
+
+    case EPOLL_CTL_MOD:
+       return "EPOLL_CTL_MOD";
+
+    default:
+       return "UNKNOWN_EPOLLCTL_OP";
+    }
+}
+
+/* Bring all fds back online */
+void
+commEpollBackon()
+{
+    fde *F;
+    int i;
+    int fd;
+    int j = 0;
+
+    for (i = 0; i < FD_SETSIZE; i++) {
+       if (backoff_fds[i]) {
+           /* record the fd and zero the descriptor */
+           fd = backoff_fds[i];
+           F = &fd_table[fd];
+           backoff_fds[i] = 0;
+
+           /* If the fd is no longer backed off, ignore */
+           if (!(F->epoll_backoff)) {
+               continue;
+           }
+           /* If the fd is still meant to be backed off, add it to the start of
+            * the list and continue */
+           if (commDeferRead(fd) == 1) {
+               backoff_fds[j++] = fd;
+               continue;
+           }
+           debug(5, 4) ("commEpollBackon: fd=%d\n", fd);
+
+           /* Resume operations for this fd */
+           commResumeFD(fd);
+       } else {
+           /* Once we hit a non-backed off FD, we can break */
+           break;
+       }
+    }
+}
+
+
+/* Back off on the next epoll for the given fd */
+void
+commEpollBackoff(int fd)
+{
+    commDeferFD(fd);
+}
+
+/* Defer reads from this fd */
+void
+commDeferFD(int fd)
+{
+    fde *F = &fd_table[fd];
+    struct epoll_event ev;
+    int epoll_ctl_type = 0;
+    int i;
+
+    /* Return if the fd is already backed off */
+    if (F->epoll_backoff) {
+       return;
+    }
+    for (i = 0; i < FD_SETSIZE; i++) {
+       if (!(backoff_fds[i]) || (backoff_fds[i] == fd)) {
+           break;
+       }
+    }
+
+    /* die if we have no fd (very unlikely), if the fd has no existing epoll 
+     * state, if we are given a bad fd, or if the fd is not open. */
+    assert(i < FD_SETSIZE);
+    assert(F->epoll_state);
+    assert(fd >= 0);
+    assert(F->flags.open);
+
+    /* set up ev struct */
+    ev.events = 0;
+    ev.data.fd = fd;
+
+    /* If we were only waiting for reads, delete the fd, otherwise remove the 
+     * read event */
+    if (F->epoll_state == (EPOLLIN | EPOLLHUP | EPOLLERR)) {
+       epoll_ctl_type = EPOLL_CTL_DEL;
+    } else {
+       epoll_ctl_type = EPOLL_CTL_MOD;
+       ev.events = (F->epoll_state - EPOLLIN);
+    }
+    debug(5, 5) ("commDeferFD: epoll_ctl_type=%s, fd=%d epoll_state=%d\n", epolltype_atoi(epoll_ctl_type), fd, F->epoll_state);
+
+    if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
+       /* If an error occurs, log it */
+       debug(5, 1) ("commDeferFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", epolltype_atoi(epoll_ctl_type), fd, xstrerror());
+    } else {
+       backoff_fds[i] = fd;
+       F->epoll_backoff = 1;
+       F->epoll_state = ev.events;
+    }
+}
+
+/* Resume reading from the given fd */
+void
+commResumeFD(int fd)
+{
+    struct epoll_event ev;
+    int epoll_ctl_type = 0;
+    fde *F;
+
+    F = &fd_table[fd];
+
+    /* If the fd has been modified, do nothing and remove the flag */
+    if (!(F->read_handler) || !(F->epoll_backoff)) {
+       debug(5, 2) ("commResumeFD: fd=%d ignoring read_handler=%p, epoll_backoff=%d\n", fd, F->read_handler, F->epoll_backoff);
+       F->epoll_backoff = 0;
+       return;
+    }
+    /* we need to re-add the fd to the epoll list with EPOLLIN set */
+    ev.events = F->epoll_state | EPOLLIN | EPOLLHUP | EPOLLERR;
+    ev.data.fd = fd;
+
+    /* If epoll_state is not set, then this fd is only waiting for
+     * reads, and needs adding, otherwise we mod it to add  EPOLLIN */
+    if (!(F->epoll_state)) {
+       epoll_ctl_type = EPOLL_CTL_ADD;
+    } else {
+       epoll_ctl_type = EPOLL_CTL_MOD;
+    }
+    debug(5, 5) ("commResumeFD: epoll_ctl_type=%s, fd=%d\n", epolltype_atoi(epoll_ctl_type), fd);
+
+    /* Try and add the fd back into the epoll struct */
+    if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
+       /* If an error occurs, log */
+       debug(5, 1) ("commResumeFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", epolltype_atoi(epoll_ctl_type), fd, xstrerror());
+    } else {
+       F->epoll_backoff = 0;
+       F->epoll_state = ev.events;
+    }
+}
+void
+comm_select_init()
+{
+    int i;
+    pevents = (struct epoll_event *) xmalloc(SQUID_MAXFD * sizeof(struct epoll_event));
+    if (!pevents) {
+       fatalf("comm_select_init: xmalloc() failed: %s\n", xstrerror());
+    }
+    kdpfd = epoll_create(SQUID_MAXFD);
+
+    if (kdpfd < 0) {
+       fatalf("comm_select_init: epoll_create(): %s\n", xstrerror());
+    }
+    for (i = 0; i < FD_SETSIZE; i++) {
+       backoff_fds[i] = 0;
+    }
+}
+
+void
+commUpdateReadBits(int fd, PF * handler)
+{
+    /* Not implemented */
+}
+
+void
+commUpdateWriteBits(int fd, PF * handler)
+{
+    /* Not implemented */
+}
+
+void
+commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, time_t timeout)
+{
+    fde *F = &fd_table[fd];
+    int epoll_ctl_type = 0;
+    struct epoll_event ev;
+
+    assert(fd >= 0);
+    assert(F->flags.open);
+    debug(5, 8) ("commSetSelect(fd=%d,type=%u,handler=%p,client_data=%p,timeout=%ld)\n", fd, type, handler, client_data, timeout);
+
+    ev.events = 0;
+    ev.data.fd = fd;
+
+    if (type & COMM_SELECT_READ) {
+       /* Only add the epoll event if the fd is not backed off */
+       if (handler && !(F->epoll_backoff)) {
+           ev.events |= EPOLLIN;
+       }
+       F->read_handler = handler;
+       F->read_data = client_data;
+
+       // Otherwise, use previously stored value if the fd is not backed off
+    } else if ((F->epoll_state & EPOLLIN) && (F->read_handler) && !(F->epoll_backoff)) {
+       ev.events |= EPOLLIN;
+    }
+    if (type & COMM_SELECT_WRITE) {
+       if (handler) {
+           ev.events |= EPOLLOUT;
+       }
+       F->write_handler = handler;
+       F->write_data = client_data;
+
+       // Otherwise, use previously stored value
+    } else if ((F->epoll_state & EPOLLOUT) && (F->write_handler)) {
+       ev.events |= EPOLLOUT;
+    }
+    if (ev.events) {
+       ev.events |= EPOLLHUP | EPOLLERR;
+    }
+    /* If the type is 0, force adding the fd to the epoll set */
+    if (!(type)) {
+       F->epoll_state = 0;
+    }
+    if (ev.events != F->epoll_state) {
+       // If the struct is already in epoll MOD or DEL, else ADD
+       if (F->epoll_state) {
+           epoll_ctl_type = ev.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
+       } else {
+           epoll_ctl_type = EPOLL_CTL_ADD;
+       }
+
+       /* Update the state */
+       F->epoll_state = ev.events;
+
+       if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
+           debug(5, 1) ("commSetSelect: epoll_ctl(%s): failed on fd=%d: %s\n",
+               epolltype_atoi(epoll_ctl_type), fd, xstrerror());
+       }
+    }
+    if (timeout)
+       F->timeout = squid_curtime + timeout;
+}
+
+/* One pass of the epoll loop: periodic checks, epoll_wait() up to msec, dispatch; returns COMM_OK, COMM_ERROR or COMM_TIMEOUT */
+int
+comm_epoll(int msec)
+{
+    static time_t last_timeout = 0;
+    int i;
+    int num;
+    int fd;
+    fde *F;
+    PF *hdl;
+    struct epoll_event *cevents;
+    double timeout = current_dtime + (msec / 1000.0);
+
+    if (msec > MAX_POLL_TIME)
+       msec = MAX_POLL_TIME;
+
+    debug(50, 3) ("comm_epoll: timeout %d\n", msec);
+
+    do {
+#if !ALARM_UPDATES_TIME
+       double start;
+       getCurrentTime();
+       start = current_dtime;
+#endif
+
+       /* Check timeouts once per second */
+       if (last_timeout < squid_curtime) {
+           last_timeout = squid_curtime;
+           checkTimeouts();
+
+           /* bring backed off connections back online */
+           commEpollBackon();
+       }
+       /* Check for disk io callbacks */
+       storeDirCallback();
+
+       for (;;) {
+           statCounter.syscalls.polls++;
+           num = epoll_wait(kdpfd, pevents, SQUID_MAXFD, msec);
+           statCounter.select_loops++;
+
+           if (num >= 0)
+               break;
+
+           if (ignoreErrno(errno))
+               break;
+
+           debug(5, 0) ("comm_epoll: epoll failure: %s\n", xstrerror());
+
+           return COMM_ERROR;
+       }
+
+       statHistCount(&statCounter.select_fds_hist, num);
+
+       if (num <= 0)
+           continue;
+
+       for (i = 0, cevents = pevents; i < num; i++, cevents++) {
+           fd = cevents->data.fd;
+           F = &fd_table[fd];
+           debug(5, 8) ("comm_epoll(): got fd=%d events=%x monitoring=%x F->read_handler=%p F->write_handler=%p\n"
+               ,fd, cevents->events, F->epoll_state, F->read_handler, F->write_handler);
+           if (cevents->events & (EPOLLIN | EPOLLHUP | EPOLLERR)) {
+               if ((hdl = F->read_handler) != NULL) {
+                   /* If the descriptor is meant to be deferred, don't handle */
+                   if (commDeferRead(fd) == 1) {
+                       if (!(F->epoll_backoff)) {
+                           debug(5, 1) ("comm_epoll(): WARNING defer handler for fd=%d (desc=%s) does not call commDeferFD() - backing off manually\n", fd, F->desc);
+                           commEpollBackoff(fd);
+                       }
+                       goto WRITE_EVENT;
+                   }
+                   debug(5, 8) ("comm_epoll(): Calling read handler on fd=%d\n", fd);
+                   /* Handlers are one-shot: cleared before the call, dropped from epoll if not re-armed */
+                   F->read_handler = NULL;
+                   hdl(fd, F->read_data);
+                   statCounter.select_fds++;
+                   if ((F->read_handler == NULL) && (F->flags.open)) {
+                       commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
+                   }
+               } else if (cevents->events & EPOLLIN) {
+                   debug(5, 2) ("comm_epoll(): no read handler for fd=%d\n", fd);
+                   if (F->flags.open) {
+                       commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
+                   }
+               }
+           }
+         WRITE_EVENT:
+           if (cevents->events & (EPOLLOUT | EPOLLHUP | EPOLLERR)) {
+               if ((hdl = F->write_handler) != NULL) {
+                   debug(5, 8) ("comm_epoll(): Calling write handler on fd=%d\n", fd);
+                   F->write_handler = NULL;
+                   hdl(fd, F->write_data);
+                   statCounter.select_fds++;
+                   if ((F->write_handler == NULL) && (F->flags.open)) {
+                       commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
+                   }
+               } else if (cevents->events & EPOLLOUT) {
+                   debug(5, 2) ("comm_epoll(): no write handler for fd=%d\n", fd);
+                   if (F->flags.open) {
+                       commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
+                   }
+               }
+           }
+       }
+#if !ALARM_UPDATES_TIME
+       getCurrentTime();
+       statCounter.select_time += (current_dtime - start);
+#endif
+       /* Events were dispatched: report success after this single pass */
+       return COMM_OK;
+    }
+    while (timeout > current_dtime);
+
+    debug(5, 8) ("comm_epoll: time out: %ld.\n", (long int) squid_curtime);
+    return COMM_TIMEOUT;
+}
+
+/* Ask fd's defer_check hook whether reads should be deferred; 0 when no hook is set */
+static int
+commDeferRead(int fd)
+{
+    fde *entry = &fd_table[fd];
+
+    return entry->defer_check != NULL ? entry->defer_check(fd, entry->defer_data) : 0;
+}
+
+/* Scan every fd up to Biggest_FD; run the timeout handler of each expired one, or close it if it has none */
+static void
+checkTimeouts(void)
+{
+    int fd;
+    for (fd = 0; fd <= Biggest_FD; fd++) {
+       fde *entry = &fd_table[fd];
+       PF *expired;
+
+       /* Skip closed fds, fds without a timeout, and timeouts still in the future */
+       if (!entry->flags.open || entry->timeout == 0 || entry->timeout > squid_curtime)
+           continue;
+       debug(5, 5) ("checkTimeouts: FD %d Expired\n", fd);
+       expired = entry->timeout_handler;
+       if (expired == NULL) {
+           /* No timeout handler is registered: close the descriptor */
+           debug(5, 5) ("checkTimeouts: FD %d: Forcing comm_close()\n", fd);
+           comm_close(fd);
+           continue;
+       }
+       debug(5, 5) ("checkTimeouts: FD %d: Call timeout handler\n", fd);
+       /* The handler is cleared before it is invoked */
+       entry->timeout_handler = NULL;
+       expired(fd, entry->timeout_data);
+    }
+}
+
+
+/* Called by async-io or diskd to speed up the polling: lowers MAX_POLL_TIME, to which comm_epoll() clamps its msec wait, to 10 */
+void
+comm_quick_poll_required(void)
+{
+    MAX_POLL_TIME = 10;
+}
+
+#endif
Index: src/Makefile.am
===================================================================
RCS file: /server/cvs-server/squid/squid/src/Makefile.am,v
retrieving revision 1.44
diff -u -r1.44 Makefile.am
--- src/Makefile.am     23 May 2006 21:52:27 -0000      1.44
+++ src/Makefile.am     24 May 2006 14:31:51 -0000
@@ -115,6 +115,8 @@
        client_side.c \
        comm.c \
        comm_select.c \
+       comm_poll.c \
+       comm_epoll.c \
        debug.c \
        defines.h \
        $(DELAY_POOL_SOURCE) \
Index: src/client_side.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/client_side.c,v
retrieving revision 1.621
diff -u -r1.621 client_side.c
--- src/client_side.c   22 May 2006 22:16:37 -0000      1.621
+++ src/client_side.c   24 May 2006 14:31:52 -0000
@@ -3290,7 +3290,7 @@
     ConnStateData *conn = data;
     if (conn->body.size_left && !F->flags.socket_eof) {
        if (conn->in.offset >= conn->in.size - 1) {
-#if HAVE_EPOLL
+#if USE_EPOLL
            /* The commResumeFD function is called in this file */
            conn->in.clientfd = fd;
            commDeferFD(fd);
@@ -3301,7 +3301,7 @@
        }
     } else {
        if (conn->defer.until > squid_curtime) {
-#if HAVE_EPOLL
+#if USE_EPOLL
            /* This is a second resolution timer, so commEpollBackon will 
             * handle the resume for this defer call */
            commDeferFD(fd);
@@ -3709,7 +3709,7 @@
        conn->body.size_left -= size;
        /* Move any remaining data */
        conn->in.offset -= size;
-#if HAVE_EPOLL
+#if USE_EPOLL
        /* Resume the fd if necessary */
        if (conn->in.clientfd) {
            if (conn->in.offset < conn->in.size - 1) {
Index: src/comm.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/comm.c,v
retrieving revision 1.342
diff -u -r1.342 comm.c
--- src/comm.c  22 May 2006 18:55:23 -0000      1.342
+++ src/comm.c  24 May 2006 14:31:52 -0000
@@ -375,7 +375,7 @@
        commSetTcpNoDelay(cs->fd);
 #endif
 
-#if HAVE_EPOLL
+#if USE_EPOLL
     // If we are using epoll(), we need to make sure that this fd will be 
polled
     commSetSelect(cs->fd, 0, NULL, NULL, 0);
 #endif
@@ -779,7 +779,7 @@
 }
 
 /* Epoll redefines this function in comm_select.c */
-#if !HAVE_EPOLL
+#if !USE_EPOLL
 void
 commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, 
time_t timeout)
 {
Index: src/comm_select.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/comm_select.c,v
retrieving revision 1.66
diff -u -r1.66 comm_select.c
--- src/comm_select.c   22 May 2006 19:01:32 -0000      1.66
+++ src/comm_select.c   24 May 2006 14:31:52 -0000
@@ -34,9 +34,10 @@
 
 #include "squid.h"
 
+#if USE_SELECT
+
 static int MAX_POLL_TIME = 1000;       /* see also comm_quick_poll_required() 
*/
 
-#if !HAVE_EPOLL
 
 #ifndef        howmany
 #define howmany(x, y)   (((x)+((y)-1))/(y))
@@ -765,7 +766,7 @@
                default:
                    fatalf("bad return value from commDeferRead(FD %d)\n", fd);
                }
-               if (FD_ISSET(fd, &readfds) && fd_table[fd].flags.read_pending) {
+               if (FD_ISSET(fd, &readfds) && fd_table[fd].read_pending & 
COMM_PENDING_NORMAL) {
                    FD_SET(fd, &pendingfds);
                    pending++;
                }
@@ -1126,381 +1127,6 @@
     }
 }
 
-#else /* HAVE_EPOLL */
-/* epoll structs */
-static int kdpfd;
-static struct epoll_event *pevents;
-
-/* Array to keep track of backed off filedescriptors */
-static int backoff_fds[FD_SETSIZE];
-
-static void checkTimeouts(void);
-static int commDeferRead(int fd);
-
-static const char *
-epolltype_atoi(int x)
-{
-    switch (x) {
-
-    case EPOLL_CTL_ADD:
-       return "EPOLL_CTL_ADD";
-
-    case EPOLL_CTL_DEL:
-       return "EPOLL_CTL_DEL";
-
-    case EPOLL_CTL_MOD:
-       return "EPOLL_CTL_MOD";
-
-    default:
-       return "UNKNOWN_EPOLLCTL_OP";
-    }
-}
-
-/* Bring all fds back online */
-void
-commEpollBackon()
-{
-    fde *F;
-    int i;
-    int fd;
-    int j = 0;
-
-    for (i = 0; i < FD_SETSIZE; i++) {
-       if (backoff_fds[i]) {
-           /* record the fd and zero the descriptor */
-           fd = backoff_fds[i];
-           F = &fd_table[fd];
-           backoff_fds[i] = 0;
-
-           /* If the fd is no longer backed off, ignore */
-           if (!(F->epoll_backoff)) {
-               continue;
-           }
-           /* If the fd is still meant to be backed off, add it to the start of
-            * the list and continue */
-           if (commDeferRead(fd) == 1) {
-               backoff_fds[j++] = fd;
-               continue;
-           }
-           debug(5, 4) ("commEpollBackon: fd=%d\n", fd);
-
-           /* Resume operations for this fd */
-           commResumeFD(fd);
-       } else {
-           /* Once we hit a non-backed off FD, we can break */
-           break;
-       }
-    }
-}
-
-
-/* Back off on the next epoll for the given fd */
-void
-commEpollBackoff(int fd)
-{
-    commDeferFD(fd);
-}
-
-/* Defer reads from this fd */
-void
-commDeferFD(int fd)
-{
-    fde *F = &fd_table[fd];
-    struct epoll_event ev;
-    int epoll_ctl_type = 0;
-    int i;
-
-    /* Return if the fd is already backed off */
-    if (F->epoll_backoff) {
-       return;
-    }
-    for (i = 0; i < FD_SETSIZE; i++) {
-       if (!(backoff_fds[i]) || (backoff_fds[i] == fd)) {
-           break;
-       }
-    }
-
-    /* die if we have no fd (very unlikely), if the fd has no existing epoll 
-     * state, if we are given a bad fd, or if the fd is not open. */
-    assert(i < FD_SETSIZE);
-    assert(F->epoll_state);
-    assert(fd >= 0);
-    assert(F->flags.open);
-
-    /* set up ev struct */
-    ev.events = 0;
-    ev.data.fd = fd;
-
-    /* If we were only waiting for reads, delete the fd, otherwise remove the 
-     * read event */
-    if (F->epoll_state == (EPOLLIN | EPOLLHUP | EPOLLERR)) {
-       epoll_ctl_type = EPOLL_CTL_DEL;
-    } else {
-       epoll_ctl_type = EPOLL_CTL_MOD;
-       ev.events = (F->epoll_state - EPOLLIN);
-    }
-    debug(5, 5) ("commDeferFD: epoll_ctl_type=%s, fd=%d epoll_state=%d\n", 
epolltype_atoi(epoll_ctl_type), fd, F->epoll_state);
-
-    if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
-       /* If an error occurs, log it */
-       debug(5, 1) ("commDeferFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", 
epolltype_atoi(epoll_ctl_type), fd, xstrerror());
-    } else {
-       backoff_fds[i] = fd;
-       F->epoll_backoff = 1;
-       F->epoll_state = ev.events;
-    }
-}
-
-/* Resume reading from the given fd */
-void
-commResumeFD(int fd)
-{
-    struct epoll_event ev;
-    int epoll_ctl_type = 0;
-    fde *F;
-
-    F = &fd_table[fd];
-
-    /* If the fd has been modified, do nothing and remove the flag */
-    if (!(F->read_handler) || !(F->epoll_backoff)) {
-       debug(5, 2) ("commResumeFD: fd=%d ignoring read_handler=%p, 
epoll_backoff=%d\n", fd, F->read_handler, F->epoll_backoff);
-       F->epoll_backoff = 0;
-       return;
-    }
-    /* we need to re-add the fd to the epoll list with EPOLLIN set */
-    ev.events = F->epoll_state | EPOLLIN | EPOLLHUP | EPOLLERR;
-    ev.data.fd = fd;
-
-    /* If epoll_state is not set, then this fd is only waiting for
-     * reads, and needs adding, otherwise we mod it to add  EPOLLIN */
-    if (!(F->epoll_state)) {
-       epoll_ctl_type = EPOLL_CTL_ADD;
-    } else {
-       epoll_ctl_type = EPOLL_CTL_MOD;
-    }
-    debug(5, 5) ("commResumeFD: epoll_ctl_type=%s, fd=%d\n", 
epolltype_atoi(epoll_ctl_type), fd);
-
-    /* Try and add the fd back into the epoll struct */
-    if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
-       /* If an error occurs, log */
-       debug(5, 1) ("commResumeFD: epoll_ctl(,%s,,): failed on fd=%d: %s\n", 
epolltype_atoi(epoll_ctl_type), fd, xstrerror());
-    } else {
-       F->epoll_backoff = 0;
-       F->epoll_state = ev.events;
-    }
-}
-void
-comm_select_init()
-{
-    int i;
-    pevents = (struct epoll_event *) xmalloc(SQUID_MAXFD * sizeof(struct 
epoll_event));
-    if (!pevents) {
-       fatalf("comm_select_init: xmalloc() failed: %s\n", xstrerror());
-    }
-    kdpfd = epoll_create(SQUID_MAXFD);
-
-    if (kdpfd < 0) {
-       fatalf("comm_select_init: epoll_create(): %s\n", xstrerror());
-    }
-    for (i = 0; i < FD_SETSIZE; i++) {
-       backoff_fds[i] = 0;
-    }
-}
-
-void
-commUpdateReadBits(int fd, PF * handler)
-{
-    /* Not imlpemented */
-}
-
-void
-commUpdateWriteBits(int fd, PF * handler)
-{
-    /* Not imlpemented */
-}
-
-void
-commSetSelect(int fd, unsigned int type, PF * handler, void *client_data, 
time_t timeout)
-{
-    fde *F = &fd_table[fd];
-    int epoll_ctl_type = 0;
-    struct epoll_event ev;
-
-    assert(fd >= 0);
-    assert(F->flags.open);
-    debug(5, 8) 
("commSetSelect(fd=%d,type=%u,handler=%p,client_data=%p,timeout=%ld)\n", fd, 
type, handler, client_data, timeout);
-
-    ev.events = 0;
-    ev.data.fd = fd;
-
-    if (type & COMM_SELECT_READ) {
-       /* Only add the epoll event if the fd is not backed off */
-       if (handler && !(F->epoll_backoff)) {
-           ev.events |= EPOLLIN;
-       }
-       F->read_handler = handler;
-       F->read_data = client_data;
-
-       // Otherwise, use previously stored value if the fd is not backed off
-    } else if ((F->epoll_state & EPOLLIN) && (F->read_handler) && 
!(F->epoll_backoff)) {
-       ev.events |= EPOLLIN;
-    }
-    if (type & COMM_SELECT_WRITE) {
-       if (handler) {
-           ev.events |= EPOLLOUT;
-       }
-       F->write_handler = handler;
-       F->write_data = client_data;
-
-       // Otherwise, use previously stored value
-    } else if ((F->epoll_state & EPOLLOUT) && (F->write_handler)) {
-       ev.events |= EPOLLOUT;
-    }
-    if (ev.events) {
-       ev.events |= EPOLLHUP | EPOLLERR;
-    }
-    /* If the type is 0, force adding the fd to the epoll set */
-    if (!(type)) {
-       F->epoll_state = 0;
-    }
-    if (ev.events != F->epoll_state) {
-       // If the struct is already in epoll MOD or DEL, else ADD
-       if (F->epoll_state) {
-           epoll_ctl_type = ev.events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
-       } else {
-           epoll_ctl_type = EPOLL_CTL_ADD;
-       }
-
-       /* Update the state */
-       F->epoll_state = ev.events;
-
-       if (epoll_ctl(kdpfd, epoll_ctl_type, fd, &ev) < 0) {
-           debug(5, 1) ("commSetSelect: epoll_ctl(%s): failed on fd=%d: %s\n",
-               epolltype_atoi(epoll_ctl_type), fd, xstrerror());
-       }
-    }
-    if (timeout)
-       F->timeout = squid_curtime + timeout;
-}
-
-int
-comm_epoll(int msec)
-{
-    struct timespec ts;
-    static time_t last_timeout = 0;
-    int i;
-    int num;
-    int fd;
-    fde *F;
-    PF *hdl;
-    struct epoll_event *cevents;
-    double timeout = current_dtime + (msec / 1000.0);
-
-    if (msec > MAX_POLL_TIME)
-       msec = MAX_POLL_TIME;
-
-    debug(50, 3) ("comm_epoll: timeout %d\n", msec);
-
-    do {
-#if !ALARM_UPDATES_TIME
-       double start;
-       getCurrentTime();
-       start = current_dtime;
-#endif
-       ts.tv_sec = msec / 1000;
-       ts.tv_nsec = (msec % 1000) * 1000;
-
-       /* Check timeouts once per second */
-       if (last_timeout < squid_curtime) {
-           last_timeout = squid_curtime;
-           checkTimeouts();
-
-           /* bring backed off connections back online */
-           commEpollBackon();
-       }
-       /* Check for disk io callbacks */
-       storeDirCallback();
-
-       for (;;) {
-           statCounter.syscalls.polls++;
-           num = epoll_wait(kdpfd, pevents, SQUID_MAXFD, msec);
-           statCounter.select_loops++;
-
-           if (num >= 0)
-               break;
-
-           if (ignoreErrno(errno))
-               break;
-
-           debug(5, 0) ("comm_epoll: epoll failure: %s\n", xstrerror());
-
-           return COMM_ERROR;
-       }
-
-       statHistCount(&statCounter.select_fds_hist, num);
-
-       if (num <= 0)
-           continue;
-
-       for (i = 0, cevents = pevents; i < num; i++, cevents++) {
-           fd = cevents->data.fd;
-           F = &fd_table[fd];
-           debug(5, 8) ("comm_epoll(): got fd=%d events=%x monitoring=%x 
F->read_handler=%p F->write_handler=%p\n"
-               ,fd, cevents->events, F->epoll_state, F->read_handler, 
F->write_handler);
-           if (cevents->events & (EPOLLIN | EPOLLHUP | EPOLLERR)) {
-               if ((hdl = F->read_handler) != NULL) {
-                   // If the descriptor is meant to be deferred, don't handle
-                   if (commDeferRead(fd) == 1) {
-                       if (!(F->epoll_backoff)) {
-                           debug(5, 1) ("comm_epoll(): WARNING defer handler 
for fd=%d (desc=%s) does not call commDeferFD() - backing off manually\n", fd, 
F->desc);
-                           commEpollBackoff(fd);
-                       }
-                       goto WRITE_EVENT;
-                   }
-                   debug(5, 8) ("comm_epoll(): Calling read handler on 
fd=%d\n", fd);
-                   F->read_handler = NULL;
-                   hdl(fd, F->read_data);
-                   statCounter.select_fds++;
-                   if ((F->read_handler == NULL) && (F->flags.open)) {
-                       commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
-                   }
-               } else if (cevents->events & EPOLLIN) {
-                   debug(5, 2) ("comm_epoll(): no read handler for fd=%d", fd);
-                   if (F->flags.open) {
-                       commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
-                   }
-               }
-           }
-         WRITE_EVENT:
-           if (cevents->events & (EPOLLOUT | EPOLLHUP | EPOLLERR)) {
-               if ((hdl = F->write_handler) != NULL) {
-                   debug(5, 8) ("comm_epoll(): Calling write handler on 
fd=%d\n", fd);
-                   F->write_handler = NULL;
-                   hdl(fd, F->write_data);
-                   statCounter.select_fds++;
-                   if ((F->write_handler == NULL) && (F->flags.open)) {
-                       commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
-                   }
-               } else if (cevents->events & EPOLLOUT) {
-                   debug(5, 2) ("comm_epoll(): no write handler for fd=%d\n", 
fd);
-                   if (F->flags.open) {
-                       commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
-                   }
-               }
-           }
-       }
-#if !ALARM_UPDATES_TIME
-       getCurrentTime();
-       statCounter.select_time += (current_dtime - start);
-#endif
-       return COMM_OK;
-    }
-    while (timeout > current_dtime);
-
-    debug(5, 8) ("comm_epoll: time out: %ld.\n", (long int) squid_curtime);
-    return COMM_TIMEOUT;
-}
-#endif /* HAVE_EPOLL  */
 
 static int
 commDeferRead(int fd)
@@ -1545,3 +1171,5 @@
 {
     MAX_POLL_TIME = 10;
 }
+
+#endif
Index: src/fd.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/fd.c,v
retrieving revision 1.46
diff -u -r1.46 fd.c
--- src/fd.c    18 May 2006 04:03:23 -0000      1.46
+++ src/fd.c    24 May 2006 14:31:52 -0000
@@ -84,7 +84,7 @@
        assert(F->write_handler == NULL);
     }
     debug(51, 3) ("fd_close FD %d %s\n", fd, F->desc);
-#if HAVE_EPOLL
+#if USE_EPOLL
     /* the epoll code needs to update the descriptor before flags.ope is 0 */
     commSetSelect(fd, COMM_SELECT_READ, NULL, NULL, 0);
     commSetSelect(fd, COMM_SELECT_WRITE, NULL, NULL, 0);
Index: src/forward.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/forward.c,v
retrieving revision 1.99
diff -u -r1.99 forward.c
--- src/forward.c       24 May 2006 11:37:30 -0000      1.99
+++ src/forward.c       24 May 2006 14:31:52 -0000
@@ -873,7 +873,7 @@
     else {
        int i = delayMostBytesWanted(mem, INT_MAX);
        if (0 == i) {
-#if HAVE_EPOLL
+#if USE_EPOLL
            if (fd >= 0) {
                mem->serverfd = fd;
                commDeferFD(fd);
@@ -899,7 +899,7 @@
         * few other corner cases.
         */
        if (fd >= 0 && mem->inmem_hi - mem->inmem_lo > SM_PAGE_SIZE + 
Config.Store.maxInMemObjSize + READ_AHEAD_GAP) {
-#if HAVE_EPOLL
+#if USE_EPOLL
            EBIT_SET(e->flags, ENTRY_DEFER_READ);
            mem->serverfd = fd;
            commDeferFD(fd);
@@ -909,7 +909,7 @@
     }
     if (fd >= 0 && mem->inmem_hi - storeLowestMemReaderOffset(e) > 
READ_AHEAD_GAP) {
        EBIT_SET(e->flags, ENTRY_DEFER_READ);
-#if HAVE_EPOLL
+#if USE_EPOLL
        mem->serverfd = fd;
        commDeferFD(fd);
 #endif
Index: src/main.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/main.c,v
retrieving revision 1.366
diff -u -r1.366 main.c
--- src/main.c  22 May 2006 21:54:56 -0000      1.366
+++ src/main.c  24 May 2006 14:31:52 -0000
@@ -776,12 +776,14 @@
        eventRun();
        if ((loop_delay = eventNextTime()) < 0)
            loop_delay = 0;
-#if HAVE_EPOLL
+#if USE_EPOLL
        switch (comm_epoll(loop_delay)) {
-#elif HAVE_POLL
+#elif USE_POLL
            switch (comm_poll(loop_delay)) {
-#else
+#elif USE_SELECT
        switch (comm_select(loop_delay)) {
+#else
+#error Need poll, epoll or select defined!
 #endif
        case COMM_OK:
            errcount = 0;       /* reset if successful */
Index: src/protos.h
===================================================================
RCS file: /server/cvs-server/squid/squid/src/protos.h,v
retrieving revision 1.469
diff -u -r1.469 protos.h
--- src/protos.h        22 May 2006 23:05:27 -0000      1.469
+++ src/protos.h        24 May 2006 14:31:52 -0000
@@ -188,12 +188,14 @@
  * comm_select.c
  */
 extern void comm_select_init(void);
-#if HAVE_EPOLL
+#if USE_EPOLL
 extern int comm_epoll(int);
-#elif HAVE_POLL
+#elif USE_POLL
 extern int comm_poll(int);
-#else
+#elif USE_SELECT
 extern int comm_select(int);
+#else
+#error USE_POLL, USE_EPOLL or USE_SELECT need to be defined!
 #endif
 extern void commUpdateReadBits(int, PF *);
 extern void commUpdateWriteBits(int, PF *);
Index: src/squid.h
===================================================================
RCS file: /server/cvs-server/squid/squid/src/squid.h,v
retrieving revision 1.234
diff -u -r1.234 squid.h
--- src/squid.h 22 May 2006 18:55:23 -0000      1.234
+++ src/squid.h 24 May 2006 14:31:52 -0000
@@ -250,15 +250,15 @@
  *  -- Oskar Pearson <[EMAIL PROTECTED]>
  *  -- Stewart Forster <[EMAIL PROTECTED]>
  */
-#if HAVE_POLL
+#if USE_POLL
 #if HAVE_POLL_H
 #include <poll.h>
 #else /* HAVE_POLL_H */
 #undef HAVE_POLL
 #endif /* HAVE_POLL_H */
-#endif /* HAVE_POLL */
+#endif /* USE_POLL */
 
-#if HAVE_EPOLL
+#if USE_EPOLL
 #include <sys/epoll.h>
 #endif
 
@@ -393,7 +393,7 @@
 #include "Stack.h"
 
 /* Needed for poll() on Linux at least */
-#if HAVE_POLL
+#if USE_POLL
 #ifndef POLLRDNORM
 #define POLLRDNORM POLLIN
 #endif
Index: src/ssl.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/ssl.c,v
retrieving revision 1.130
diff -u -r1.130 ssl.c
--- src/ssl.c   18 May 2006 04:03:24 -0000      1.130
+++ src/ssl.c   24 May 2006 14:31:52 -0000
@@ -139,7 +139,7 @@
     if (i == INT_MAX)
        return 0;
     if (i == 0) {
-#if HAVE_EPOLL
+#if USE_EPOLL
        commDeferFD(fd);
 #endif
        return 1;
Index: src/stat.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/stat.c,v
retrieving revision 1.366
diff -u -r1.366 stat.c
--- src/stat.c  18 May 2006 04:03:24 -0000      1.366
+++ src/stat.c  24 May 2006 14:31:52 -0000
@@ -835,7 +835,7 @@
     storeAppendPrintf(sentry, "aborted_requests = %f/sec\n",
        XAVG(aborted_requests));
 
-#if HAVE_POLL || HAVE_EPOLL
+#if USE_POLL || USE_EPOLL
     storeAppendPrintf(sentry, "syscalls.polls = %f/sec\n", 
XAVG(syscalls.polls));
 #else
     storeAppendPrintf(sentry, "syscalls.selects = %f/sec\n", 
XAVG(syscalls.selects));
Index: src/store_client.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/store_client.c,v
retrieving revision 1.118
diff -u -r1.118 store_client.c
--- src/store_client.c  20 May 2006 12:58:51 -0000      1.118
+++ src/store_client.c  24 May 2006 14:31:52 -0000
@@ -285,7 +285,7 @@
            debug(20, 3) ("storeClientCopy3: %s - clearing ENTRY_DEFER_READ\n", 
e->mem_obj->url);
            /* Clear the flag and re-poll the fd */
            EBIT_CLR(e->flags, ENTRY_DEFER_READ);
-#if HAVE_EPOLL
+#if USE_EPOLL
            if (mem->serverfd != 0) {
                commResumeFD(mem->serverfd);
                mem->serverfd = 0;
Index: src/store_swapout.c
===================================================================
RCS file: /server/cvs-server/squid/squid/src/store_swapout.c,v
retrieving revision 1.91
diff -u -r1.91 store_swapout.c
--- src/store_swapout.c 18 May 2006 12:48:51 -0000      1.91
+++ src/store_swapout.c 24 May 2006 14:31:52 -0000
@@ -152,7 +152,7 @@
 
            if (mem->inmem_hi - mem->inmem_lo <= READ_AHEAD_GAP) {
                EBIT_CLR(e->flags, ENTRY_DEFER_READ);
-#if HAVE_EPOLL
+#if USE_EPOLL
                if (mem->serverfd != 0) {
                    commResumeFD(mem->serverfd);
                    mem->serverfd = 0;
Index: src/structs.h
===================================================================
RCS file: /server/cvs-server/squid/squid/src/structs.h,v
retrieving revision 1.457
diff -u -r1.457 structs.h
--- src/structs.h       22 May 2006 22:24:09 -0000      1.457
+++ src/structs.h       24 May 2006 14:31:53 -0000
@@ -2011,7 +2011,7 @@
            int recvfroms;
            int sendtos;
        } sock;
-#if HAVE_POLL || HAVE_EPOLL
+#if USE_POLL || USE_EPOLL
        int polls;
 #else
        int selects;
Index: configure.in
===================================================================
RCS file: /server/cvs-server/squid/squid/configure.in,v
retrieving revision 1.334
diff -u -r1.334 configure.in
--- configure.in        24 May 2006 11:20:11 -0000      1.334
+++ configure.in        24 May 2006 14:31:53 -0000
@@ -783,6 +783,24 @@
   esac
 ])
 
+dnl Enable select()
+AC_ARG_ENABLE(select,
+[  --enable-select         Enable select() support.
+  --disable-select        Disable select() support. ],
+
+[
+  case "$enableval" in
+  yes)
+    echo "Forcing select() to be enabled"
+    ac_cv_func_select='yes'
+    ;;
+  no)
+    echo "Forcing select() to be disabled"
+    ac_cv_func_select='no'
+    ;;
+  esac
+])
+
 dnl Enable epoll()
 AC_ARG_ENABLE(epoll,
 [  --enable-epoll          Enable epoll() instead of poll() or select().  
@@ -2082,6 +2100,31 @@
        bswap_32 \
 )
 
+dnl Magic which checks whether we are forcing a type of comm loop we
+dnl are actually going to (ab)use
+
+dnl Actually do the define magic now
+dnl mostly ripped from squid-commloops, thanks to adrian and benno
+
+if test "$ac_cv_func_epoll" = "yes" ; then
+        SELECT_TYPE="epoll"
+        AC_DEFINE(USE_EPOLL,1,[Use epoll() for the IO loop])
+        AC_CHECK_LIB(epoll, epoll_create, [EPOLL_LIBS="-lepoll"])
+        AC_SUBST(EPOLL_LIBS)
+elif test "$ac_cv_func_poll" = "yes" ; then
+        SELECT_TYPE="poll"
+        AC_DEFINE(USE_POLL,1,[Use poll() for the IO loop])
+elif test "$ac_cv_func_select" = "yes" ; then
+        SELECT_TYPE="select"
+        AC_DEFINE(USE_SELECT,1,[Use select() for the IO loop])
+else
+        echo "Eep!  Can't find poll, epoll, or select!"
+        echo "I'll try select and hope for the best."
+        SELECT_TYPE="select"
+        AC_DEFINE(USE_SELECT,1,[Use select() for the IO loop])
+fi
+echo "Using ${SELECT_TYPE} for select loop."
+
 dnl Yay!  Another Linux brokenness.  Its not good enough
 dnl to know that setresuid() exists, because RedHat 5.0 declares
 dnl setresuid() but doesn't implement it.
Index: include/autoconf.h.in
===================================================================
RCS file: /server/cvs-server/squid/squid/include/autoconf.h.in,v
retrieving revision 1.140
diff -u -r1.140 autoconf.h.in
--- include/autoconf.h.in       23 May 2006 14:54:36 -0000      1.140
+++ include/autoconf.h.in       24 May 2006 14:31:53 -0000
@@ -753,6 +753,9 @@
    dnsserver processes instead. */
 #undef USE_DNSSERVERS
 
+/* Use epoll() for the IO loop */
+#undef USE_EPOLL
+
 /* Define if we should use GNU regex */
 #undef USE_GNUREGEX
 
@@ -778,10 +781,16 @@
    (USE_SSL) */
 #undef USE_OPENSSL
 
+/* Use poll() for the IO loop */
+#undef USE_POLL
+
 /* If you want to log Referer request header values, define this. By default,
    they are written to referer.log in the Squid log directory. */
 #undef USE_REFERER_LOG
 
+/* Use select() for the IO loop */
+#undef USE_SELECT
+
 /* Define this to include code for SSL encryption. */
 #undef USE_SSL
 

Reply via email to