RE: 2.5 and delay pools
Adrian wrote: -fd_set slowfds; +char slowfds[SQUID_MAXFD]; -static fd_set delay_no_delay; +static int delay_no_delay[SQUID_MAXFD]; Firstly, either decide on an int array or a char array to replace the current bitmask. On a typical 8k FD cache, that's either 8kb or 32kb rather than the current 1kb for the bitmask, which may seem like not much memory but remember on the one hand, people are running caches on systems with pretty small CPU caches, and on the other hand, ints may be faster to access than bitmasks on some faster/larger machines - so it's a tradeoff either way. Secondly, as per Henrik, confirm that use of the fd_set is the cause of your brokenness. Thirdly, you could consider using your own bitmask structure and the FD_SET/FD_CLR/etc functions. Only FD_ZERO won't work on an incorrectly sized structure as it references the calculated size of an fd_set. If you look on Linux, you'll find these are defined to something like: # define __FD_SET(fd, fdsp) \ __asm__ __volatile__ (btsl %1,%0 : =m (__FDS_BITS (fdsp)[__FDELT (fd)]) : r (((int) (fd)) % __NFDBITS) : cc,memory) ie - rather fast opcodes to access an array of longs as a bitmask. David.
Re: 2.5 and delay pools
On Sun, 7 Mar 2004, Adrian Chadd wrote: -#if !HAVE_POLL +#if HAVE_POLL +static char global_readfds[SQUID_MAXFD]; +static char global_writefds[SQUID_MAXFD]; Are these even used in poll? Have you verified that fd_set is the culpit to your problems by using the assert given earlier? Regards Henrik
Re: 2.5 and delay pools
On Thu, Mar 04, 2004, Adrian Chadd wrote: I can't leave that in - pinger/unlinkd use an fd_set. Gah, I'll have to do a little bit of tidying up to deal with the nreadfds and nwritefds in a more efficient fashion. Thanks. Right. Here's what I have. Its a bit evil, but broken fdset manipulation doesn't crash when using poll+delay pools. Comments? adrian :styx:/usr/local/src/squid-2/squid-2.5/src# cat /tmp/diff Index: comm_select.c === RCS file: /squid/squid/src/comm_select.c,v retrieving revision 1.53.2.7 diff -u -r1.53.2.7 comm_select.c --- comm_select.c 11 May 2003 17:30:13 - 1.53.2.7 +++ comm_select.c 8 Mar 2004 06:34:58 - @@ -63,11 +63,14 @@ static void comm_select_dns_incoming(void); #endif -#if !HAVE_POLL +#if HAVE_POLL +static char global_readfds[SQUID_MAXFD]; +static char global_writefds[SQUID_MAXFD]; +#else static struct timeval zero_tv; -#endif static fd_set global_readfds; static fd_set global_writefds; +#endif static int nreadfds; static int nwritefds; @@ -310,7 +313,7 @@ { struct pollfd pfds[SQUID_MAXFD]; #if DELAY_POOLS -fd_set slowfds; +char slowfds[SQUID_MAXFD]; #endif PF *hdl = NULL; int fd; @@ -332,7 +335,7 @@ /* Handle any fs callbacks that need doing */ storeDirCallback(); #if DELAY_POOLS - FD_ZERO(slowfds); + bzero(slowfds, sizeof(slowfds)); #endif if (commCheckICPIncoming) comm_poll_icp_incoming(); @@ -358,7 +361,7 @@ #if DELAY_POOLS case -1: events |= POLLRDNORM; - FD_SET(i, slowfds); + slowfds[i] = 1; break; #endif default: @@ -437,7 +440,7 @@ if (NULL == (hdl = F-read_handler)) (void) 0; #if DELAY_POOLS - else if (FD_ISSET(fd, slowfds)) + else if (slowfds[i]) commAddSlowFd(fd); #endif else { @@ -950,8 +953,13 @@ cachemgrRegister(comm_incoming, comm_incoming() stats, commIncomingStats, 0, 1); +#if HAVE_POLL +bzero(global_readfds, sizeof(global_readfds)); +bzero(global_writefds, sizeof(global_writefds)); +#else FD_ZERO(global_readfds); FD_ZERO(global_writefds); +#endif nreadfds = nwritefds = 0; } @@ -1082,6 +1090,32 @@ statHistDump(f-comm_http_incoming, sentry, statHistIntDumper); } +#if HAVE_POLL +void +commUpdateReadBits(int fd, PF * handler) +{ +if (handler !global_readfds[fd]) { +global_readfds[fd] = 1; + nreadfds++; +} else if (! handler global_readfds[fd]) { +global_readfds[fd] = 0; + nreadfds--; +} +} + +void +commUpdateWriteBits(int fd, PF * handler) +{ +if (handler !global_writefds[fd]) { +global_writefds[fd] = 1; + nwritefds++; +} else if (! handler global_writefds[fd]) { +global_writefds[fd] = 0; + nwritefds--; +} +} + +#else void commUpdateReadBits(int fd, PF * handler) { @@ -1105,6 +1139,7 @@ nwritefds--; } } +#endif /* Called by async-io or diskd to speed up the polling */ void Index: delay_pools.c === RCS file: /squid/squid/src/delay_pools.c,v retrieving revision 1.19.2.8 diff -u -r1.19.2.8 delay_pools.c --- delay_pools.c 18 Jun 2003 23:53:35 - 1.19.2.8 +++ delay_pools.c 8 Mar 2004 06:34:58 - @@ -89,7 +89,7 @@ typedef union _delayPool delayPool; static delayPool *delay_data = NULL; -static fd_set delay_no_delay; +static int delay_no_delay[SQUID_MAXFD]; static time_t delay_pools_last_update = 0; static hash_table *delay_id_ptr_hash = NULL; static long memory_used = 0; @@ -134,7 +134,7 @@ delayPoolsInit(void) { delay_pools_last_update = getCurrentTime(); -FD_ZERO(delay_no_delay); +bzero(delay_no_delay, sizeof(delay_no_delay)); cachemgrRegister(delay, Delay Pool Levels, delayPoolStats, 0, 1); } @@ -283,19 +283,19 @@ void delaySetNoDelay(int fd) { -FD_SET(fd, delay_no_delay); +delay_no_delay[fd] = 1; } void delayClearNoDelay(int fd) { -FD_CLR(fd, delay_no_delay); +delay_no_delay[fd] = 0; } int delayIsNoDelay(int fd) { -return FD_ISSET(fd, delay_no_delay); +return (delay_no_delay[fd] == 1); } static delay_id
Re: 2.5 and delay pools
On Thu, 4 Mar 2004, Adrian Chadd wrote: On Thu, Mar 04, 2004, Adrian Chadd wrote: You need to remove far more fd_set references if this is the problem. There is also seveal delay pool related fd_set usage in comm_poll, and a few other places I think. Ok. I must've missed them. Let me go through the codebase and remove all references to fd_set when you're not actually using select(). Ok, the only use I can see is in the slowfds use. The other use of fd_set is in the select() codepath. Here's the patch I'd like to commit: There is also at least the commUpdateReadBits() functions, probably more. A quick fix to detect them all is to add #if HAVE_POLL #define fd_set INVALID_USE_OF_FD_SET #endif to the end of squid.h. and then eleminate any errors seen on make -C src squid (assuing make clean) Regards Henrik
Re: 2.5 and delay pools
On Thu, Mar 04, 2004, Henrik Nordstrom wrote: Ok. I must've missed them. Let me go through the codebase and remove all references to fd_set when you're not actually using select(). Ok, the only use I can see is in the slowfds use. The other use of fd_set is in the select() codepath. Here's the patch I'd like to commit: There is also at least the commUpdateReadBits() functions, probably more. A quick fix to detect them all is to add #if HAVE_POLL #define fd_set INVALID_USE_OF_FD_SET #endif I can't leave that in - pinger/unlinkd use an fd_set. Gah, I'll have to do a little bit of tidying up to deal with the nreadfds and nwritefds in a more efficient fashion. Thanks. Arian
Re: 2.5 and delay pools
On Wed, 3 Mar 2004, Adrian Chadd wrote: I'm still having issues with squid-2.5 and delay pools. The FDSET stuff is _very_ broken when you're using 1024 fds. More likely the way FD_SETSIZE is extended is broken for your libc headers.. You need to remove far more fd_set references if this is the problem. There is also seveal delay pool related fd_set usage in comm_poll, and a few other places I think. To verify the FD_SETSIZE extension you can use the following assert(sizeof(fd_set) = (SQUID_MAXFD + 7) / 8); if this triggers you are in deep trouble. We probably SHOULD add this somewhere to make sure the problem is quickly detected if so should happen. Regards Henrik
Re: 2.5 and delay pools
On Thu, Mar 04, 2004, Henrik Nordstrom wrote: On Wed, 3 Mar 2004, Adrian Chadd wrote: I'm still having issues with squid-2.5 and delay pools. The FDSET stuff is _very_ broken when you're using 1024 fds. More likely the way FD_SETSIZE is extended is broken for your libc headers.. I agree, but its becoming a pain to work around this. You need to remove far more fd_set references if this is the problem. There is also seveal delay pool related fd_set usage in comm_poll, and a few other places I think. Ok. I must've missed them. Let me go through the codebase and remove all references to fd_set when you're not actually using select(). Adrian
Re: 2.5 and delay pools
On Thu, Mar 04, 2004, Adrian Chadd wrote: You need to remove far more fd_set references if this is the problem. There is also seveal delay pool related fd_set usage in comm_poll, and a few other places I think. Ok. I must've missed them. Let me go through the codebase and remove all references to fd_set when you're not actually using select(). Ok, the only use I can see is in the slowfds use. The other use of fd_set is in the select() codepath. Here's the patch I'd like to commit: ndex: delay_pools.c === RCS file: /squid/squid/src/delay_pools.c,v retrieving revision 1.19.2.8 diff -u -r1.19.2.8 delay_pools.c --- delay_pools.c 18 Jun 2003 23:53:35 - 1.19.2.8 +++ delay_pools.c 4 Mar 2004 07:47:21 - @@ -89,7 +89,7 @@ typedef union _delayPool delayPool; static delayPool *delay_data = NULL; -static fd_set delay_no_delay; +static int delay_no_delay[SQUID_MAXFD]; static time_t delay_pools_last_update = 0; static hash_table *delay_id_ptr_hash = NULL; static long memory_used = 0; @@ -134,7 +134,7 @@ delayPoolsInit(void) { delay_pools_last_update = getCurrentTime(); -FD_ZERO(delay_no_delay); +bzero(delay_no_delay, sizeof(delay_no_delay)); cachemgrRegister(delay, Delay Pool Levels, delayPoolStats, 0, 1); } @@ -283,19 +283,19 @@ void delaySetNoDelay(int fd) { -FD_SET(fd, delay_no_delay); +delay_no_delay[fd] = 1; } void delayClearNoDelay(int fd) { -FD_CLR(fd, delay_no_delay); +delay_no_delay[fd] = 0; } int delayIsNoDelay(int fd) { -return FD_ISSET(fd, delay_no_delay); +return (delay_no_delay[fd] == 1); } static delay_id Index: comm_select.c === RCS file: /squid/squid/src/comm_select.c,v retrieving revision 1.53.2.7 diff -u -r1.53.2.7 comm_select.c --- comm_select.c 11 May 2003 17:30:13 - 1.53.2.7 +++ comm_select.c 4 Mar 2004 07:47:21 - @@ -310,7 +310,7 @@ { struct pollfd pfds[SQUID_MAXFD]; #if DELAY_POOLS -fd_set slowfds; +char slowfds[SQUID_MAXFD]; #endif PF *hdl = NULL; int fd; @@ -332,7 +332,7 @@ /* Handle any fs callbacks that need doing */ storeDirCallback(); #if DELAY_POOLS - FD_ZERO(slowfds); + bzero(slowfds, sizeof(slowfds)); #endif if (commCheckICPIncoming) comm_poll_icp_incoming(); @@ -358,7 +358,7 @@ #if DELAY_POOLS case -1: events |= POLLRDNORM; - FD_SET(i, slowfds); + slowfds[i] = 1; break; #endif default: @@ -437,7 +437,7 @@ if (NULL == (hdl = F-read_handler)) (void) 0; #if DELAY_POOLS - else if (FD_ISSET(fd, slowfds)) + else if (slowfds[i]) commAddSlowFd(fd); #endif else {
Re: 2.5 and delay pools
On Thu, 4 Mar 2004, Adrian Chadd wrote: More likely the way FD_SETSIZE is extended is broken for your libc headers.. I agree, but its becoming a pain to work around this. Please verify the assert I sent. If it triggers we at least know this is the problem. Which libc are you using? You need to remove far more fd_set references if this is the problem. There is also seveal delay pool related fd_set usage in comm_poll, and a few other places I think. Ok. I must've missed them. Let me go through the codebase and remove all references to fd_set when you're not actually using select(). I prefer to not touch the comm loop within the 2.5 cycle.. Regards Henrik