martin 98/06/02 05:50:47
Modified: src CHANGES src/modules/proxy mod_proxy.h proxy_cache.c Log: Do Proxy Garbage Collection in background & other proxy garbage fixes Revision Changes Path 1.881 +5 -0 apache-1.3/src/CHANGES Index: CHANGES =================================================================== RCS file: /home/cvs/apache-1.3/src/CHANGES,v retrieving revision 1.880 retrieving revision 1.881 diff -u -u -r1.880 -r1.881 --- CHANGES 1998/06/02 08:01:49 1.880 +++ CHANGES 1998/06/02 12:50:44 1.881 @@ -1,5 +1,10 @@ Changes with Apache 1.3.1 + *) Proxy Cache Fixes: account for directory sizes, fork off garbage collection + to continue in background, use predefined types (off_t, size_t, time_t), + log the current cache usage percentage at LogLevel debug + [Martin Kraemer, based on discussion between Dean Gaudet & Dirk vanGulik] + Changes with Apache 1.3.0 *) Using a type map file as a custom error document was not possible. 1.35 +6 -6 apache-1.3/src/modules/proxy/mod_proxy.h Index: mod_proxy.h =================================================================== RCS file: /home/cvs/apache-1.3/src/modules/proxy/mod_proxy.h,v retrieving revision 1.34 retrieving revision 1.35 diff -u -u -r1.34 -r1.35 --- mod_proxy.h 1998/05/27 22:56:04 1.34 +++ mod_proxy.h 1998/06/02 12:50:46 1.35 @@ -188,11 +188,11 @@ /* static information about the local cache */ struct cache_conf { const char *root; /* the location of the cache directory */ - int space; /* Maximum cache size (in 1024 bytes) */ - int maxexpire; /* Maximum time to keep cached files in secs */ - int defaultexpire; /* default time to keep cached file in secs */ + off_t space; /* Maximum cache size (in 1024 bytes) */ + time_t maxexpire; /* Maximum time to keep cached files in secs */ + time_t defaultexpire; /* default time to keep cached file in secs */ double lmfactor; /* factor for estimating expires date */ - int gcinterval; /* garbage collection interval, in seconds */ + time_t gcinterval; /* garbage collection interval, in seconds */ int 
dirlevels; /* Number of levels of subdirectories */ int dirlength; /* Length of subdirectory names */ }; @@ -207,7 +207,7 @@ array_header *nocaches; char *domain; /* domain name to use in absence of a domain name in the request */ int req; /* true if proxy requests are enabled */ - int recv_buffer_size; + size_t recv_buffer_size; } proxy_server_conf; struct hdr_entry { @@ -229,7 +229,7 @@ time_t lmod; /* last-modified date of cached entity */ time_t date; /* the date the cached file was last touched */ int version; /* update count of the file */ - unsigned int len; /* content length */ + off_t len; /* content length */ char *protocol; /* Protocol, and major/minor number, e.g. HTTP/1.1 */ int status; /* the status of the cached file */ char *resp_line; /* the whole status like (protocol, code + message) */ 1.42 +182 -55 apache-1.3/src/modules/proxy/proxy_cache.c Index: proxy_cache.c =================================================================== RCS file: /home/cvs/apache-1.3/src/modules/proxy/proxy_cache.c,v retrieving revision 1.41 retrieving revision 1.42 diff -u -u -r1.41 -r1.42 --- proxy_cache.c 1998/05/29 18:20:58 1.41 +++ proxy_cache.c 1998/06/02 12:50:46 1.42 @@ -71,33 +71,37 @@ DEF_Explain -#ifndef abs -#define abs(c) ((c) >= 0 ? 
(c) : -(c)) -#endif - struct gc_ent { unsigned long int len; time_t expire; char file[HASH_LEN + 1]; - }; - -static int gcdiff(const void *ap, const void *bp) -{ - const struct gc_ent *a = *(const struct gc_ent * const *) ap; - const struct gc_ent *b = *(const struct gc_ent * const *) bp; - - if (a->expire > b->expire) - return 1; - else if (a->expire < b->expire) - return -1; - else - return 0; -} -static int curbytes, cachesize, every; -static unsigned long int curblocks; -static time_t garbage_now, garbage_expire; +/* Poor man's 61 bit arithmetic */ +typedef struct { + long lower; /* lower 30 bits of result */ + long upper; /* upper 31 bits of result */ +} long61_t; + +/* FIXME: The block size can be different on a `per file system' base. + * This would make automatic detection highly OS specific. + * In the GNU fileutils code for du(1), you can see how complicated it can + * become to detect the block size. And, with BSD-4.x fragments, it + * is even more difficult to get precise results. + * As a compromise (and to improve on the incorrect counting of cache + * size on byte level, omitting directory sizes entirely, which was + * used up to apache-1.3b7) we're rounding to multiples of 512 here. + * Your file system may be using larger blocks (I certainly hope so!) + * but it will hardly use smaller blocks. + * (So this approximation is still closer to reality than the old behavior). + * The best solution would be automatic detection, the next best solution + * IMHO is a sensible default and the possibility to override it. 
+ */ + +#define ROUNDUP2BLOCKS(_bytes) (((_bytes)+block_size-1) & ~(block_size-1)) +static long block_size = 512; /* this must be a power of 2 */ +static long61_t curbytes, cachesize; +static time_t every, garbage_now, garbage_expire; static char *filename; static mutex *garbage_mutex = NULL; @@ -114,6 +118,10 @@ static int sub_garbage_coll(request_rec *r, array_header *files, const char *cachedir, const char *cachesubdir); static void help_proxy_garbage_coll(request_rec *r); +#if !defined(WIN32) && !defined(MPE) && !defined(__EMX__) +static void detached_proxy_garbage_coll(request_rec *r); +#endif + void ap_proxy_garbage_coll(request_rec *r) { @@ -128,7 +136,13 @@ inside = 1; (void) ap_release_mutex(garbage_mutex); + ap_block_alarms(); /* avoid SIGALRM on big cache cleanup */ +#if !defined(WIN32) && !defined(MPE) && !defined(__EMX__) + detached_proxy_garbage_coll(r); +#else help_proxy_garbage_coll(r); +#endif + ap_unblock_alarms(); (void) ap_acquire_mutex(garbage_mutex); inside = 0; @@ -136,6 +150,116 @@ } +static void +add_long61 (long61_t *accu, long val) +{ + /* Add in lower 30 bits */ + accu->lower += (val & 0x3FFFFFFFL); + /* add in upper bits, and carry */ + accu->upper += (val >> 30) + ((accu->lower & ~0x3FFFFFFFL) != 0L); + /* Clear carry */ + accu->lower &= 0x3FFFFFFFL; +} + +static void +sub_long61 (long61_t *accu, long val) +{ + int carry = (val & 0x3FFFFFFFL) > accu->lower; + /* Subtract lower 30 bits */ + accu->lower = accu->lower - (val & 0x3FFFFFFFL) + ((carry) ? 0x40000000 : 0); + /* add in upper bits, and carry */ + accu->upper -= (val >> 30) + carry; +} + +/* Compare two long61's: + * return <0 when left < right + * return 0 when left == right + * return >0 when left > right + */ +static long +cmp_long61 (long61_t *left, long61_t *right) +{ + return (left->upper == right->upper) ? 
(left->lower - right->lower) + : (left->upper - right->upper); +} + +/* Compare two gc_ent's, sort them by expiration date */ +static int gcdiff(const void *ap, const void *bp) +{ + const struct gc_ent *a = (const struct gc_ent * const) ap; + const struct gc_ent *b = (const struct gc_ent * const) bp; + + if (a->expire > b->expire) + return 1; + else if (a->expire < b->expire) + return -1; + else + return 0; +} + +#if !defined(WIN32) && !defined(MPE) && !defined(__EMX__) +static void detached_proxy_garbage_coll(request_rec *r) +{ + pid_t pid; + int status; + pid_t pgrp; + + switch (pid = fork()) { + case -1: + ap_log_error(APLOG_MARK, APLOG_ERR, r->server, + "proxy: fork() for cache cleanup failed"); + return; + + case 0: /* Child */ + + /* close all sorts of things, including the socket fd */ + ap_cleanup_for_exec(); + + /* Fork twice to disassociate from the child */ + switch (pid = fork()) { + case -1: + ap_log_error(APLOG_MARK, APLOG_ERR, r->server, + "proxy: fork(2nd) for cache cleanup failed"); + exit(1); + + case 0: /* Child */ + /* The setpgrp() stuff was snarfed from http_main.c */ +#ifndef NO_SETSID + if ((pgrp = setsid()) == -1) { + perror("setsid"); + fprintf(stderr, "httpd: setsid failed\n"); + exit(1); + } +#elif defined(NEXT) || defined(NEWSOS) + if (setpgrp(0, getpid()) == -1 || (pgrp = getpgrp(0)) == -1) { + perror("setpgrp"); + fprintf(stderr, "httpd: setpgrp or getpgrp failed\n"); + exit(1); + } +#else + if ((pgrp = setpgrp(getpid(), 0)) == -1) { + perror("setpgrp"); + fprintf(stderr, "httpd: setpgrp failed\n"); + exit(1); + } +#endif + help_proxy_garbage_coll(r); + exit(0); + + default: /* Father */ + /* After grandson has been forked off, */ + /* there's nothing else to do. 
*/ + exit(0); + } + default: + /* Wait until grandson has been forked off */ + /* (without wait we'd leave a zombie) */ + waitpid(pid, &status, 0); + return; + } +} +#endif /* ndef WIN32 */ + static void help_proxy_garbage_coll(request_rec *r) { const char *cachedir; @@ -145,17 +269,24 @@ const struct cache_conf *conf = &pconf->cache; array_header *files; struct stat buf; - struct gc_ent *fent, **elts; + struct gc_ent *fent; int i, timefd; - static time_t lastcheck = BAD_DATE; /* static data!!! */ + static time_t lastcheck = BAD_DATE; /* static (per-process) data!!! */ cachedir = conf->root; - cachesize = conf->space; + /* configured size is given in kB. Make it bytes, convert to long61_t: */ + cachesize.lower = cachesize.upper = 0; + add_long61(&cachesize, conf->space << 10); every = conf->gcinterval; if (cachedir == NULL || every == -1) return; garbage_now = time(NULL); + /* Usually, the modification time of <cachedir>/.time can only increase. + * Thus, even with several child processes having their own copy of + * lastcheck, if time(NULL) still < lastcheck then it's not time + * for GC yet. 
+ */ if (garbage_now != -1 && lastcheck != BAD_DATE && garbage_now < lastcheck + every) return; @@ -176,7 +307,7 @@ ap_log_error(APLOG_MARK, APLOG_ERR, r->server, "proxy: creat(%s)", filename); else - lastcheck = abs(garbage_now); /* someone else got in there */ + lastcheck = garbage_now; /* someone else got in there */ ap_unblock_alarms(); return; } @@ -192,22 +323,24 @@ ap_log_error(APLOG_MARK, APLOG_ERR, r->server, "proxy: utimes(%s)", filename); } - files = ap_make_array(r->pool, 100, sizeof(struct gc_ent *)); - curblocks = 0; - curbytes = 0; + files = ap_make_array(r->pool, 100, sizeof(struct gc_ent)); + curbytes.upper = curbytes.lower = 0L; sub_garbage_coll(r, files, cachedir, "/"); - if (curblocks < cachesize || curblocks + curbytes <= cachesize) { + if (cmp_long61(&curbytes, &cachesize) < 0L) { + ap_log_error(APLOG_MARK, APLOG_DEBUG|APLOG_NOERRNO, r->server, + "proxy GC: Cache is %ld%% full (nothing deleted)", + ((curbytes.upper<<20)|(curbytes.lower>>10))*100/conf->space); ap_unblock_alarms(); return; } - qsort(files->elts, files->nelts, sizeof(struct gc_ent *), gcdiff); + /* sort the files we found by expiration date */ + qsort(files->elts, files->nelts, sizeof(struct gc_ent), gcdiff); - elts = (struct gc_ent **) files->elts; for (i = 0; i < files->nelts; i++) { - fent = elts[i]; + fent = &((struct gc_ent *) files->elts)[i]; sprintf(filename, "%s%s", cachedir, fent->file); Explain3("GC Unlinking %s (expiry %ld, garbage_now %ld)", filename, fent->expire, garbage_now); #if TESTING @@ -221,16 +354,15 @@ else #endif { - curblocks -= fent->len >> 10; - curbytes -= fent->len & 0x3FF; - if (curbytes < 0) { - curbytes += 1024; - curblocks--; - } - if (curblocks < cachesize || curblocks + curbytes <= cachesize) + sub_long61(&curbytes, ROUNDUP2BLOCKS(fent->len)); + if (cmp_long61(&curbytes, &cachesize) < 0) break; } } + + ap_log_error(APLOG_MARK, APLOG_DEBUG|APLOG_NOERRNO, r->server, + "proxy GC: Cache is %ld%% full (%d deleted)", + 
((curbytes.upper<<20)|(curbytes.lower>>10))*100/conf->space, i); ap_unblock_alarms(); } @@ -342,6 +474,9 @@ rmdir(newcachedir); #endif --nfiles; + } else { + /* Directory is not empty. Account for its size: */ + add_long61(&curbytes, ROUNDUP2BLOCKS(buf.st_size)); } continue; } @@ -378,22 +513,14 @@ * file. * */ - /* FIXME: We should make the array an array of gc_ents, not gc_ent *s - */ - fent = ap_palloc(r->pool, sizeof(struct gc_ent)); + fent = (struct gc_ent *) ap_push_array(files); fent->len = buf.st_size; fent->expire = garbage_expire; strcpy(fent->file, cachesubdir); strcat(fent->file, ent->d_name); - *(struct gc_ent **) ap_push_array(files) = fent; /* accumulate in blocks, to cope with directories > 4Gb */ - curblocks += buf.st_size >> 10; /* Kbytes */ - curbytes += buf.st_size & 0x3FF; - if (curbytes >= 1024) { - curbytes -= 1024; - curblocks++; - } + add_long61(&curbytes, ROUNDUP2BLOCKS(buf.st_size)); } closedir(dir); @@ -467,7 +594,7 @@ q = ap_proxy_get_header(c->hdrs, "Content-Length"); if (q == NULL) { strp = ap_palloc(p, 15); - ap_snprintf(strp, 15, "%u", c->len); + ap_snprintf(strp, 15, "%lu", c->len); ap_proxy_add_header(c->hdrs, "Content-Length", strp, HDR_REP); } } @@ -590,7 +717,7 @@ } ap_pclosef(r->pool, cachefp->fd); Explain0("Use local copy, cached file hasn't changed"); - return USE_LOCAL_COPY; + return HTTP_NOT_MODIFIED; } /* Ok, has been modified */ @@ -735,7 +862,7 @@ /* no date header! */ /* add one; N.B. 
use the time _now_ rather than when we were checking the cache */ - date = abs(now); + date = now; p = ap_gm_timestr_822(r->pool, now); dates = ap_proxy_add_header(resp_hdrs, "Date", p, HDR_REP); Explain0("Added date header"); @@ -775,10 +902,10 @@ double maxex = conf->cache.maxexpire; if (x > maxex) x = maxex; - expc = abs(now) + (int) x; + expc = now + (int) x; } else - expc = abs(now) + conf->cache.defaultexpire; + expc = now + conf->cache.defaultexpire; Explain1("Expiry date calculated %ld", expc); } @@ -820,7 +947,7 @@ ap_pclosef(r->pool, c->fp->fd); Explain0("Remote document not modified, use local copy"); /* CHECKME: Is this right? Shouldn't we check IMS again here? */ - return USE_LOCAL_COPY; + return HTTP_NOT_MODIFIED; } else { /* return the whole document */