On Fri, May 30, 2008 at 1:59 PM, Akins, Brian <[EMAIL PROTECTED]> wrote: > How we handle purge:
Oh, that reminds me: a long time ago I wrote htcacheadmin, a generic command-line utility for administering mod_disk_cache caches, which is how I /used/ to handle this situation. (I've attached the source, but it's *very* stale.) If I find the time next week, I'll get this working again; I think it might be useful to others! -- Colm
/* Copyright 2001-2005 The Apache Software Foundation or its licensors, as
 * applicable.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * htcacheadmin.c: a utility to allow administrators to track down urls
 * in their caches, and perform actions on that basis.
 *
 * Contributed by Colm MacCarthaigh <colm stdlib.net>
 * 11 Aug 2005
 */

#include "apr.h"
#include "apr_general.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_md5.h"
#include "apr_getopt.h"
#include "apr_date.h"
#include "apr_time.h"
#include "apr_uri.h"

/* strlen/memcpy/strcmp/strchr/strcasecmp */
#define APR_WANT_STRFUNC
#include "apr_want.h"

#if APR_HAVE_UNISTD_H
#include <unistd.h>
#endif
#if APR_HAVE_STDLIB_H
#include <stdlib.h>
#endif

/* mod_disk_cache.c extract start */

#define VARY_FORMAT_VERSION 3
#define DISK_FORMAT_VERSION 4

typedef struct {
    /* Indicates the format of the header struct stored on-disk. */
    apr_uint32_t format;
    /* The HTTP status code returned for this response. */
    int status;
    /* The size of the entity name that follows. */
    apr_size_t name_len;
    /* The number of times we've cached this entity. */
    apr_size_t entity_version;
    /* Miscellaneous time values. */
    apr_time_t date;
    apr_time_t expire;
    apr_time_t request_time;
    apr_time_t response_time;
} disk_cache_info_t;

/* mod_disk_cache.c extract end */

/* Number of characters in the encoded MD5 hash produced by cache_hash(). */
#define CACHE_HASH_LEN 22

/* Suffixes of the two files that make up one cached entity. */
#define HEADER_SUFFIX ".header"
#define DATA_SUFFIX   ".data"

/* Maximum number of candidate keys tried in locate mode (with -g). */
#define MAX_KEYS 6

/* cache_util.c extract started */

/*
 * Hash the cache key `it` with MD5, encode the digest as CACHE_HASH_LEN
 * characters and write it to `val`, split into `ndepth` directory components
 * of `nlength` characters each followed by the remaining characters as the
 * file name.  The result is NUL-terminated.
 *
 * Preconditions (checked by the caller, not here):
 *   - ndepth * nlength <= CACHE_HASH_LEN, otherwise the copies below run
 *     past the end of the encoded hash;
 *   - `val` has room for CACHE_HASH_LEN + ndepth + 1 bytes.
 */
static void cache_hash(const char *it, char *val, int ndepth, int nlength)
{
    apr_md5_ctx_t context;
    unsigned char digest[16];
    char tmp[CACHE_HASH_LEN];
    int i, k, d;
    unsigned int x;
    static const char enc_table[64] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";

    apr_md5_init(&context);
    apr_md5_update(&context, (const unsigned char *) it, strlen(it));
    apr_md5_final(digest, &context);

    /* encode 128 bits as 22 characters, using a modified uuencoding
     * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is
     * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
     */
    for (i = 0, k = 0; i < 15; i += 3) {
        x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2];
        tmp[k++] = enc_table[x >> 18];
        tmp[k++] = enc_table[(x >> 12) & 0x3f];
        tmp[k++] = enc_table[(x >> 6) & 0x3f];
        tmp[k++] = enc_table[x & 0x3f];
    }

    /* one byte left */
    x = digest[15];
    tmp[k++] = enc_table[x >> 2];    /* use up 6 bits */
    tmp[k++] = enc_table[(x << 4) & 0x3f];

    /* now split into directory levels */
    for (i = k = d = 0; d < ndepth; ++d) {
        memcpy(&val[i], &tmp[k], nlength);
        k += nlength;
        val[i + nlength] = '/';
        i += nlength + 1;
    }
    memcpy(&val[i], &tmp[k], CACHE_HASH_LEN - k);
    val[i + CACHE_HASH_LEN - k] = '\0';
}

/* cache_util.c extract end */

static apr_file_t *errfile;   /* stderr file handle */
static apr_file_t *outfile;   /* stdout file handle */

/* short program name as called */
static const char *shortname = "htcacheadmin";

/*
 * usage info: print the help text to stderr and exit with status 1.
 */
#define NL APR_EOL_STR
static void usage(void)
{
    apr_file_printf(errfile,
    "%s -- utility for administration of the disk cache."                    NL
    "Usage: %s [-gq] [-h HOSTNAME] -c ROOT -d DIRLEVEL -l DIRLENGTH URL"     NL
    " %s [-emHRbD] [-E DATE] HEADERFILE"                                     NL
                                                                             NL
    "Options:"                                                               NL
                                                                             NL
    " Locate mode:"                                                          NL
                                                                             NL
    " -c Use ROOT as the root directory for the cache. Must be identical"    NL
    " to the CacheRoot setting for Apache."                                  NL
                                                                             NL
    " -d Use DIRLEVEL as the number of subdirectories mod_disk_cache"        NL
    " creates for cached files. Must be identical to the"                    NL
    " CacheDirLevels setting for Apache."                                    NL
                                                                             NL
    " -l Use DIRLENGTH as the length of the subdirectories"                  NL
    " mod_disk_cache creates for cached files. Must be identical to"         NL
    " the CacheDirLength setting for Apache."                                NL
                                                                             NL
    " -h Use HOSTNAME as the value of the HTTP \"Host:\" header for the"     NL
    " cached URL."                                                           NL
                                                                             NL
    " -g Use some educated guesses to locate alternative locations for"      NL
    " the cached entity."                                                    NL
                                                                             NL
    " -q Do not output translated headername(s). 0 will be returned if"      NL
    " a cached entity is found, 1 otherwise."                                NL
                                                                             NL
    " Operate mode:"                                                         NL
                                                                             NL
    " -e Return 0 if the entity described by HEADERFILE has not expired."    NL
    " Return 1 otherwise."                                                   NL
                                                                             NL
    " -E Change the expires time for HEADERFILE to a time indicated by"      NL
    " the string DATE."                                                      NL
                                                                             NL
    " -m Output mod_disk_cache meta information for HEADERFILE."             NL
                                                                             NL
    " -H Output the cached HTTP response header information for"             NL
    " HEADERFILE"                                                            NL
                                                                             NL
    " -R Output the cached HTTP request header information for"              NL
    " HEADERFILE"                                                            NL
                                                                             NL
    " -b Output the cached body for HEADERFILE."                             NL
                                                                             NL
    " -D Delete the cached header and any data files for HEADERFILE."        NL,
    shortname,
    shortname,
    shortname
    );

    exit(1);
}
#undef NL

/* Everything the command line can ask for, grouped by mode. */
typedef struct {
    /* locate mode */
    int quiet;              /* -q */
    int guess;              /* -g */
    int dirlevels;          /* -d, 0 when not given */
    int dirlength;          /* -l, 0 when not given */
    const char *host;       /* -h, NULL when not given */
    const char *proxyroot;  /* -c, NULL when not given */

    /* operate mode */
    int checkexpires;       /* -e */
    int showmetadata;       /* -m */
    int showheaders;        /* -H */
    int showrequest;        /* -R */
    int showbody;           /* -b */
    int do_delete;          /* -D */
    const char *newexpire;  /* -E, NULL when not given */
} admin_opts_t;

/*
 * Parse `str` as a decimal integer in the range 1..CACHE_HASH_LEN.
 * Returns the value, or -1 if the string is empty, has trailing garbage or
 * is out of range.
 */
static int parse_dir_number(const char *str)
{
    char *end;
    apr_int64_t val;

    if (str == NULL || *str == '\0') {
        return -1;
    }

    val = apr_strtoi64(str, &end, 10);
    if (*end != '\0' || val <= 0 || val > CACHE_HASH_LEN) {
        return -1;
    }

    return (int) val;
}

/*
 * Locate mode: translate `url` into one or more cache keys, hash each key
 * into a header file name below the cache root and report the entities that
 * exist on disk.
 *
 * Returns 0 if at least one header file was found, 1 otherwise.  Unless
 * opts->quiet is set, the header and data paths of every entity found are
 * written to stdout.
 */
static int locate(apr_pool_t *pool, const admin_opts_t *opts, const char *url)
{
    const char *keys[MAX_KEYS];
    /* CACHE_HASH_LEN characters + at most CACHE_HASH_LEN '/' + NUL */
    char prefix[CACHE_HASH_LEN * 3];
    const char *host = opts->host;
    char *root;
    apr_uri_t uri;
    int numkeys = 0;
    int found = 0;
    int i;

    /* In this mode, the spare argument is a url */
    if (apr_uri_parse(pool, url, &uri) != APR_SUCCESS) {
        usage();
    }

    /* Right now, we only handle HTTP */
    if (uri.scheme && strcasecmp(uri.scheme, "http")) {
        usage();
    }

    /* We need at least valid dirlength, dirlevel and root arguments */
    if (opts->proxyroot == NULL || opts->dirlevels <= 0
        || opts->dirlength <= 0) {
        usage();
    }

    /* cache_hash() only has CACHE_HASH_LEN characters to distribute; leave
     * at least one of them for the file name itself. */
    if (opts->dirlevels * opts->dirlength >= CACHE_HASH_LEN) {
        apr_file_printf(errfile, "%s: DIRLEVEL * DIRLENGTH must be less "
                        "than %d" APR_EOL_STR, shortname, CACHE_HASH_LEN);
        return 1;
    }

    /* Change into the cache root and read it back, which yields an
     * absolute path whatever form ROOT was given in. */
    if (apr_filepath_set(opts->proxyroot, pool) != APR_SUCCESS
        || apr_filepath_get(&root, 0, pool) != APR_SUCCESS) {
        usage();
    }

    if (opts->guess) {
        /* Depending on whether the content is locally generated, proxied,
         * transparently proxied or requested with a port specified the
         * same url can be cached in different ways. If the user has asked
         * us to, we try some educated guesses;
         *
         *   /path/?
         *   hostname/path/?
         *   http://hostname/path/?
         *   hostnamehttp://hostname/path/?
         *   http://hostname:80/path/?
         *   hostnamehttp://hostname:80/path/?
         *
         * If no hostname was supplied with the -h argument, extract it
         * from the parsed uri, and vice versa. If neither was supplied,
         * only the first key can be checked.
         */
        const char *path = uri.path ? uri.path : "/";
        /* apr_pstrcat() stops at the first NULL, so a missing query simply
         * ends the key after the "?". */
        const char *pathq = apr_pstrcat(pool, path, "?", uri.query, NULL);

        if (!host) {
            host = uri.hostname;
        }

        keys[numkeys++] = pathq;

        if (host) {
            const char *hostname = uri.hostname ? uri.hostname : host;
            const char *site;

            /* The keys are assembled by hand rather than with
             * apr_uri_unparse(), which would omit a port equal to the
             * scheme default and so could never yield the ":80" forms.
             * IPv6 literals need their brackets restored. */
            if (strchr(hostname, ':')) {
                hostname = apr_pstrcat(pool, "[", hostname, "]", NULL);
            }
            site = apr_pstrcat(pool, "http://", hostname, NULL);

            keys[numkeys++] = apr_pstrcat(pool, host, pathq, NULL);

            if (uri.port_str) {
                /* If the user specified a port, then leave this alone */
                site = apr_pstrcat(pool, site, ":", uri.port_str, NULL);
                keys[numkeys++] = apr_pstrcat(pool, site, pathq, NULL);
                keys[numkeys++] = apr_pstrcat(pool, host, site, pathq, NULL);
            }
            else {
                const char *site80 = apr_pstrcat(pool, site, ":80", NULL);

                keys[numkeys++] = apr_pstrcat(pool, site, pathq, NULL);
                keys[numkeys++] = apr_pstrcat(pool, host, site, pathq, NULL);
                keys[numkeys++] = apr_pstrcat(pool, site80, pathq, NULL);
                keys[numkeys++] = apr_pstrcat(pool, host, site80, pathq,
                                              NULL);
            }
        }
    }
    else {
        /* Set the key exactly as the user supplied it to us, adding only
         * the "?" if necessary.  A NULL host or a present query ends the
         * apr_pstrcat() argument list early. */
        if (host) {
            keys[numkeys++] = apr_pstrcat(pool, host, url,
                                          uri.query ? NULL : "?", NULL);
        }
        else {
            keys[numkeys++] = apr_pstrcat(pool, url,
                                          uri.query ? NULL : "?", NULL);
        }
    }

    /* Run through whatever keys we have been asked to check */
    for (i = 0; i < numkeys; i++) {
        apr_status_t status;
        apr_finfo_t finfo;
        const char *base, *headerpath;

        cache_hash(keys[i], prefix, opts->dirlevels, opts->dirlength);
        base = apr_pstrcat(pool, root, "/", prefix, NULL);
        headerpath = apr_pstrcat(pool, base, HEADER_SUFFIX, NULL);

        status = apr_stat(&finfo, headerpath, APR_FINFO_TYPE, pool);
        if (status == APR_SUCCESS || status == APR_INCOMPLETE) {
            found = 1;
            if (!opts->quiet) {
                apr_file_printf(outfile, "header: %s" APR_EOL_STR
                                "data: %s" APR_EOL_STR, headerpath,
                                apr_pstrcat(pool, base, DATA_SUFFIX, NULL));
            }
        }
        else if (!APR_STATUS_IS_ENOENT(status)) {
            apr_file_printf(errfile, "%s: could not determine if %s "
                            "exists. Possible perrmissions problem."
                            APR_EOL_STR, shortname, headerpath);
        }
    }

    return found ? 0 : 1;
}

/*
 * Read the disk_cache_info_t record and the entity name that follows it
 * from the start of `header`.
 *
 * On success returns 0, fills in *info and points *entity at a
 * NUL-terminated copy of the name.  On failure prints a diagnostic and
 * returns 1.
 */
static int read_header_info(apr_pool_t *pool, apr_file_t *header,
                            const char *headerpath, disk_cache_info_t *info,
                            char **entity)
{
    apr_uint32_t format;
    apr_size_t len = sizeof(format);
    apr_off_t offset = 0;
    apr_finfo_t finfo;

    /* Check for the format of the header file */
    if (apr_file_read_full(header, &format, len, &len) != APR_SUCCESS) {
        apr_file_printf(errfile, "%s: error reading from %s" APR_EOL_STR,
                        shortname, headerpath);
        return 1;
    }

    if (format == VARY_FORMAT_VERSION) {
        apr_file_printf(errfile, "%s: header file in unsupported vary format"
                        APR_EOL_STR, shortname);
        return 1;
    }
    if (format != DISK_FORMAT_VERSION) {
        apr_file_printf(errfile, "%s: header file in unrecognised format %u"
                        APR_EOL_STR, shortname, (unsigned int) format);
        return 1;
    }

    /* Rewind to the start of the file and read in the meta information */
    len = sizeof(*info);
    if (apr_file_seek(header, APR_SET, &offset) != APR_SUCCESS
        || apr_file_read_full(header, info, len, &len) != APR_SUCCESS) {
        apr_file_printf(errfile, "%s: error reading from %s" APR_EOL_STR,
                        shortname, headerpath);
        return 1;
    }

    /* name_len comes straight from disk; make sure the file is actually
     * big enough to hold that many bytes before allocating for it. */
    if (apr_file_info_get(&finfo, APR_FINFO_SIZE, header) != APR_SUCCESS
        || finfo.size < (apr_off_t) sizeof(*info)
        || (apr_uint64_t) info->name_len
           > (apr_uint64_t) (finfo.size - (apr_off_t) sizeof(*info))) {
        apr_file_printf(errfile, "%s: %s is truncated or corrupt"
                        APR_EOL_STR, shortname, headerpath);
        return 1;
    }

    /* Read in the entity name; apr_pcalloc() supplies the terminator */
    if (!(*entity = apr_pcalloc(pool, info->name_len + 1))) {
        apr_file_printf(errfile, "%s: Cannot assign memory" APR_EOL_STR,
                        shortname);
        return 1;
    }
    if (info->name_len > 0
        && apr_file_read_full(header, *entity, info->name_len, &len)
           != APR_SUCCESS) {
        apr_file_printf(errfile, "%s: error reading from %s" APR_EOL_STR,
                        shortname, headerpath);
        return 1;
    }

    return 0;
}

/*
 * Copy the whole of the file at `datapath` to stdout.
 * Returns 0 on success, 1 (after printing a diagnostic) on failure.
 */
static int dump_body(apr_pool_t *pool, const char *datapath)
{
    apr_status_t status;
    apr_file_t *data;
    char buffer[4096];

    status = apr_file_open(&data, datapath, APR_FOPEN_READ | APR_FOPEN_BINARY,
                           APR_OS_DEFAULT, pool);
    if (status != APR_SUCCESS) {
        apr_file_printf(errfile, "%s: header exists, but data file does not."
                        APR_EOL_STR, shortname);
        if (!APR_STATUS_IS_ENOENT(status)) {
            apr_file_printf(errfile, "%s: could not determine if %s exists. "
                            "Possible perrmissions problem." APR_EOL_STR,
                            shortname, datapath);
        }
        return 1;
    }

    for (;;) {
        /* apr_file_read() overwrites len with the count actually read, so
         * it has to be reset to the buffer size on every pass. */
        apr_size_t len = sizeof(buffer);

        status = apr_file_read(data, buffer, &len);
        if (status != APR_SUCCESS) {
            break;
        }
        if (apr_file_write_full(outfile, buffer, len, NULL) != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error writing body of %s"
                            APR_EOL_STR, shortname, datapath);
            apr_file_close(data);
            return 1;
        }
    }
    apr_file_close(data);

    if (!APR_STATUS_IS_EOF(status)) {
        apr_file_printf(errfile, "%s: error reading all of %s" APR_EOL_STR,
                        shortname, datapath);
        return 1;
    }

    return 0;
}

/*
 * Operate mode: inspect, modify or delete the cached entity whose header
 * file is `headerpath`.  The data file name is derived by swapping the
 * ".header" suffix for ".data".
 *
 * Actions are performed in this order: -m, -e (evaluated against the expiry
 * time found on disk), -E, -H/-R, -b, -D.
 *
 * Returns 0 on success and 1 on any error, when the header file does not
 * exist, or when -e was given and the entity has expired.
 */
static int operate(apr_pool_t *pool, const admin_opts_t *opts,
                   const char *headerpath)
{
    const apr_size_t suffixlen = sizeof(HEADER_SUFFIX) - 1;
    const apr_size_t pathlen = strlen(headerpath);
    apr_int32_t openflags = APR_FOPEN_READ | APR_FOPEN_BINARY;
    apr_time_t newexpire = 0;
    apr_status_t status;
    apr_file_t *header;
    const char *datapath;
    int rv = 0;

    if (pathlen <= suffixlen
        || strcmp(headerpath + pathlen - suffixlen, HEADER_SUFFIX) != 0) {
        apr_file_printf(errfile, "%s: %s is not a %s file" APR_EOL_STR,
                        shortname, headerpath, HEADER_SUFFIX);
        return 1;
    }
    datapath = apr_pstrcat(pool,
                           apr_pstrndup(pool, headerpath, pathlen - suffixlen),
                           DATA_SUFFIX, NULL);

    if (opts->newexpire) {
        newexpire = apr_date_parse_rfc(opts->newexpire);
        if (newexpire == APR_DATE_BAD) {
            apr_file_printf(errfile, "%s: cannot parse date \"%s\""
                            APR_EOL_STR, shortname, opts->newexpire);
            return 1;
        }
        openflags |= APR_FOPEN_WRITE;
    }

    /* Open the header file.  Files opened from `pool` are also closed when
     * the pool is destroyed at exit, so the early returns below do not leak
     * descriptors. */
    status = apr_file_open(&header, headerpath, openflags, APR_OS_DEFAULT,
                           pool);
    if (status != APR_SUCCESS) {
        if (!APR_STATUS_IS_ENOENT(status)) {
            apr_file_printf(errfile, "%s: could not determine if %s exists. "
                            "Possible perrmissions problem." APR_EOL_STR,
                            shortname, headerpath);
        }
        /* File is not cached */
        return 1;
    }

    /* Should the header file be investigated? */
    if (opts->showmetadata || opts->showheaders || opts->showrequest
        || opts->checkexpires || opts->newexpire) {
        disk_cache_info_t disk_info;
        char *entity;

        if (read_header_info(pool, header, headerpath, &disk_info, &entity)) {
            return 1;
        }

        /* At this point, we now have the meta-information */
        if (opts->showmetadata) {
            char sdate[APR_RFC822_DATE_LEN], sexpire[APR_RFC822_DATE_LEN],
                 sreq[APR_RFC822_DATE_LEN], sresp[APR_RFC822_DATE_LEN];

            apr_rfc822_date(sdate, disk_info.date);
            apr_rfc822_date(sexpire, disk_info.expire);
            apr_rfc822_date(sreq, disk_info.request_time);
            apr_rfc822_date(sresp, disk_info.response_time);

            apr_file_printf(outfile, "Entity Name: %s" APR_EOL_STR, entity);
            apr_file_printf(outfile, "HTTP Status code: %d" APR_EOL_STR,
                            disk_info.status);
            apr_file_printf(outfile, "Entity version: %" APR_SIZE_T_FMT
                            APR_EOL_STR, disk_info.entity_version);
            apr_file_printf(outfile, "Date: %s" APR_EOL_STR, sdate);
            apr_file_printf(outfile, "Expire: %s" APR_EOL_STR, sexpire);
            apr_file_printf(outfile, "Request time: %s" APR_EOL_STR, sreq);
            apr_file_printf(outfile, "Response time: %s" APR_EOL_STR, sresp);
        }

        /* -e: exit status 1 if the entity has expired */
        if (opts->checkexpires && disk_info.expire <= apr_time_now()) {
            rv = 1;
        }

        /* -E: rewrite the info record in place with the new expiry time */
        if (opts->newexpire) {
            apr_off_t start = 0;

            disk_info.expire = newexpire;
            if (apr_file_seek(header, APR_SET, &start) != APR_SUCCESS
                || apr_file_write_full(header, &disk_info, sizeof(disk_info),
                                       NULL) != APR_SUCCESS) {
                apr_file_printf(errfile, "%s: error writing to %s"
                                APR_EOL_STR, shortname, headerpath);
                return 1;
            }
        }

        /* Dumping the stored header tables has never been implemented; say
         * so instead of silently printing nothing. */
        if (opts->showheaders || opts->showrequest) {
            apr_file_printf(errfile, "%s: -H and -R are not implemented yet"
                            APR_EOL_STR, shortname);
            rv = 1;
        }
    }

    if (apr_file_close(header) != APR_SUCCESS) {
        apr_file_printf(errfile, "%s: error closing %s" APR_EOL_STR,
                        shortname, headerpath);
        return 1;
    }

    /* If the user wants to see the body, write it out */
    if (opts->showbody && dump_body(pool, datapath) != 0) {
        return 1;
    }

    /* Try and delete the files */
    if (opts->do_delete) {
        if (apr_file_remove(headerpath, pool) != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error deleting %s" APR_EOL_STR,
                            shortname, headerpath);
            return 1;
        }

        /* A header without a data file is fine; anything else is not */
        status = apr_file_remove(datapath, pool);
        if (status != APR_SUCCESS && !APR_STATUS_IS_ENOENT(status)) {
            apr_file_printf(errfile, "%s: error deleting %s" APR_EOL_STR,
                            shortname, datapath);
            return 1;
        }
    }

    return rv;
}

/*
 * main: parse the command line, decide between locate mode (any of
 * -c -d -l -h -g -q) and operate mode (any of -e -E -m -H -R -b -D), and
 * hand the single remaining argument to the matching routine.  Options from
 * both modes may not be mixed, and no option may be repeated.
 */
int main(int argc, const char *const argv[])
{
    apr_status_t status;
    apr_pool_t *pool;
    apr_getopt_t *o;
    admin_opts_t opts = { 0 };
    int locate_opts = 0;    /* saw at least one locate-mode option */
    int operate_opts = 0;   /* saw at least one operate-mode option */
    char opt;
    const char *arg;

    if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
        return 1;
    }
    atexit(apr_terminate);

    if (argc) {
        shortname = apr_filepath_name_get(argv[0]);
    }

    if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
        return 1;
    }

    if (apr_file_open_stderr(&errfile, pool) != APR_SUCCESS
        || apr_file_open_stdout(&outfile, pool) != APR_SUCCESS) {
        return 1;
    }

    apr_getopt_init(&o, pool, argc, argv);

    while ((status = apr_getopt(o, "qgh:c:d:l:emHRbDE:", &opt, &arg))
           == APR_SUCCESS) {
        switch (opt) {
        case 'q':
            if (opts.quiet) {
                usage();
            }
            opts.quiet = 1;
            locate_opts = 1;
            break;

        case 'g':
            if (opts.guess) {
                usage();
            }
            opts.guess = 1;
            locate_opts = 1;
            break;

        case 'h':
            if (opts.host) {
                usage();
            }
            opts.host = arg;
            locate_opts = 1;
            break;

        case 'c':
            if (opts.proxyroot) {
                usage();
            }
            opts.proxyroot = arg;
            locate_opts = 1;
            break;

        case 'd':
            if (opts.dirlevels) {
                usage();
            }
            opts.dirlevels = parse_dir_number(arg);
            if (opts.dirlevels <= 0) {
                usage();
            }
            locate_opts = 1;
            break;

        case 'l':
            if (opts.dirlength) {
                usage();
            }
            opts.dirlength = parse_dir_number(arg);
            if (opts.dirlength <= 0) {
                usage();
            }
            locate_opts = 1;
            break;

        case 'e':
            if (opts.checkexpires) {
                usage();
            }
            opts.checkexpires = 1;
            operate_opts = 1;
            break;

        case 'E':
            if (opts.newexpire) {
                usage();
            }
            opts.newexpire = arg;
            operate_opts = 1;
            break;

        case 'm':
            if (opts.showmetadata) {
                usage();
            }
            opts.showmetadata = 1;
            operate_opts = 1;
            break;

        case 'H':
            if (opts.showheaders) {
                usage();
            }
            opts.showheaders = 1;
            operate_opts = 1;
            break;

        case 'R':
            if (opts.showrequest) {
                usage();
            }
            opts.showrequest = 1;
            operate_opts = 1;
            break;

        case 'b':
            if (opts.showbody) {
                usage();
            }
            opts.showbody = 1;
            operate_opts = 1;
            break;

        case 'D':
            if (opts.do_delete) {
                usage();
            }
            opts.do_delete = 1;
            operate_opts = 1;
            break;

        default:
            usage();
        } /* switch */
    } /* while */

    if (status != APR_EOF) {
        usage();
    }

    /* We should be given one more argument */
    if (o->ind != (argc - 1)) {
        usage();
    }

    /* Options of the two modes are mutually exclusive, whatever order
     * they were given in. */
    if (locate_opts && operate_opts) {
        usage();
    }

    if (locate_opts) {
        return locate(pool, &opts, argv[o->ind]);
    }

    /* We are in operate mode */
    return operate(pool, &opts, argv[o->ind]);
}