On Thu, Aug 11, 2005 at 11:48:21PM -0700, Justin Erenkrantz wrote: > Right. I think Paul mentioned that we also need to fix up htcacheclean to > remove the .vary subdirectories as well. -- justin
Next time someone is committing to htcacheclean: its defines for VARY_FORMAT_VERSION and DISK_FORMAT_VERSION are wrong. They should be 3 and 4 respectively - as per mod_disk_cache.c. Right now they are 1 and 2. I'm writing something else now, to help me debug cache edge cases. I'm still seeing some misbehaviour that I need to track down, but I'm having trouble re-creating vary caching. If the vary content is locally generated, it is saved as per content location. And right now, I can't get it to cache proxied vary content at all. Though it doesn't help that I've ham-fistedly hacked the mod_proxy code in trunk just to get it to compile. Does anyone have a remote URI on some webserver somewhere that reliably returns a Vary response that is cacheable? Now for something else: htcacheadmin. Now that I'm running with an expanded cache, and trying to debug things while I'm at it, I keep coming across needing to do the same tasks, but "find ./ -type f | xargs grep" just isn't a reliable way of tracking down cache entities. So I've written htcacheadmin. It's *extremely* useful for debugging, which is why I'm posting it now - it's helped me a lot. It's useful for administrators too, but I'm not sure if it's useful enough vs. confusing enough for support. Anyway, right now it's got most of its functionality working, but it only works for locally generated non-vary content. I'm going to add a loop to allow it to work through the various key permutations for proxy support, e.g. ftp.heanet.ie/pub/heanet/100.txt? ftp.heanet.iehttp://ftp.heanet.ie/pub/heanet/100.txt? ftp.heanet.iehttp://ftp.heanet.ie:80/pub/heanet/100.txt? to track down likely cache matches. Though I wonder is there any hope of convincing anyone to change the cache key to something utterly deterministic like; http://ftp.heanet.ie:80/pub/heanet/100.txt? Including the scheme will help me when I go to make caching work for ftp, and the rest just helps make it reliably deterministic. 
Anyway, other features I want to add include allowing the administrator to extend the expiry of particular entities, and I think I might split its functionality so that it has one mode which locates (and only locates) cache entities, and then another mode which does the information retrieval, taking a .header file as an argument (more useful to admins since they can guarantee the output is for one instance only). See attachment, feedback appreciated. -- Colm MacCárthaigh Public Key: [EMAIL PROTECTED]
/* Copyright 2001-2005 The Apache Software Foundation or its licensors, as
 * applicable.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * htcacheadmin.c: a utility to allow administrators to track down urls
 * in their caches, and perform actions on that basis.
 *
 * Contributed by Colm MacCarthaigh <colm stdlib.net>
 * 11 Aug 2005
 */

#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_md5.h"
#include "apr_getopt.h"
#include "apr_date.h"
#include "apr_uri.h"
#include "apr_time.h"

#if APR_HAVE_UNISTD_H
#include <unistd.h>
#endif
#if APR_HAVE_STDLIB_H
#include <stdlib.h>
#endif

/* mod_disk_cache.c extract start.
 *
 * These must track the format versions used by mod_disk_cache.c exactly;
 * 3 and 4 are the current values (htcacheclean still carries the stale
 * values 1 and 2).
 */

#define VARY_FORMAT_VERSION 3
#define DISK_FORMAT_VERSION 4

typedef struct {
    /* Indicates the format of the header struct stored on-disk. */
    apr_uint32_t format;
    /* The HTTP status code returned for this response. */
    int status;
    /* The size of the entity name that follows. */
    apr_size_t name_len;
    /* The number of times we've cached this entity. */
    apr_size_t entity_version;
    /* Miscellaneous time values. */
    apr_time_t date;
    apr_time_t expire;
    apr_time_t request_time;
    apr_time_t response_time;
} disk_cache_info_t;

/* mod_disk_cache.c extract end */

/* cache_util.c extract start.
 *
 * cache_hash() must produce exactly the same directory/file prefix as the
 * server does, or we will never find the cached files.  Do not "clean up"
 * this function independently of cache_util.c.
 */

/*
 * Hash a cache key into an on-disk path prefix of ndepth directory levels,
 * each nlength characters long.  val must have room for the 22-character
 * encoded MD5 digest plus ndepth separators plus a NUL.
 */
static void cache_hash(const char *it, char *val, int ndepth, int nlength)
{
    apr_md5_ctx_t context;
    unsigned char digest[16];
    char tmp[22];
    int i, k, d;
    unsigned int x;
    static const char enc_table[64] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";

    apr_md5_init(&context);
    apr_md5_update(&context, (const unsigned char *) it, strlen(it));
    apr_md5_final(digest, &context);

    /* encode 128 bits as 22 characters, using a modified uuencoding
     * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is
     * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
     */
    for (i = 0, k = 0; i < 15; i += 3) {
        x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2];
        tmp[k++] = enc_table[x >> 18];
        tmp[k++] = enc_table[(x >> 12) & 0x3f];
        tmp[k++] = enc_table[(x >> 6) & 0x3f];
        tmp[k++] = enc_table[x & 0x3f];
    }

    /* one byte left */
    x = digest[15];
    tmp[k++] = enc_table[x >> 2];        /* use up 6 bits */
    tmp[k++] = enc_table[(x << 4) & 0x3f];

    /* now split into directory levels */
    for (i = k = d = 0; d < ndepth; ++d) {
        memcpy(&val[i], &tmp[k], nlength);
        k += nlength;
        val[i + nlength] = '/';
        i += nlength + 1;
    }
    memcpy(&val[i], &tmp[k], 22 - k);
    val[i + 22 - k] = '\0';
}

/* cache_util.c extract end */

static apr_file_t *errfile;     /* stderr file handle */

/* short program name as called */
static const char *shortname = "htcacheadmin";

/*
 * usage info
 */
#define NL APR_EOL_STR
static void usage(void)
{
    apr_file_printf(errfile,
    "%s -- utility for administration of the disk cache." NL
    "Usage: %s [-tmeHRbD] -c ROOT -d DIRLEVEL -l DIRLENGTH url" NL
    NL
    "Options:" NL
    "  -c   Use ROOT as the root directory for the cache. Must be identical" NL
    "       to the CacheRoot setting for Apache." NL
    NL
    "  -d   Use DIRLEVEL as the number of subdirectories mod_disk_cache" NL
    "       creates for cached files. Must be identical to the" NL
    "       CacheDirLevels setting for Apache." NL
    NL
    "  -l   Use DIRLENGTH as the length of the subdirectories" NL
    "       mod_disk_cache creates for cached files. Must be identical to" NL
    "       the CacheDirLength setting for Apache." NL
    NL
    "  -t   Translate the url into header and data filenames, and output" NL
    "       those filenames, whether the content is cached or not." NL
    NL
    "  -e   Return 0 if url is cached, and has not expired. Return 1" NL
    "       otherwise. Default behaviour is to return 0 if url is cached." NL
    NL
    "  -m   Output mod_disk_cache meta information for the URL." NL
    NL
    "  -H   Output the cached HTTP response header information for the URL." NL
    NL
    "  -R   Output the cached HTTP request header information for the URL." NL
    NL
    "  -b   Output the cached body." NL
    NL
    "  -D   Delete the cached header and data file for specified url." NL,
    shortname,
    shortname
    );
    exit(1);
}
#undef NL

/*
 * main: parse the options, derive the cache key and hashed filenames for
 * the given url, then perform the requested inspection/deletion actions.
 *
 * Exit status: 0 if the url is cached (and, with -e, unexpired); 1 on
 * error, cache miss, or (-e) expiry.
 */
int main(int argc, const char *const argv[])
{
    apr_status_t status;
    apr_pool_t *pool;
    apr_getopt_t *o;
    int translate, delete, showheaders, showbody, showrequest,
        checkexpires, showmetadata, dirlevels, dirlength;
    char opt;
    const char *arg;
    char *proxyroot, *root, *key, *headerpath, *datapath;
    char prefix[66];
    apr_uri_t inspect_uri;
    apr_file_t *header, *data, *out;

    translate = 0;
    delete = 0;
    showheaders = 0;
    showbody = 0;
    showrequest = 0;
    showmetadata = 0;
    dirlevels = 0;
    dirlength = 0;
    checkexpires = 0;
    proxyroot = NULL;

    if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
        return 1;
    }
    atexit(apr_terminate);

    if (argc) {
        shortname = apr_filepath_name_get(argv[0]);
    }

    if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
        return 1;
    }
    apr_file_open_stderr(&errfile, pool);

    /* Informational output belongs on stdout so it can be piped; errors
     * go to errfile (stderr). */
    apr_file_open_stdout(&out, pool);

    apr_getopt_init(&o, pool, argc, argv);

    while (1) {
        status = apr_getopt(o, "tmeHRbDc:d:l:", &opt, &arg);
        if (status == APR_EOF) {
            break;
        }
        else if (status != APR_SUCCESS) {
            usage();
        }
        else {
            /* Every option may be given at most once */
            switch (opt) {
            case 't':
                if (translate) {
                    usage();
                }
                translate = 1;
                break;
            case 'e':
                if (checkexpires) {
                    usage();
                }
                checkexpires = 1;
                break;
            case 'm':
                if (showmetadata) {
                    usage();
                }
                showmetadata = 1;
                break;
            case 'H':
                if (showheaders) {
                    usage();
                }
                showheaders = 1;
                break;
            case 'R':
                if (showrequest) {
                    usage();
                }
                showrequest = 1;
                break;
            case 'b':
                if (showbody) {
                    usage();
                }
                showbody = 1;
                break;
            case 'D':
                if (delete) {
                    usage();
                }
                delete = 1;
                break;
            case 'c':
                if (proxyroot) {
                    usage();
                }
                proxyroot = apr_pstrdup(pool, arg);
                /* Make the cache root the current directory, so the
                 * hashed prefix can be resolved relative to it. */
                if (apr_filepath_set(proxyroot, pool) != APR_SUCCESS) {
                    usage();
                }
                break;
            case 'd':
                if (dirlevels) {
                    usage();
                }
                dirlevels = (int) apr_atoi64(arg);
                break;
            case 'l':
                if (dirlength) {
                    usage();
                }
                dirlength = (int) apr_atoi64(arg);
                break;
            }   /* switch */
        }       /* else */
    }           /* while */

    /* We should be given a url as an argument */
    if (o->ind != (argc - 1)) {
        usage();
    }
    if (apr_uri_parse(pool, argv[o->ind], &inspect_uri) != APR_SUCCESS) {
        usage();
    }

    /* Right now, we only handle HTTP */
    if (inspect_uri.scheme && strcasecmp(inspect_uri.scheme, "http")) {
        usage();
    }

    /* We need at least valid dirlength, dirlevel and root arguments */
    if (proxyroot == NULL || dirlevels <= 0 || dirlength <= 0) {
        usage();
    }
    if (apr_filepath_get(&root, 0, pool) != APR_SUCCESS) {
        usage();
    }

    /* Generate a key from the url given.  NOTE(review): this matches
     * locally-generated content only; proxied content is keyed
     * differently — see the accompanying mail. */
    if (!inspect_uri.path) {
        inspect_uri.path = "/";
    }
    if (inspect_uri.hostname) {
        key = apr_pstrcat(pool, inspect_uri.hostname, inspect_uri.path, "?",
                          inspect_uri.query, NULL);
    }
    else {
        key = apr_pstrcat(pool, inspect_uri.path, "?", inspect_uri.query,
                          NULL);
    }

    /* Calculate the hash */
    cache_hash(key, prefix, dirlevels, dirlength);

    headerpath = apr_pstrcat(pool, root, "/", prefix, ".header", NULL);
    datapath = apr_pstrcat(pool, root, "/", prefix, ".data", NULL);

    /* Output the filenames */
    if (translate) {
        apr_file_printf(out, "header: %s" APR_EOL_STR
                        "data: %s" APR_EOL_STR, headerpath, datapath);
    }

    /* Open the header file */
    status = apr_file_open(&header, headerpath,
                           APR_FOPEN_READ | APR_FOPEN_BINARY, APR_OS_DEFAULT,
                           pool);
    if (status != APR_SUCCESS) {
        if (!APR_STATUS_IS_ENOENT(status)) {
            apr_file_printf(errfile, "%s: could not determine if %s exists. "
                            "Possible permissions problem." APR_EOL_STR,
                            shortname, headerpath);
        }
        /* File is not cached */
        return 1;
    }

    /* Open the data file */
    status = apr_file_open(&data, datapath,
                           APR_FOPEN_READ | APR_FOPEN_BINARY, APR_OS_DEFAULT,
                           pool);
    if (status != APR_SUCCESS) {
        if (APR_STATUS_IS_ENOENT(status)) {
            apr_file_printf(errfile,
                            "%s: header exists, but data file does not."
                            APR_EOL_STR, shortname);
        }
        else {
            apr_file_printf(errfile, "%s: could not determine if %s exists. "
                            "Possible permissions problem." APR_EOL_STR,
                            shortname, datapath);
        }
        /* File is not cached */
        return 1;
    }

    /* Should the header file be investigated? */
    if (showmetadata || showheaders || showrequest || checkexpires) {
        apr_uint32_t format;
        apr_size_t len = sizeof(format);
        apr_off_t offset = 0;
        disk_cache_info_t disk_info;
        char *entity;

        /* Check for the format of the header file */
        if (apr_file_read_full(header, &format, len, &len) != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error reading from %s" APR_EOL_STR,
                            shortname, headerpath);
            return 1;
        }

        /* Rewind to the start of the file; the format field is re-read
         * as part of disk_cache_info_t. */
        apr_file_seek(header, APR_SET, &offset);

        if (format == DISK_FORMAT_VERSION) {
            len = sizeof(disk_cache_info_t);

            /* Read in the meta information */
            if (apr_file_read_full(header, &disk_info, len, &len)
                != APR_SUCCESS) {
                apr_file_printf(errfile,
                                "%s: error reading from %s" APR_EOL_STR,
                                shortname, headerpath);
                return 1;
            }
        }
        else if (format == VARY_FORMAT_VERSION) {
            apr_file_printf(errfile,
                            "%s: header file in unsupported vary format"
                            APR_EOL_STR, shortname);
            /* The entity is cached, we just can't inspect it yet */
            return 0;
        }
        else {
            apr_file_printf(errfile,
                            "%s: header file in unrecognised format %u"
                            APR_EOL_STR, shortname, (unsigned int) format);
            return 1;
        }

        /* Read in the entity name (name_len bytes, not NUL-terminated
         * on disk; apr_pcalloc gives us the terminating NUL). */
        len = disk_info.name_len + 1;
        if (!(entity = (char *) apr_pcalloc(pool, len))) {
            apr_file_printf(errfile, "%s: Cannot assign memory" APR_EOL_STR,
                            shortname);
            return 1;
        }
        if (apr_file_read_full(header, entity, len - 1, &len)
            != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error reading from %s" APR_EOL_STR,
                            shortname, headerpath);
            return 1;
        }

        /* -e: cached but expired counts as a miss */
        if (checkexpires && disk_info.expire < apr_time_now()) {
            return 1;
        }

        /* At this point, we now have the meta-information */
        if (showmetadata) {
            char sdate[APR_RFC822_DATE_LEN], sexpire[APR_RFC822_DATE_LEN],
                sreq[APR_RFC822_DATE_LEN], sresp[APR_RFC822_DATE_LEN];

            apr_rfc822_date(sdate, disk_info.date);
            apr_rfc822_date(sexpire, disk_info.expire);
            apr_rfc822_date(sreq, disk_info.request_time);
            apr_rfc822_date(sresp, disk_info.response_time);

            apr_file_printf(out, "Entity Name: %s" APR_EOL_STR, entity);
            apr_file_printf(out, "HTTP Status code: %d" APR_EOL_STR,
                            disk_info.status);
            apr_file_printf(out, "Entity version: %" APR_SIZE_T_FMT
                            APR_EOL_STR, disk_info.entity_version);
            apr_file_printf(out, "Date: %s" APR_EOL_STR, sdate);
            apr_file_printf(out, "Expire: %s" APR_EOL_STR, sexpire);
            apr_file_printf(out, "Request time: %s" APR_EOL_STR, sreq);
            apr_file_printf(out, "Response time: %s" APR_EOL_STR, sresp);
        }

        /* TODO: output of the cached response headers (-H) is not
         * implemented yet. */
        if (showheaders) {
            ;
        }

        /* TODO: output of the cached request headers (-R) is not
         * implemented yet. */
        if (showrequest) {
            ;
        }
    }

    /* If the user wants to see the body, write it out */
    if (showbody) {
        char buffer[4096];
        apr_size_t len;

        /* Output the body.  len must be reset on every iteration:
         * apr_file_read() rewrites it with the number of bytes actually
         * read, so a short read would otherwise shrink all later reads. */
        do {
            len = sizeof(buffer);
            status = apr_file_read(data, buffer, &len);
            if (status == APR_SUCCESS) {
                apr_file_write(out, buffer, &len);
            }
        } while (status == APR_SUCCESS);

        if (status != APR_EOF) {
            apr_file_printf(errfile, "%s: error reading all of %s"
                            APR_EOL_STR, shortname, datapath);
            return 1;
        }
    }

    /* Close the files */
    apr_file_close(header);
    apr_file_close(data);

    /* Try and delete both files; attempt the data file even if the
     * header removal fails, and report failure via the exit status. */
    if (delete) {
        int failed = 0;

        if (apr_file_remove(headerpath, pool) != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error deleting %s" APR_EOL_STR,
                            shortname, headerpath);
            failed = 1;
        }
        if (apr_file_remove(datapath, pool) != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error deleting %s" APR_EOL_STR,
                            shortname, datapath);
            failed = 1;
        }
        if (failed) {
            return 1;
        }
    }

    return 0;
}