On Thu, Aug 11, 2005 at 11:48:21PM -0700, Justin Erenkrantz wrote:
> Right. I think Paul mentioned that we also need to fix up htcacheclean to
> remove the .vary subdirectories as well. -- justin
Next time someone is committing to htcacheclean: its defines for
VARY_FORMAT_VERSION and DISK_FORMAT_VERSION are wrong. They should be 3
and 4 respectively, as per mod_disk_cache.c. Right now they are 1 and
2.
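That is, the top of htcacheclean.c should carry the same values as
mod_disk_cache.c:

    #define VARY_FORMAT_VERSION 3
    #define DISK_FORMAT_VERSION 4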
I'm writing something else now to help me debug cache edge cases. I'm
still seeing some misbehaviour that I need to track down, but I'm having
trouble re-creating vary caching.
If the vary content is locally generated, it is saved per content
location. And right now, I can't get it to cache proxied vary content at
all. Though it doesn't help that I've been ham-fisting the mod_proxy
code in trunk just to get it to compile.
Does anyone have a remote URI on some webserver somewhere that reliably
returns a Vary response that is cacheable?
That something else: htcacheadmin
Now that I'm running with an expanded cache, and trying to debug things
while I'm at it, I keep coming across needing to do the same tasks, but
"find ./ -type f | xargs grep" just isn't a reliable way of tracking
down cache entities. So I've written htcacheadmin.
It's *extremely* useful for debugging, which is why I'm posting it now -
it's helped me a lot. It's useful for administrators too, but I'm not
sure if it's useful enough vs. confusing enough for support.
Anyway, right now it's got most of its functionality working, but it
only works for locally generated non-vary content. I'm going to add a
loop to allow it to work through the various key permutations for proxy
support, e.g.
ftp.heanet.ie/pub/heanet/100.txt?
ftp.heanet.iehttp://ftp.heanet.ie/pub/heanet/100.txt?
ftp.heanet.iehttp://ftp.heanet.ie:80/pub/heanet/100.txt?
to track down likely cache matches. Though I wonder if there's any hope
of convincing anyone to change the cache key to something utterly
deterministic like:
http://ftp.heanet.ie:80/pub/heanet/100.txt?
Including the scheme will help me when I go to make caching work for
ftp, and the rest just helps make it reliably deterministic.
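Roughly, I mean a loop over something like this (an untested sketch;
key_permutations is a name I'm making up here, and it assumes the url
has already been through apr_uri_parse):

/* Build the likely cache-key permutations for a parsed url: the
 * locally-generated form, then the proxied forms without and with an
 * explicit port. keys must have room for three entries. */
static void key_permutations(apr_pool_t *p, apr_uri_t *uri,
                             const char **keys)
{
    /* note: a NULL query terminates apr_pstrcat early, leaving the
     * bare '?' that mod_cache's keys carry */
    const char *base = apr_pstrcat(p, uri->path, "?", uri->query, NULL);
    keys[0] = apr_pstrcat(p, uri->hostname, base, NULL);
    keys[1] = apr_pstrcat(p, uri->hostname, "http://", uri->hostname,
                          base, NULL);
    keys[2] = apr_pstrcat(p, uri->hostname, "http://", uri->hostname,
                          ":80", base, NULL);
}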
Anyway, other features I want to add include allowing the
administrator to extend the expiry of particular entities, and I think I
might split its functionality so that it has one mode which locates
(and only locates) cache entities, and then another mode which does the
information retrieval taking a .header file as an argument (more useful
to admins since they can guarantee the output is for one instance only).
See attachment, feedback appreciated.
--
Colm MacCárthaigh Public Key: [EMAIL PROTECTED]
/* Copyright 2001-2005 The Apache Software Foundation or its licensors, as
* applicable.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* htcacheadmin.c: a utility to allow administrators to track down urls
* in their caches, and perform actions on that basis.
*
* Contributed by Colm MacCarthaigh <colm stdlib.net>
* 11 Aug 2005
*/
#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_md5.h"
#include "apr_getopt.h"
#include "apr_date.h"
#include "apr_uri.h"
#if APR_HAVE_UNISTD_H
#include <unistd.h>
#endif
#if APR_HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if APR_HAVE_STRING_H
#include <string.h>             /* strlen, memcpy */
#endif
#if APR_HAVE_STRINGS_H
#include <strings.h>            /* strcasecmp */
#endif
/* mod_disk_cache.c extract start */
#define VARY_FORMAT_VERSION 3
#define DISK_FORMAT_VERSION 4
typedef struct
{
/* Indicates the format of the header struct stored on-disk. */
apr_uint32_t format;
/* The HTTP status code returned for this response. */
int status;
/* The size of the entity name that follows. */
apr_size_t name_len;
/* The number of times we've cached this entity. */
apr_size_t entity_version;
/* Miscellaneous time values. */
apr_time_t date;
apr_time_t expire;
apr_time_t request_time;
apr_time_t response_time;
} disk_cache_info_t;
/* mod_disk_cache.c extract end */
/* cache_util.c extract started */
static void cache_hash(const char *it, char *val, int ndepth, int nlength)
{
apr_md5_ctx_t context;
unsigned char digest[16];
char tmp[22];
int i, k, d;
unsigned int x;
static const char enc_table[64] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
apr_md5_init(&context);
apr_md5_update(&context, (const unsigned char *) it, strlen(it));
apr_md5_final(digest, &context);
/* encode 128 bits as 22 characters, using a modified uuencoding
* the encoding is 3 bytes -> 4 characters* i.e. 128 bits is
* 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
*/
for (i = 0, k = 0; i < 15; i += 3) {
x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2];
tmp[k++] = enc_table[x >> 18];
tmp[k++] = enc_table[(x >> 12) & 0x3f];
tmp[k++] = enc_table[(x >> 6) & 0x3f];
tmp[k++] = enc_table[x & 0x3f];
}
/* one byte left */
x = digest[15];
tmp[k++] = enc_table[x >> 2]; /* use up 6 bits */
tmp[k++] = enc_table[(x << 4) & 0x3f];
/* now split into directory levels */
for (i = k = d = 0; d < ndepth; ++d) {
memcpy(&val[i], &tmp[k], nlength);
k += nlength;
val[i + nlength] = '/';
i += nlength + 1;
}
memcpy(&val[i], &tmp[k], 22 - k);
val[i + 22 - k] = '\0';
}
/* cache_util.c extract end */
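/* Dump one table of stored headers from a .header file. A sketch that
 * assumes mod_disk_cache stores each header table as CRLF-terminated
 * "Key: Value" text lines ending with a blank line, response headers
 * first and request headers second. Reads one table from the current
 * offset; prints it to sink when print is non-zero, skips it otherwise. */
static apr_status_t dump_header_block(apr_file_t *file, apr_file_t *sink,
                                      int print)
{
    char line[8192];
    apr_status_t rv;
    while ((rv = apr_file_gets(line, sizeof(line), file)) == APR_SUCCESS) {
        /* a bare CRLF (or LF) terminates the table */
        if (line[0] == '\r' || line[0] == '\n' || line[0] == '\0') {
            break;
        }
        if (print) {
            apr_file_puts(line, sink);
        }
    }
    return rv;
}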
static apr_file_t *errfile; /* stderr file handle */
/* short program name as called */
static const char *shortname = "htcacheadmin";
/*
* usage info
*/
#define NL APR_EOL_STR
static void usage(void)
{
apr_file_printf(errfile,
"%s -- utility for administration of the disk cache." NL
"Usage: %s [-tmeHRbD] -c ROOT -d DIRLEVEL -l DIRLENGTH url" NL
NL
"Options:" NL
" -c Use ROOT as the root directory for the cache. Must be identical" NL
" to the CacheRoot setting for Apache." NL
NL
" -d Use DIRLEVEL as the number of subdirectories mod_disk_cache" NL
" creates for cached files. Must be identical to the" NL
" CacheDirLevels setting for Apache." NL
NL
" -l Use DIRLENGTH as the length of the subdirectories" NL
" mod_disk_cache creates for cached files. Must be identical to" NL
" the CacheDirLength setting for Apache." NL
NL
" -t Translate the url into header and data filenames, and output" NL
" those filenames, whether the content is cached or not." NL
NL
" -e Return 0 if url is cached, and has not expired. Return 1" NL
" otherwise. Default behaviour is to return 0 if url is cached." NL
NL
" -m Output mod_disk_cache meta information for the URL." NL
NL
" -H Output the cached HTTP response header information for the URL." NL
NL
" -R Output the cached HTTP request header information for the URL." NL
NL
" -b Output the cached body." NL
NL
" -D Delete the cached header and data file for specified url." NL,
shortname,
shortname
);
exit(1);
}
#undef NL
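/* Example invocation, showing the metadata for a cached url (the
 * CacheRoot, CacheDirLevels and CacheDirLength values here are
 * illustrative):
 *
 *   htcacheadmin -m -c /var/cache/apache2 -d 3 -l 2 \
 *       http://ftp.heanet.ie/pub/heanet/100.txt
 */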
/*
* main
*/
int main(int argc, const char *const argv[])
{
apr_status_t status;
apr_pool_t *pool, *instance;
apr_getopt_t *o;
    int translate, delete, showheaders, showbody, showrequest, checkexpires,
        showmetadata, dirlevels, dirlength;
char opt;
const char *arg;
char *proxyroot, *root, *key, *headerpath, *datapath;
char prefix[66];
apr_uri_t inspect_uri;
apr_file_t *header, *data, *out;
translate = 0;
delete = 0;
showheaders = 0;
showbody = 0;
showrequest = 0;
showmetadata = 0;
dirlevels = 0;
dirlength = 0;
checkexpires = 0;
proxyroot = NULL;
if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
return 1;
}
atexit(apr_terminate);
if (argc) {
shortname = apr_filepath_name_get(argv[0]);
}
if (apr_pool_create(&pool, NULL) != APR_SUCCESS)
{
return 1;
}
apr_file_open_stderr(&errfile, pool);
    apr_file_open_stdout(&out, pool);
apr_getopt_init(&o, pool, argc, argv);
while (1) {
status = apr_getopt(o, "tmeHRbDc:d:l:", &opt, &arg);
if (status == APR_EOF) {
break;
}
else if (status != APR_SUCCESS) {
usage();
}
else {
switch (opt) {
case 't':
if (translate) {
usage();
}
translate = 1;
break;
case 'e':
if (checkexpires) {
usage();
}
checkexpires = 1;
break;
case 'm':
if (showmetadata) {
usage();
}
showmetadata = 1;
break;
case 'H':
if (showheaders) {
usage();
}
showheaders = 1;
break;
case 'R':
if (showrequest) {
usage();
}
showrequest = 1;
break;
case 'b':
if (showbody) {
usage();
}
showbody = 1;
break;
case 'D':
if (delete) {
usage();
}
delete = 1;
break;
case 'c':
if (proxyroot) {
usage();
}
proxyroot = apr_pstrdup(pool, arg);
if (apr_filepath_set(proxyroot, pool) != APR_SUCCESS)
{
usage();
}
break;
case 'd':
if (dirlevels) {
usage();
}
dirlevels = apr_atoi64(arg);
break;
case 'l':
if (dirlength) {
usage();
}
dirlength = apr_atoi64(arg);
break;
} /* switch */
} /* else */
} /* while */
/* We should be given a url as an argument */
if (o->ind != (argc - 1)) {
usage();
}
if (apr_uri_parse(pool, argv[o->ind], &inspect_uri) != APR_SUCCESS)
{
usage();
}
/* Right now, we only handle HTTP */
if (inspect_uri.scheme && strcasecmp(inspect_uri.scheme, "http")) {
usage();
}
/* We need at least valid dirlength, dirlevel and root arguments */
if (proxyroot == NULL || dirlevels <= 0 || dirlength <= 0) {
usage();
}
if (apr_filepath_get(&root, 0, pool) != APR_SUCCESS)
{
usage();
}
/* Generate a key from the url given */
if (!inspect_uri.path) {
inspect_uri.path = "/";
}
if (inspect_uri.hostname) {
key = apr_pstrcat(pool, inspect_uri.hostname, inspect_uri.path, "?",
inspect_uri.query, NULL);
}
else {
key = apr_pstrcat(pool, inspect_uri.path, "?", inspect_uri.query,
NULL);
}
/* Calculate the hash */
cache_hash(key, prefix, dirlevels, dirlength);
headerpath = apr_pstrcat(pool, root, "/", prefix, ".header", NULL);
datapath = apr_pstrcat(pool, root, "/", prefix, ".data", NULL);
/* Output the filenames */
if (translate) {
apr_file_printf(out, "header: %s" APR_EOL_STR "data: %s" APR_EOL_STR,
headerpath, datapath);
}
/* Open the header file */
status = apr_file_open(&header, headerpath,
APR_FOPEN_READ | APR_FOPEN_BINARY, APR_OS_DEFAULT,
pool);
if (status != APR_SUCCESS) {
        if (!APR_STATUS_IS_ENOENT(status)) {
            apr_file_printf(errfile, "%s: could not determine if %s exists. "
                            "Possible permissions problem." APR_EOL_STR,
                            shortname, headerpath);
}
/* File is not cached */
return 1;
}
/* Open the data file */
status = apr_file_open(&data, datapath,
APR_FOPEN_READ | APR_FOPEN_BINARY, APR_OS_DEFAULT,
pool);
if (status != APR_SUCCESS) {
        if (APR_STATUS_IS_ENOENT(status)) {
            apr_file_printf(errfile,
                            "%s: header exists, but data file does not."
                            APR_EOL_STR, shortname);
        }
        else {
            apr_file_printf(errfile, "%s: could not determine if %s exists. "
                            "Possible permissions problem." APR_EOL_STR,
                            shortname, datapath);
        }
/* File is not cached */
return 1;
}
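    /* From here on we parse the .header file; as far as I can tell from
     * mod_disk_cache.c the layout is a disk_cache_info_t (whose first
     * word is the format version), then the entity name, then the stored
     * response and request header tables. */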
/* Should the header file be investigated? */
if (showmetadata || showheaders || showrequest || checkexpires) {
apr_uint32_t format;
apr_size_t len = sizeof(format);
apr_off_t offset = 0;
disk_cache_info_t disk_info;
char *entity;
/* Check for the format of the header file */
if (apr_file_read_full(header, &format, len, &len) != APR_SUCCESS) {
apr_file_printf(errfile, "%s: error reading from %s" APR_EOL_STR,
shortname, headerpath);
return 1;
}
/* Rewind to the start of the file */
apr_file_seek(header, APR_SET, &offset);
if (format == DISK_FORMAT_VERSION) {
len = sizeof(disk_cache_info_t);
/* Read in the meta information */
if (apr_file_read_full(header, &disk_info, len, &len) !=
APR_SUCCESS) {
apr_file_printf(errfile,
"%s: error reading from %s" APR_EOL_STR,
shortname, headerpath);
return 1;
}
}
else if (format == VARY_FORMAT_VERSION) {
apr_file_printf(errfile,
"%s: header file in unsupported vary format"
APR_EOL_STR, shortname);
return 0;
}
else {
            apr_file_printf(errfile,
                            "%s: header file in unrecognised format %u"
                            APR_EOL_STR, shortname, (unsigned int) format);
return 1;
}
/* Read in the entity name */
len = disk_info.name_len + 1;
if (!(entity = (char *)apr_pcalloc(pool, len))) {
apr_file_printf(errfile,
"%s: Cannot assign memory" APR_EOL_STR,
shortname);
return 1;
}
if (apr_file_read_full(header, entity, len - 1, &len) != APR_SUCCESS) {
apr_file_printf(errfile,
"%s: error reading from %s" APR_EOL_STR,
shortname, headerpath);
return 1;
}
/* At this point, we now have the meta-information */
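        /* A guess at the intended -e behaviour: signal expiry through
         * the exit status once the meta information is available */
        if (checkexpires && disk_info.expire < apr_time_now()) {
            return 1;
        }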
if (showmetadata) {
char sdate[APR_RFC822_DATE_LEN], sexpire[APR_RFC822_DATE_LEN],
sreq[APR_RFC822_DATE_LEN], sresp[APR_RFC822_DATE_LEN];
apr_rfc822_date(sdate, disk_info.date);
apr_rfc822_date(sexpire, disk_info.expire);
apr_rfc822_date(sreq, disk_info.request_time);
apr_rfc822_date(sresp, disk_info.response_time);
apr_file_printf(out, "Entity Name: %s" APR_EOL_STR, entity);
apr_file_printf(out, "HTTP Status code: %d" APR_EOL_STR,
disk_info.status);
apr_file_printf(out, "Entity version: %lu" APR_EOL_STR,
disk_info.entity_version);
apr_file_printf(out, "Date: %s" APR_EOL_STR, sdate);
apr_file_printf(out, "Expire: %s" APR_EOL_STR, sexpire);
apr_file_printf(out, "Request time: %s" APR_EOL_STR, sreq);
apr_file_printf(out, "Response time: %s" APR_EOL_STR, sresp);
}
        /* Output the cached response and/or request headers; the
         * response headers are stored first, so skip over them when
         * only the request headers are wanted */
        if (showheaders || showrequest) {
            if (dump_header_block(header, out, showheaders) == APR_SUCCESS
                && showrequest) {
                dump_header_block(header, out, 1);
            }
        }
}
/* If the user wants to see the body, write it out */
if (showbody) {
        char buffer[4096];
        apr_size_t len;
        /* Output the body, resetting len on each pass since
         * apr_file_read uses it as both input and output */
        do {
            len = sizeof(buffer);
            status = apr_file_read(data, buffer, &len);
            if (len) {
                apr_file_write(out, buffer, &len);
            }
        } while (status == APR_SUCCESS);
if (status != APR_EOF) {
apr_file_printf(errfile, "%s: error reading all of %s" APR_EOL_STR,
shortname, datapath);
return 1;
}
}
/* Close the files */
apr_file_close(header);
apr_file_close(data);
/* Try and delete the files */
    if (delete) {
        if (apr_file_remove(headerpath, pool) != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error deleting %s" APR_EOL_STR,
                            shortname, headerpath);
            return 1;
        }
        if (apr_file_remove(datapath, pool) != APR_SUCCESS) {
            apr_file_printf(errfile, "%s: error deleting %s" APR_EOL_STR,
                            shortname, datapath);
            return 1;
        }
    }
return 0;
}