dgaudet 97/08/04 23:02:47
Modified: htdocs/manual vhosts-in-depth.html
src CHANGES http_conf_globals.h http_config.c
http_main.c http_protocol.c httpd.h
Log:
Hashed ip-vhosts, including some semantic changes to vhosts in general
which should improve the vhost situation overall.
Revision Changes Path
1.13 +4 -0 apache/htdocs/manual/vhosts-in-depth.html
Index: vhosts-in-depth.html
===================================================================
RCS file: /export/home/cvs/apache/htdocs/manual/vhosts-in-depth.html,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- vhosts-in-depth.html 1997/07/06 17:18:57 1.12
+++ vhosts-in-depth.html 1997/08/05 06:02:38 1.13
@@ -186,6 +186,10 @@
<h3>Vhost Matching</h3>
+
+<p><strong>Apache 1.3 differs from what is documented
+here, and documentation still has to be written.</strong>
+
<p>
The server determines which vhost to use for a request as follows:
1.382 +14 -0 apache/src/CHANGES
Index: CHANGES
===================================================================
RCS file: /export/home/cvs/apache/src/CHANGES,v
retrieving revision 1.381
retrieving revision 1.382
diff -u -r1.381 -r1.382
--- CHANGES 1997/08/02 22:43:50 1.381
+++ CHANGES 1997/08/05 06:02:39 1.382
@@ -1,5 +1,19 @@
Changes with Apache 1.3a2
+ *) ip-based vhosts are stored and queried using a hashing function, which
+ has been shown to improve performance on servers with many ip-vhosts.
+ Some other changes had to be made to accommodate this:
+ - the * address for vhosts now behaves like _default_
+ - the matching process now is:
+ - match an ip-vhost directly via hash (possibly matches main
+ server)
+ - if that fails, just pretend it matched the main server
+ - if so far only the main server has been matched, perform
+ name-based lookups (ServerName, ServerAlias, ServerPath)
+ *only on name-based vhosts*
+ - if they fail, look for _default_ vhosts
+ [Dean Gaudet, Dave Hankins <[EMAIL PROTECTED]>]
+
*) dbmmanage overhaul:
- merge dbmmanage and dbmmanage.new functionality, remove dbmmanage.new
- tie() to AnyDBM_File which will use one of DB_File, NDBM_File or
1.17 +2 -1 apache/src/http_conf_globals.h
Index: http_conf_globals.h
===================================================================
RCS file: /export/home/cvs/apache/src/http_conf_globals.h,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- http_conf_globals.h 1997/08/03 20:29:18 1.16
+++ http_conf_globals.h 1997/08/05 06:02:40 1.17
@@ -87,8 +87,9 @@
extern char server_root[MAX_STRING_LEN];
extern char server_confname[MAX_STRING_LEN];
+extern server_rec_chain *vhash_table[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP];
+
/* We want this to have the least chance of being correupted if there
* is some memory corruption, so we allocate it statically.
*/
extern char coredump_dir[MAX_STRING_LEN];
-
1.70 +5 -0 apache/src/http_config.c
Index: http_config.c
===================================================================
RCS file: /export/home/cvs/apache/src/http_config.c,v
retrieving revision 1.69
retrieving revision 1.70
diff -u -r1.69 -r1.70
--- http_config.c 1997/08/03 20:29:18 1.69
+++ http_config.c 1997/08/05 06:02:40 1.70
@@ -1154,6 +1154,11 @@
bind_address.s_addr = htonl(INADDR_ANY);
listeners = NULL;
listenbacklog = DEFAULT_LISTENBACKLOG;
+
+ /* Global virtual host hash bucket pointers. Init to null. */
+ memset (vhash_table, 0,
+ (VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP) * sizeof (vhash_table[0]));
+
strncpy(coredump_dir, server_root, sizeof(coredump_dir)-1);
coredump_dir[sizeof(coredump_dir)-1] = '\0';
}
1.197 +160 -39 apache/src/http_main.c
Index: http_main.c
===================================================================
RCS file: /export/home/cvs/apache/src/http_main.c,v
retrieving revision 1.196
retrieving revision 1.197
diff -u -r1.196 -r1.197
--- http_main.c 1997/08/04 09:21:16 1.196
+++ http_main.c 1997/08/05 06:02:41 1.197
@@ -191,6 +191,23 @@
listen_rec *listeners;
static listen_rec *head_listener;
+/* A (n) bucket hash table, each entry has a pointer to a server rec and
+ * a pointer to the other entries in that bucket. Each individual address,
+ * even for virtualhosts with multiple addresses, has an entry in this hash
+ * table. There are extra buckets for _default_, and name-vhost entries.
+ *
+ * The main_server's addresses appear in the main part of this table.
+ * They're differentiated from real vhosts by server->is_virtual == 0.
+ *
+ * The VHASH_DEFAULT_BUCKET is a list of all the _default_ server_addr_recs.
+ *
+ * The VHASH_MAIN_BUCKET is a list of one server_addr_rec from each name
+ * based vhost. At the moment none of the name based vhost code is hashed,
+ * and it's just more convenient to have a list of all the name-based vhosts
+ * rather than a list of all the names of name-based vhosts.
+ */
+server_rec_chain *vhash_table[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP];
+
char server_root[MAX_STRING_LEN];
char server_confname[MAX_STRING_LEN];
char coredump_dir[MAX_STRING_LEN];
@@ -1694,50 +1711,128 @@
return (suexec_enabled);
}
+/* Map an IPv4 address (callers pass in_addr.s_addr) to a bucket index in
+ * the range [0, VHASH_TABLE_SIZE), so it never selects the extra buckets.
+ * It is designed to get good distribution in the cases where the server is
+ * handling entire "networks" of servers, i.e. a whack of /24s. This is
+ * probably the most common configuration for ISPs with large virtual servers.
+ * Hash function provided by David Hankins.
+ */
+static inline unsigned hash_inaddr( unsigned key )
+{
+ key ^= (key >> 16); /* mix the bits above bit 15 into the low bits */
+ return ((key >> 8) ^ key) % VHASH_TABLE_SIZE; /* mix once more, then reduce to a bucket index */
+}
static server_rec *find_virtual_server (struct in_addr server_ip,
unsigned port, server_rec *server)
{
- server_rec *virt;
server_addr_rec *sar;
- server_rec *def;
+ server_rec_chain *trav;
+ unsigned buk;
- def = server;
- for (virt = server->next; virt; virt = virt->next) {
- for (sar = virt->addrs; sar; sar = sar->next) {
- if ((virt->is_virtual == 1) && /* VirtualHost */
- (sar->host_addr.s_addr == htonl(INADDR_ANY) ||
- sar->host_addr.s_addr == server_ip.s_addr) &&
- (sar->host_port == 0 || sar->host_port == port)) {
- return virt;
- } else if ( sar->host_addr.s_addr == DEFAULT_VHOST_ADDR
- && (sar->host_port == 0 || sar->host_port == port)) {
- /* this is so that you can build a server that is the
- "default" for any interface which isn't explicitly
- specified. So that you can implement "deny anything
- which isn't expressly permitted" -djg */
- def = virt;
+ /* scan the hash table for an exact match first */
+ buk = hash_inaddr( server_ip.s_addr );
+ for (trav = vhash_table[buk]; trav; trav = trav->next) {
+ sar = trav->sar;
+ if ((sar->host_addr.s_addr == server_ip.s_addr)
+ && (sar->host_port == 0 || sar->host_port == port)) {
+ if (trav->server->is_virtual) {
+ return trav->server;
}
+ /* otherwise it's the "main server address", and we need
+ * to do _default_ handling
+ */
+ break;
}
}
- return def;
+ /* return the main server for now, might switch to a _default_ later */
+ return server_conf;
}
-void default_server_hostnames(server_rec *s)
+
+static void add_to_vhash_bucket (unsigned buk, server_rec *s, /* buk: index into vhash_table */
+ server_addr_rec *sar) /* sar: the address record of s being filed */
+{
+ server_rec_chain *hashme;
+ /* Build a chain node for (s, sar) and push it on the front of bucket buk. */
+ hashme = palloc (pconf, sizeof (*hashme)); /* node is allocated from pconf */
+ hashme->server = s;
+ hashme->sar = sar;
+ hashme->next = vhash_table[buk]; /* the old head becomes our successor */
+ vhash_table[buk] = hashme; /* buk is not range-checked here */
+}
+
+
+/* hash table statistics, keep this in here for the beta period so
+ * we can find out if the hash function is ok
+ */
+#define VHASH_STATISTICS
+#ifdef VHASH_STATISTICS
+static int vhash_compare (const void *a, const void *b) /* qsort comparator for the chain length counts */
+{
+ return (*(const int *)b - *(const int *)a); /* b minus a, so larger counts sort first; NOTE(review): the counts are declared unsigned but read as int here */
+}
+
+static void dump_vhash_statistics (void)
+{
+ unsigned count[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP]; /* chain length of every bucket, extra ones included */
+ int i;
+ server_rec_chain *src;
+ unsigned total; /* first the number of hashed entries, later a run length */
+ char buf[HUGE_STRING_LEN]; /* the whole report is assembled here */
+ char *p; /* write position inside buf */
+ /* Measure every chain in vhash_table, then emit one summary line via log_error. */
+ total = 0;
+ for (i = 0; i < VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP; ++i) {
+ count[i] = 0;
+ for (src = vhash_table[i]; src; src = src->next) {
+ ++count[i];
+ if (i < VHASH_TABLE_SIZE) {
+ /* don't count the slop buckets in the total */
+ ++total;
+ }
+ }
+ }
+ qsort (count, VHASH_TABLE_SIZE, sizeof (count[0]), vhash_compare); /* only the hashed buckets are sorted; the two extra counts stay at their own indices */
+ p = buf + ap_snprintf (buf, sizeof (buf),
+ "vhash: total hashed = %u, avg chain = %u, #default = %u, "
+ "#name-vhost = %u, chain lengths (count x len):",
+ total, total / VHASH_TABLE_SIZE, count [VHASH_DEFAULT_BUCKET], /* the average is an integer division */
+ count [VHASH_MAIN_BUCKET]);
+ total = 1; /* from here on total is the length of the current run of equal counts */
+ for (i = 1; i < VHASH_TABLE_SIZE; ++i) {
+ if (count[i-1] != count[i]) { /* the run ended: report it as run length x chain length */
+ p += ap_snprintf (p, sizeof (buf) - (p - buf), " %ux%u",
+ total, count[i-1]);
+ total = 1;
+ } else {
+ ++total;
+ }
+ }
+ p += ap_snprintf (p, sizeof (buf) - (p - buf), " %ux%u", /* report the final run, which the loop never flushes */
+ total, count[VHASH_TABLE_SIZE-1]);
+ log_error(buf, server_conf);
+}
+#endif
+
+
+void default_server_hostnames(server_rec *main_s)
{
struct hostent *h;
- struct in_addr *main_addr;
- int num_addr;
char *def_hostname;
int n;
server_addr_rec *sar;
+ server_addr_rec *main_sar;
int has_default_vhost_addr;
- unsigned mainport = s->port;
int from_local=0;
+ server_rec *s;
+ int is_namevhost;
/* Main host first */
-
+ s = main_s;
+
if (!s->server_hostname) {
s->server_hostname = get_local_host(pconf);
from_local = 1;
@@ -1756,31 +1851,53 @@
};
exit(1);
}
- /* we need to use gethostbyaddr below... and since it shares a static
- area with gethostbyname it'd clobber the value we just got. So
- we need to make a copy. -djg */
- for (num_addr = 0; h->h_addr_list[num_addr] != NULL; num_addr++) {
- /* nop */
- }
- main_addr = palloc( pconf, sizeof( *main_addr ) * num_addr );
- for (n = 0; n < num_addr; n++) {
- main_addr[n] = *(struct in_addr *)h->h_addr_list[n];
+
+ /* we fill in s->addrs for two reasons. One so that we have
+ * server_addr_recs for the hash table. And also because gethostbyname
+ * and gethostbyaddr share a static data area and our result would be
+ * clobbered here if we didn't copy it somewhere. -djg
+ */
+ for (n = 0; h->h_addr_list[n] != NULL; n++) {
+ main_sar = pcalloc (pconf, sizeof (*main_sar));
+ main_sar->host_addr = *(struct in_addr *)h->h_addr_list[n];
+ main_sar->host_port = 0; /* we want this to match all ports */
+ main_sar->virthost = s->server_hostname;
+ main_sar->next = s->addrs;
+ s->addrs = main_sar;
+ add_to_vhash_bucket (hash_inaddr (main_sar->host_addr.s_addr),
+ s, main_sar);
}
/* Then virtual hosts */
-
+
for (s = s->next; s; s = s->next) {
/* Check to see if we might be a HTTP/1.1 virtual host - same IP */
has_default_vhost_addr = 0;
- for (n = 0; n < num_addr; n++) {
- for(sar = s->addrs; sar; sar = sar->next) {
- if (sar->host_addr.s_addr == main_addr[n].s_addr &&
- s->port == mainport)
+ for(sar = s->addrs; sar; sar = sar->next) {
+ is_namevhost = 0; /* guess addr doesn't match main server */
+ for (main_sar = main_s->addrs; main_sar; main_sar=main_sar->next) {
+ if (sar->host_addr.s_addr == main_sar->host_addr.s_addr
+ && s->port == main_s->port) {
+ add_to_vhash_bucket (VHASH_MAIN_BUCKET, s, sar);
+ /* XXX: only add it to the main bucket once since we're
+ * not optimizing name-vhosts yet */
s->is_virtual = 2;
- if( sar->host_addr.s_addr == DEFAULT_VHOST_ADDR ) {
- has_default_vhost_addr = 1;
+ is_namevhost = 1;
+ break;
}
}
+ if (sar->host_addr.s_addr == DEFAULT_VHOST_ADDR
+ || sar->host_addr.s_addr == INADDR_ANY) {
+ /* XXX: this probably isn't the best handling of INADDR_ANY */
+ /* add it to default bucket for each appropriate sar
+ * since we need to do a port test
+ */
+ has_default_vhost_addr = 1;
+ add_to_vhash_bucket (VHASH_DEFAULT_BUCKET, s, sar);
+ } else if (!is_namevhost) {
+ add_to_vhash_bucket (hash_inaddr (sar->host_addr.s_addr),
+ s, sar);
+ }
}
/* FIXME: some of this decision doesn't make a lot of sense in
@@ -1819,6 +1936,10 @@
}
}
}
+
+#ifdef VHASH_STATISTICS
+ dump_vhash_statistics ();
+#endif
}
conn_rec *new_connection (pool *p, server_rec *server, BUFF *inout,
1.150 +43 -13 apache/src/http_protocol.c
Index: http_protocol.c
===================================================================
RCS file: /export/home/cvs/apache/src/http_protocol.c,v
retrieving revision 1.149
retrieving revision 1.150
diff -u -r1.149 -r1.150
--- http_protocol.c 1997/08/04 02:55:11 1.149
+++ http_protocol.c 1997/08/05 06:02:42 1.150
@@ -68,6 +68,7 @@
*/
#include "util_date.h" /* For parseHTTPdate and BAD_DATE */
#include <stdarg.h>
+#include "http_conf_globals.h"
#define SET_BYTES_SENT(r) \
do { if (r->sent_bodyct) \
@@ -692,6 +693,7 @@
unsigned port = (*hostname) ? atoi(hostname) : 80;
server_rec *s;
int l;
+ server_rec_chain *src;
if (port && (port != r->server->port))
return;
@@ -703,15 +705,17 @@
r->hostname = host;
- for (s = r->server->next; s; s = s->next) {
+ for (src = vhash_table[VHASH_MAIN_BUCKET]; src; src = src->next) {
const char *names;
server_addr_rec *sar;
- if (s->addrs == NULL) {
- /* this server has been disabled because of DNS screwups during
- configuration */
- continue;
- }
+ s = src->server;
+
+ /* s->addrs != NULL because it's in a hash bucket */
+
+ /* Note that default_server_hostnames has ensured that each name-vhost
+ * appears only once in the VHASH_MAIN_BUCKET.
+ */
if ((!strcasecmp(host, s->server_hostname)) && (port == s->port)) {
r->server = r->connection->server = s;
@@ -754,13 +758,15 @@
void check_serverpath (request_rec *r) {
server_rec *s;
+ server_rec_chain *src;
/* This is in conjunction with the ServerPath code in
* http_core, so we get the right host attached to a non-
* Host-sending request.
*/
- for (s = r->server->next; s; s = s->next) {
+ for (src = vhash_table[VHASH_MAIN_BUCKET]; src; src = src->next) {
+ s = src->server;
if (s->addrs && s->path && !strncmp(r->uri, s->path, s->pathlen) &&
(s->path[s->pathlen - 1] == '/' ||
r->uri[s->pathlen] == '/' ||
@@ -769,6 +775,24 @@
}
}
+
+static void check_default_server (request_rec *r)
+{
+ server_addr_rec *sar;
+ server_rec_chain *trav;
+ unsigned port;
+ /* Switch r to the first entry of the default bucket whose port fits, if any. */
+ port = ntohs (r->connection->local_addr.sin_port); /* local port of this connection, in host byte order */
+ for (trav = vhash_table[VHASH_DEFAULT_BUCKET]; trav; trav = trav->next) {
+ sar = trav->sar;
+ if (sar->host_port == 0 || sar->host_port == port) { /* a host_port of 0 accepts every port */
+ /* match: the first fitting entry wins, update both request and connection */
+ r->server = r->connection->server = trav->server;
+ return;
+ }
+ }
+} /* no entry fitted: r->server is left as it was */
+
request_rec *read_request (conn_rec *conn)
{
request_rec *r = (request_rec *)pcalloc (conn->pool,
sizeof(request_rec));
@@ -815,12 +839,18 @@
r->status = HTTP_OK; /* Until further notice. */
- /* handle Host header here, to get virtual server */
-
- if (r->hostname || (r->hostname = table_get(r->headers_in, "Host")))
- check_hostalias(r);
- else
- check_serverpath(r);
+ /* if it's the main server so far, we have to do name-vhost style lookups */
+ if (r->server->is_virtual == 0) {
+ if (r->hostname || (r->hostname = table_get(r->headers_in, "Host")))
+ check_hostalias(r);
+ else
+ check_serverpath(r);
+ /* if that failed, then look for a default server */
+ if (r->server->is_virtual == 0) {
+ check_default_server (r);
+ }
+ }
+ /* we have finished the search for a vhost */
/* we may have switched to another server */
r->per_dir_config = r->server->lookup_defaults;
1.137 +23 -1 apache/src/httpd.h
Index: httpd.h
===================================================================
RCS file: /export/home/cvs/apache/src/httpd.h,v
retrieving revision 1.136
retrieving revision 1.137
diff -u -r1.136 -r1.137
--- httpd.h 1997/08/03 20:30:57 1.136
+++ httpd.h 1997/08/05 06:02:43 1.137
@@ -293,6 +293,19 @@
#define SCOREBOARD_MAINTENANCE_INTERVAL 1000000
#endif
+/* This defines the size of the hash table used for hashing ip addresses
+ * of virtual hosts. It must be a power of two.
+ */
+#ifndef VHASH_TABLE_SIZE
+#define VHASH_TABLE_SIZE 256
+#endif
+/* bucket where _default_ entries are stored */
+#define VHASH_DEFAULT_BUCKET (VHASH_TABLE_SIZE)
+/* bucket where name-vhosts are stored */
+#define VHASH_MAIN_BUCKET ((VHASH_TABLE_SIZE)+1)
+/* number of magic buckets */
+#define VHASH_EXTRA_SLOP 2
+
/* Number of requests to try to handle in a single process. If <= 0,
* the children don't die off. That's the default here, since I'm still
* interested in finding and stanching leaks.
@@ -498,7 +511,6 @@
const struct htaccess_result *next;
};
-
typedef struct conn_rec conn_rec;
typedef struct server_rec server_rec;
typedef struct request_rec request_rec;
@@ -677,6 +689,16 @@
char *virthost; /* The name given in <VirtualHost> */
};
+/* Meta linear list for hashes: one node per entry in a vhash_table bucket.
+ * Each server_rec can be in possibly multiple hash chains since it can
+ * have multiple ips. */
+typedef struct server_rec_chain server_rec_chain;
+struct server_rec_chain {
+ server_rec_chain *next; /* following node in the same bucket, NULL at the end */
+ server_rec *server; /* server this entry selects when it matches */
+ server_addr_rec *sar; /* the record causing it to be in
+ * this chain */
+};
struct server_rec {