dgaudet     97/08/04 23:02:47

  Modified:    htdocs/manual  vhosts-in-depth.html
               src       CHANGES http_conf_globals.h http_config.c
                        http_main.c  http_protocol.c httpd.h
  Log:
  Hashed ip-vhosts, including some semantic changes to vhosts in general
  which should improve the vhost situation overall.
  
  Revision  Changes    Path
  1.13      +4 -0      apache/htdocs/manual/vhosts-in-depth.html
  
  Index: vhosts-in-depth.html
  ===================================================================
  RCS file: /export/home/cvs/apache/htdocs/manual/vhosts-in-depth.html,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- vhosts-in-depth.html      1997/07/06 17:18:57     1.12
  +++ vhosts-in-depth.html      1997/08/05 06:02:38     1.13
  @@ -186,6 +186,10 @@
   
   <h3>Vhost Matching</h3>
   
  +
  +<p><strong>Apache 1.3 differs from what is documented
  +here, and documentation still has to be written.</strong>
  +
   <p>
   The server determines which vhost to use for a request as follows:
   
  
  
  
  1.382     +14 -0     apache/src/CHANGES
  
  Index: CHANGES
  ===================================================================
  RCS file: /export/home/cvs/apache/src/CHANGES,v
  retrieving revision 1.381
  retrieving revision 1.382
  diff -u -r1.381 -r1.382
  --- CHANGES   1997/08/02 22:43:50     1.381
  +++ CHANGES   1997/08/05 06:02:39     1.382
  @@ -1,5 +1,19 @@
   Changes with Apache 1.3a2
   
  +  *) ip-based vhosts are stored and queried using a hashing function, which
  +     has been shown to improve performance on servers with many ip-vhosts.
  +     Some other changes had to be made to accommodate this:
  +     - the * address for vhosts now behaves like _default_
  +     - the matching process now is:
  +         - match an ip-vhost directly via hash (possibly matches main
  +             server)
  +         - if that fails, just pretend it matched the main server
  +         - if so far only the main server has been matched, perform
  +             name-based lookups (ServerName, ServerAlias, ServerPath)
  +             *only on name-based vhosts*
  +         - if they fail, look for _default_ vhosts
  +     [Dean Gaudet, Dave Hankins <[EMAIL PROTECTED]>]
  +
     *) dbmmanage overhaul:
        - merge dbmmanage and dbmmanage.new functionality, remove dbmmanage.new 
        - tie() to AnyDBM_File which will use one of DB_File, NDBM_File or
  
  
  
  1.17      +2 -1      apache/src/http_conf_globals.h
  
  Index: http_conf_globals.h
  ===================================================================
  RCS file: /export/home/cvs/apache/src/http_conf_globals.h,v
  retrieving revision 1.16
  retrieving revision 1.17
  diff -u -r1.16 -r1.17
  --- http_conf_globals.h       1997/08/03 20:29:18     1.16
  +++ http_conf_globals.h       1997/08/05 06:02:40     1.17
  @@ -87,8 +87,9 @@
   extern char server_root[MAX_STRING_LEN];
   extern char server_confname[MAX_STRING_LEN];
   
  +extern server_rec_chain *vhash_table[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP];
  +
   /* We want this to have the least chance of being correupted if there
    * is some memory corruption, so we allocate it statically.
    */
   extern char coredump_dir[MAX_STRING_LEN];
  -
  
  
  
  1.70      +5 -0      apache/src/http_config.c
  
  Index: http_config.c
  ===================================================================
  RCS file: /export/home/cvs/apache/src/http_config.c,v
  retrieving revision 1.69
  retrieving revision 1.70
  diff -u -r1.69 -r1.70
  --- http_config.c     1997/08/03 20:29:18     1.69
  +++ http_config.c     1997/08/05 06:02:40     1.70
  @@ -1154,6 +1154,11 @@
       bind_address.s_addr = htonl(INADDR_ANY);
       listeners = NULL;
       listenbacklog = DEFAULT_LISTENBACKLOG;
  +
  +    /* Global virtual host hash bucket pointers.  Init to null. */
  +    memset (vhash_table, 0,
  +     (VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP) * sizeof (vhash_table[0]));
  +
       strncpy(coredump_dir, server_root, sizeof(coredump_dir)-1);
       coredump_dir[sizeof(coredump_dir)-1] = '\0';
   }
  
  
  
  1.197     +160 -39   apache/src/http_main.c
  
  Index: http_main.c
  ===================================================================
  RCS file: /export/home/cvs/apache/src/http_main.c,v
  retrieving revision 1.196
  retrieving revision 1.197
  diff -u -r1.196 -r1.197
  --- http_main.c       1997/08/04 09:21:16     1.196
  +++ http_main.c       1997/08/05 06:02:41     1.197
  @@ -191,6 +191,23 @@
   listen_rec *listeners;
   static listen_rec *head_listener;
   
  +/* A (n) bucket hash table, each entry has a pointer to a server rec and
  + * a pointer to the other entries in that bucket.  Each individual address,
  + * even for virtualhosts with multiple addresses, has an entry in this hash
  + * table.  There are extra buckets for _default_, and name-vhost entries.
  + *
  + * The main_server's addresses appear in the main part of this table.
  + * They're differentiated from real vhosts by server->is_virtual == 0.
  + *
  + * The VHASH_DEFAULT_BUCKET is a list of all the _default_ server_addr_recs.
  + *
  + * The VHASH_MAIN_BUCKET is a list of one server_addr_rec from each name
  + * based vhost.  At the moment none of the name based vhost code is hashed,
  + * and it's just more convenient to have a list of all the name-based vhosts
  + * rather than a list of all the names of name-based vhosts.
  + */
  +server_rec_chain *vhash_table[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP];
  +
   char server_root[MAX_STRING_LEN];
   char server_confname[MAX_STRING_LEN];
   char coredump_dir[MAX_STRING_LEN];
  @@ -1694,50 +1711,128 @@
       return (suexec_enabled);
   }
   
  +/* This hashing function is designed to get good distribution in the cases
  + * where the server is handling entire "networks" of servers.  i.e. a
  + * whack of /24s.  This is probably the most common configuration for
  + * ISPs with large virtual servers.
  + *
  + * Hash function provided by David Hankins.
  + */
  +static inline unsigned hash_inaddr( unsigned key )
  +{
  +    key ^= (key >> 16);
  +    return ((key >> 8) ^ key) % VHASH_TABLE_SIZE;
  +}
   
   static server_rec *find_virtual_server (struct in_addr server_ip,
                                unsigned port, server_rec *server)
   {
  -    server_rec *virt;
       server_addr_rec *sar;
  -    server_rec *def;
  +    server_rec_chain *trav;
  +    unsigned buk;
   
  -    def = server;
  -    for (virt = server->next; virt; virt = virt->next) {
  -     for (sar = virt->addrs; sar; sar = sar->next) {
  -         if ((virt->is_virtual == 1) &&      /* VirtualHost */
  -             (sar->host_addr.s_addr == htonl(INADDR_ANY) ||
  -             sar->host_addr.s_addr == server_ip.s_addr) &&
  -             (sar->host_port == 0 || sar->host_port == port)) {
  -             return virt;
  -         } else if ( sar->host_addr.s_addr == DEFAULT_VHOST_ADDR
  -             && (sar->host_port == 0 || sar->host_port == port)) {
  -             /* this is so that you can build a server that is the
  -                 "default" for any interface which isn't explicitly
  -                 specified.  So that you can implement "deny anything
  -                 which isn't expressly permitted" -djg */
  -             def = virt;
  +    /* scan the hash table for an exact match first */
  +    buk = hash_inaddr( server_ip.s_addr );
  +    for (trav = vhash_table[buk]; trav; trav = trav->next) {
  +     sar = trav->sar;
  +     if ((sar->host_addr.s_addr == server_ip.s_addr)
  +         && (sar->host_port == 0 || sar->host_port == port)) {
  +         if (trav->server->is_virtual) {
  +             return trav->server;
            }
  +         /* otherwise it's the "main server address", and we need
  +          * to do _default_ handling
  +          */
  +         break;
        }
       }
   
  -    return def;
  +    /* return the main server for now, might switch to a _default_ later */
  +    return server_conf;
   }
   
  -void default_server_hostnames(server_rec *s)
  +
  +static void add_to_vhash_bucket (unsigned buk, server_rec *s,
  +    server_addr_rec *sar)
  +{
  +    server_rec_chain *hashme;
  +
  +    hashme = palloc (pconf, sizeof (*hashme));
  +    hashme->server = s;
  +    hashme->sar = sar;
  +    hashme->next = vhash_table[buk];
  +    vhash_table[buk] = hashme;
  +}
  +
  +
  +/* hash table statistics, keep this in here for the beta period so
  + * we can find out if the hash function is ok
  + */
  +#define VHASH_STATISTICS
  +#ifdef VHASH_STATISTICS
  +static int vhash_compare (const void *a, const void *b)
  +{
  +    return (*(const int *)b - *(const int *)a);
  +}
  +
  +static void dump_vhash_statistics (void)
  +{
  +    unsigned count[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP];
  +    int i;
  +    server_rec_chain *src;
  +    unsigned total;
  +    char buf[HUGE_STRING_LEN];
  +    char *p;
  +
  +    total = 0;
  +    for (i = 0; i < VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP; ++i) {
  +     count[i] = 0;
  +     for (src = vhash_table[i]; src; src = src->next) {
  +         ++count[i];
  +         if (i < VHASH_TABLE_SIZE) {
  +             /* don't count the slop buckets in the total */
  +             ++total;
  +         }
  +     }
  +    }
  +    qsort (count, VHASH_TABLE_SIZE, sizeof (count[0]), vhash_compare);
  +    p = buf + ap_snprintf (buf, sizeof (buf),
  +     "vhash: total hashed = %u, avg chain = %u, #default = %u, "
  +     "#name-vhost = %u, chain lengths (count x len):",
  +     total, total / VHASH_TABLE_SIZE, count [VHASH_DEFAULT_BUCKET],
  +     count [VHASH_MAIN_BUCKET]);
  +    total = 1;
  +    for (i = 1; i < VHASH_TABLE_SIZE; ++i) {
  +     if (count[i-1] != count[i]) {
  +         p += ap_snprintf (p, sizeof (buf) - (p - buf), " %ux%u",
  +             total, count[i-1]);
  +         total = 1;
  +     } else {
  +         ++total;
  +     }
  +    }
  +    p += ap_snprintf (p, sizeof (buf) - (p - buf), " %ux%u",
  +     total, count[VHASH_TABLE_SIZE-1]);
  +    log_error(buf, server_conf);
  +}
  +#endif
  +
  +
  +void default_server_hostnames(server_rec *main_s)
   {
       struct hostent *h;
  -    struct in_addr *main_addr;
  -    int num_addr;
       char *def_hostname;
       int n;
       server_addr_rec *sar;
  +    server_addr_rec *main_sar;
       int has_default_vhost_addr;
  -    unsigned mainport = s->port;
       int from_local=0;  
  +    server_rec *s;
  +    int is_namevhost;
   
       /* Main host first */
  -    
  +    s = main_s;
  +
       if (!s->server_hostname) {
        s->server_hostname = get_local_host(pconf);
        from_local = 1;
  @@ -1756,31 +1851,53 @@
        };
        exit(1);
       }
  -    /* we need to use gethostbyaddr below... and since it shares a static
  -     area with gethostbyname it'd clobber the value we just got.  So
  -     we need to make a copy.  -djg */
  -    for (num_addr = 0; h->h_addr_list[num_addr] != NULL; num_addr++) {
  -     /* nop */
  -    }
  -    main_addr = palloc( pconf, sizeof( *main_addr ) * num_addr );
  -    for (n = 0; n < num_addr; n++) {
  -     main_addr[n] = *(struct in_addr *)h->h_addr_list[n];
  +
  +    /* we fill in s->addrs for two reasons.  One so that we have
  +     * server_addr_recs for the hash table.  And also because gethostbyname
  +     * and gethostbyaddr share a static data area and our result would be
  +     * clobbered here if we didn't copy it somewhere. -djg
  +     */
  +    for (n = 0; h->h_addr_list[n] != NULL; n++) {
  +     main_sar = pcalloc (pconf, sizeof (*main_sar));
  +     main_sar->host_addr = *(struct in_addr *)h->h_addr_list[n];
  +     main_sar->host_port = 0;        /* we want this to match all ports */
  +     main_sar->virthost = s->server_hostname;
  +     main_sar->next = s->addrs;
  +     s->addrs = main_sar;
  +     add_to_vhash_bucket (hash_inaddr (main_sar->host_addr.s_addr),
  +         s, main_sar);
       }
   
       /* Then virtual hosts */
  -    
  +
       for (s = s->next; s; s = s->next) {
        /* Check to see if we might be a HTTP/1.1 virtual host - same IP */
        has_default_vhost_addr = 0;
  -     for (n = 0; n < num_addr; n++) {
  -         for(sar = s->addrs; sar; sar = sar->next) {
  -             if (sar->host_addr.s_addr == main_addr[n].s_addr &&
  -                 s->port == mainport)
  +     for(sar = s->addrs; sar; sar = sar->next) {
  +         is_namevhost = 0; /* guess addr doesn't match main server */
  +         for (main_sar = main_s->addrs; main_sar; main_sar=main_sar->next) {
  +             if (sar->host_addr.s_addr == main_sar->host_addr.s_addr
  +                 && s->port == main_s->port) {
  +                 add_to_vhash_bucket (VHASH_MAIN_BUCKET, s, sar);
  +                 /* XXX: only add it to the main bucket once since we're
  +                  * not optimizing name-vhosts yet */
                    s->is_virtual = 2;
  -             if( sar->host_addr.s_addr == DEFAULT_VHOST_ADDR ) {
  -                 has_default_vhost_addr = 1;
  +                 is_namevhost = 1;
  +                 break;
                }
            }
  +         if (sar->host_addr.s_addr == DEFAULT_VHOST_ADDR
  +             || sar->host_addr.s_addr == INADDR_ANY) {
  +             /* XXX: this probably isn't the best handling of INADDR_ANY */
  +             /* add it to default bucket for each appropriate sar
  +              * since we need to do a port test
  +              */
  +             has_default_vhost_addr = 1;
  +             add_to_vhash_bucket (VHASH_DEFAULT_BUCKET, s, sar);
  +         } else if (!is_namevhost) {
  +             add_to_vhash_bucket (hash_inaddr (sar->host_addr.s_addr),
  +                 s, sar);
  +         }
        }
   
        /* FIXME: some of this decision doesn't make a lot of sense in
  @@ -1819,6 +1936,10 @@
            }
        }
       }
  +
  +#ifdef VHASH_STATISTICS
  +    dump_vhash_statistics ();
  +#endif
   }
   
   conn_rec *new_connection (pool *p, server_rec *server, BUFF *inout,
  
  
  
  1.150     +43 -13    apache/src/http_protocol.c
  
  Index: http_protocol.c
  ===================================================================
  RCS file: /export/home/cvs/apache/src/http_protocol.c,v
  retrieving revision 1.149
  retrieving revision 1.150
  diff -u -r1.149 -r1.150
  --- http_protocol.c   1997/08/04 02:55:11     1.149
  +++ http_protocol.c   1997/08/05 06:02:42     1.150
  @@ -68,6 +68,7 @@
                                 */
   #include "util_date.h"          /* For parseHTTPdate and BAD_DATE */
   #include <stdarg.h>
  +#include "http_conf_globals.h"
   
   #define SET_BYTES_SENT(r) \
     do { if (r->sent_bodyct) \
  @@ -692,6 +693,7 @@
     unsigned port = (*hostname) ? atoi(hostname) : 80;
     server_rec *s;
     int l;
  +  server_rec_chain *src;
   
     if (port && (port != r->server->port))
       return;
  @@ -703,15 +705,17 @@
   
     r->hostname = host;
   
  -  for (s = r->server->next; s; s = s->next) {
  +  for (src = vhash_table[VHASH_MAIN_BUCKET]; src; src = src->next) {
       const char *names;
       server_addr_rec *sar;
   
  -    if (s->addrs == NULL) {
  -     /* this server has been disabled because of DNS screwups during
  -         configuration */
  -     continue;
  -    }
  +    s = src->server;
  +
  +    /* s->addrs != NULL because it's in a hash bucket */
  +
  +    /* Note that default_server_hostnames adds a name-vhost to the
  +     * VHASH_MAIN_BUCKET once per address matching a main server address.
  +     */
   
       if ((!strcasecmp(host, s->server_hostname)) && (port == s->port)) {
         r->server = r->connection->server = s;
  @@ -754,13 +758,15 @@
   
   void check_serverpath (request_rec *r) {
     server_rec *s;
  +  server_rec_chain *src;
   
     /* This is in conjunction with the ServerPath code in
      * http_core, so we get the right host attached to a non-
      * Host-sending request.
      */
   
  -  for (s = r->server->next; s; s = s->next) {
  +  for (src = vhash_table[VHASH_MAIN_BUCKET]; src; src = src->next) {
  +    s = src->server;
       if (s->addrs && s->path && !strncmp(r->uri, s->path, s->pathlen) &&
        (s->path[s->pathlen - 1] == '/' ||
         r->uri[s->pathlen] == '/' ||
  @@ -769,6 +775,24 @@
     }
   }
   
  +
  +static void check_default_server (request_rec *r)
  +{
  +    server_addr_rec *sar;
  +    server_rec_chain *trav;
  +    unsigned port;
  +
  +    port = ntohs (r->connection->local_addr.sin_port);
  +    for (trav = vhash_table[VHASH_DEFAULT_BUCKET]; trav; trav = trav->next) {
  +     sar = trav->sar;
  +     if (sar->host_port == 0 || sar->host_port == port) {
  +         /* match! */
  +         r->server = r->connection->server = trav->server;
  +         return;
  +     }
  +    }
  +}
  +
   request_rec *read_request (conn_rec *conn)
   {
       request_rec *r = (request_rec *)pcalloc (conn->pool, sizeof(request_rec));
  @@ -815,12 +839,18 @@
   
       r->status = HTTP_OK;                /* Until further notice. */
   
  -    /* handle Host header here, to get virtual server */
  -
  -    if (r->hostname || (r->hostname = table_get(r->headers_in, "Host")))
  -      check_hostalias(r);
  -    else
  -      check_serverpath(r);
  +    /* if it's the main server so far, we have to do name-vhost style lookups */
  +    if (r->server->is_virtual == 0) {
  +     if (r->hostname || (r->hostname = table_get(r->headers_in, "Host")))
  +         check_hostalias(r);
  +     else
  +         check_serverpath(r);
  +     /* if that failed, then look for a default server */
  +     if (r->server->is_virtual == 0) {
  +         check_default_server (r);
  +     }
  +    }
  +    /* we have finished the search for a vhost */
       
       /* we may have switched to another server */
       r->per_dir_config = r->server->lookup_defaults;
  
  
  
  1.137     +23 -1     apache/src/httpd.h
  
  Index: httpd.h
  ===================================================================
  RCS file: /export/home/cvs/apache/src/httpd.h,v
  retrieving revision 1.136
  retrieving revision 1.137
  diff -u -r1.136 -r1.137
  --- httpd.h   1997/08/03 20:30:57     1.136
  +++ httpd.h   1997/08/05 06:02:43     1.137
  @@ -293,6 +293,19 @@
   #define SCOREBOARD_MAINTENANCE_INTERVAL 1000000
   #endif
   
  +/* This defines the size of the hash table used for hashing ip addresses
  + * of virtual hosts.  It must be a power of two.
  + */
  +#ifndef VHASH_TABLE_SIZE
  +#define VHASH_TABLE_SIZE 256
  +#endif
  +/* bucket where _default_ entries are stored */
  +#define VHASH_DEFAULT_BUCKET (VHASH_TABLE_SIZE)
  +/* bucket where name-vhosts are stored */
  +#define VHASH_MAIN_BUCKET    ((VHASH_TABLE_SIZE)+1)
  +/* number of magic buckets */
  +#define VHASH_EXTRA_SLOP     2
  +
   /* Number of requests to try to handle in a single process.  If <= 0,
    * the children don't die off.  That's the default here, since I'm still
    * interested in finding and stanching leaks.
  @@ -498,7 +511,6 @@
       const struct htaccess_result *next;
   };
   
  -
   typedef struct conn_rec conn_rec;
   typedef struct server_rec server_rec;
   typedef struct request_rec request_rec;
  @@ -677,6 +689,16 @@
       char *virthost;          /* The name given in <VirtualHost> */
   };
   
  +/* Meta linear list for hashes.  Each server_rec can be in possibly multiple
  + * hash chains since it can have multiple ips
  + */
  +typedef struct server_rec_chain server_rec_chain;
  +struct server_rec_chain {
  +    server_rec_chain *next;
  +    server_rec *server;
  +    server_addr_rec *sar;    /* the record causing it to be in
  +                              * this chain */
  +};
   
   struct server_rec {
   
  
  
  

Reply via email to