In http://lists.samba.org/archive/distcc/2004q2/002250.html, Dan proposed randomizing the hosts list. Here's a patch to add that feature, controlled by a special --randomize option in the hosts list. On a large shared build cluster, this helps spread the load without requiring separate hosts lists for each user. Martin, what do you think?
-- Josh --- distcc-2.14/man/distcc.1.dist 2004-05-27 17:15:27.000000000 -0700 +++ distcc-2.14/man/distcc.1 2004-05-27 18:07:52.000000000 -0700 @@ -221,7 +221,9 @@ distcc prefers hosts towards the start of the list, so machines should be listed in descending order of speed. In particular, when only a single compilation can be run (such as from a configure script), the -first machine listed is used. +first machine listed is used (but see +.I --randomize +below). .PP Placing .I localhost @@ -235,6 +237,14 @@ client is less than one fifth of the total, then the client should be left out of the list. .PP +If you have a large shared build cluster and a single shared hosts file, +the above rules would cause the first few machines in the hosts +file to be tried first even though they are likely to be busier than machines +later in the list. To avoid this, place the keyword +.I --randomize +into the host list. This will cause the host list to be randomized, +which should improve performance slightly for large build clusters. +.PP Performance depends on the details of the source and makefiles used for the project, and the machine and network speeds. Experimenting with different settings for the host list and -j factor may improve @@ -245,6 +255,7 @@ .nf DISTCC_HOSTS = HOSTSPEC ... HOSTSPEC = LOCAL_HOST | SSH_HOST | TCP_HOST | OLDSTYLE_TCP_HOST + | GLOBAL_OPTION LOCAL_HOST = localhost[/LIMIT] SSH_HOST = [USER]@HOSTID[/LIMIT][:COMMAND][OPTIONS] TCP_HOST = HOSTID[:PORT][/LIMIT][OPTIONS] @@ -252,6 +263,7 @@ HOSTID = HOSTNAME | IPV4 OPTIONS = ,OPTION[OPTIONS] OPTION = lzo + GLOBAL_OPTION = --randomize .fi .PP Here are some individual examples of the syntax: @@ -298,6 +310,9 @@ .TP .B ,lzo Enables LZO compression for this TCP or SSH host. +.TP +.B --randomize +Randomize the host list before execution. 
.PP Here is an example demonstrating some possibilities: .PP --- distcc-2.14/src/hosts.c.dist 2004-05-25 16:21:27.000000000 -0700 +++ distcc-2.14/src/hosts.c 2004-05-28 10:41:25.000000000 -0700 @@ -4,6 +4,9 @@ * * Copyright (C) 2002, 2003, 2004 by Martin Pool <[EMAIL PROTECTED]> * + * dcc_randomize_host_list() and friends: + * Copyright (C) 2004 by Google (Josh Hyman <[EMAIL PROTECTED]>) + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the @@ -36,6 +39,7 @@ * DISTCC_HOSTS = HOSTSPEC ... HOSTSPEC = LOCAL_HOST | SSH_HOST | TCP_HOST | OLDSTYLE_TCP_HOST + | GLOBAL_OPTION LOCAL_HOST = localhost[/LIMIT] SSH_HOST = [USER]@HOSTID[/LIMIT][:COMMAND][OPTIONS] TCP_HOST = HOSTID[:PORT][/LIMIT][OPTIONS] @@ -43,6 +47,7 @@ HOSTID = HOSTNAME | IPV4 OPTIONS = ,OPTION[OPTIONS] OPTION = lzo + GLOBAL_OPTION = --randomize * * Any amount of whitespace may be present between hosts. 
* @@ -89,6 +94,8 @@ #include <errno.h> #include <time.h> #include <ctype.h> +#include <sys/time.h> +#include <sys/types.h> #include "distcc.h" #include "trace.h" @@ -100,6 +107,18 @@ const int dcc_default_port = DISTCC_DEFAULT_PORT; +/*** + * A simple container which would hold a host -> rand int pair + ***/ +struct rand_container { + struct dcc_hostdef *host; + int rand; +}; + +int dcc_randomize_host_list(struct dcc_hostdef **host_list, int length); + +int dcc_compare_container(const void *a, const void *b); + #ifndef HAVE_STRNDUP /** @@ -348,7 +367,7 @@ struct dcc_hostdef **ret_list, int *ret_nhosts) { - int ret; + int ret, flag_randomize = 0; struct dcc_hostdef *prev, *curr; /* TODO: Check for '/' in places where it might cause trouble with @@ -390,6 +409,13 @@ token_start = where; token_len = strcspn(where, " #\t\n\f"); + /* intercept keywords which are not actually hosts */ + if (!strncmp(token_start, "--randomize", 11)) { + flag_randomize = 1; + where = token_start + token_len; + continue; + } + /* Allocate new list item */ curr = calloc(1, sizeof(struct dcc_hostdef)); if (!curr) { @@ -441,6 +467,9 @@ } if (*ret_nhosts) { + if (flag_randomize) + if ((ret = dcc_randomize_host_list(ret_list, *ret_nhosts)) != 0) + return ret; return 0; } else { rs_log_warning("%s contained no hosts; can't distribute work", source_name); @@ -448,6 +477,68 @@ } } +int dcc_compare_container(const void *a, const void *b) +{ + struct rand_container *i, *j; + i = (struct rand_container *) a; + j = (struct rand_container *) b; + + if (i->rand == j->rand) + return 0; + else if (i->rand > j->rand) + return 1; + else + return -1; +} + +int dcc_randomize_host_list(struct dcc_hostdef **host_list, int length) +{ + int i, ret; + unsigned int rand_val; + struct dcc_hostdef *curr; + struct rand_container *c; + struct timeval tv; + + c = malloc(length * sizeof(struct rand_container)); + if (!c) { + rs_log_crit("failed to allocate host definition"); + return EXIT_OUT_OF_MEMORY; + } + +#ifdef 
HAVE_GETTIMEOFDAY + if ((ret = gettimeofday(&tv, NULL)) == 0) + rand_val = (unsigned int) tv.tv_usec; + else +#else + rand_val = (unsigned int) time(NULL) ^ (unsigned int) getpid(); +#endif + + /* create pairs of hosts -> random numbers */ + srand(rand_val); + curr = *host_list; + for (i = 0; i < length; i++) { + c[i].host = curr; + c[i].rand = rand(); + curr = curr->next; + } + + /* sort */ + qsort(c, length, sizeof(struct rand_container), &dcc_compare_container); + + /* reorder the list */ + for (i = 0; i < length; i++) { + if (i != length - 1) + c[i].host->next = c[i+1].host; + else + c[i].host->next = NULL; + } + + /* move the start of the list */ + *host_list = c[0].host; + + free(c); + return 0; +} int dcc_free_hostdef(struct dcc_hostdef *host) { --- distcc-2.14/src/distcc.c.dist 2004-05-27 17:11:07.000000000 -0700 +++ distcc-2.14/src/distcc.c 2004-05-27 17:12:38.000000000 -0700 @@ -105,6 +105,7 @@ " HOST:PORT TCP connection, specified port\n" " @HOST SSH connection\n" " [EMAIL PROTECTED] SSH connection to specified host\n" +" --randomize Randomize the server list before execution\n" "\n" "distcc distributes compilation jobs across volunteer machines running\n" "distccd. Jobs that cannot be distributed, such as linking or \n" --- distcc-2.14/NEWS.dist 2004-05-28 11:35:08.000000000 -0700 +++ distcc-2.14/NEWS 2004-05-28 11:40:43.000000000 -0700 @@ -13,6 +13,9 @@ * distcc can now be built with a build directory separate from the source directory. Patch from Dennis Henriksen. + * Added --randomize option to host list to help spread the load + across large shared clusters. From patch by Google + (Josh Hyman <[EMAIL PROTECTED]>). DOCUMENTATION: __ distcc mailing list http://distcc.samba.org/ To unsubscribe or change options: http://lists.samba.org/mailman/listinfo/distcc