usage example: cons_hash $request_uri; Not only consistent hash is introduced as a load balancing, but also increase the hits of cache in upstream servers.
This modules is written according to round_robin, ip_hash and least_conn modules for upstream. So it inherits all the essential logic from them. This implementation is simple but has a high performance, The time comlexity remains O(log(T)) with a low constant coefficient, even when half of servers get down, regardless of the weight of servers, as analysis below. Suppose there are N upstream servers, with D down servers, and the weights are W1,W2,...,W(N-1). V is the amount of virtual nodes per unit. P is the probability to get an alive server from all nodes at random. Let T = (W1+W2+...+W(N-1))*V. then the space complexity is O(T), and the time comlexity is O(log(T) + (1-P)*N/(N-D)), 'N/(N-D)' of which is independent of W and V. # HG changeset patch # User Jianjun Zheng <codee...@gmail.com> # Date 1399531525 -28800 # Thu May 08 14:45:25 2014 +0800 # Node ID 063f37f1654ef6cc03bd311a7fe6189b299ce2f2 # Parent 48c97d83ab7f0a3f641987fb32ace8af7720aefc Upstream: add consistent hash module diff -r 48c97d83ab7f -r 063f37f1654e auto/modules --- a/auto/modules Tue Apr 29 22:22:38 2014 +0200 +++ b/auto/modules Thu May 08 14:45:25 2014 +0800 @@ -381,6 +381,11 @@ HTTP_SRCS="$HTTP_SRCS $HTTP_UPSTREAM_LEAST_CONN_SRCS" fi +if [ $HTTP_UPSTREAM_CONS_HASH = YES ]; then + HTTP_MODULES="$HTTP_MODULES $HTTP_UPSTREAM_CONS_HASH_MODULE" + HTTP_SRCS="$HTTP_SRCS $HTTP_UPSTREAM_CONS_HASH_SRCS" +fi + if [ $HTTP_UPSTREAM_KEEPALIVE = YES ]; then HTTP_MODULES="$HTTP_MODULES $HTTP_UPSTREAM_KEEPALIVE_MODULE" HTTP_SRCS="$HTTP_SRCS $HTTP_UPSTREAM_KEEPALIVE_SRCS" diff -r 48c97d83ab7f -r 063f37f1654e auto/options --- a/auto/options Tue Apr 29 22:22:38 2014 +0200 +++ b/auto/options Thu May 08 14:45:25 2014 +0800 @@ -100,6 +100,7 @@ HTTP_GZIP_STATIC=NO HTTP_UPSTREAM_IP_HASH=YES HTTP_UPSTREAM_LEAST_CONN=YES +HTTP_UPSTREAM_CONS_HASH=YES HTTP_UPSTREAM_KEEPALIVE=YES # STUB diff -r 48c97d83ab7f -r 063f37f1654e auto/sources --- a/auto/sources Tue Apr 29 22:22:38 2014 +0200 +++ b/auto/sources Thu May 08 14:45:25 2014 +0800 @@ -504,6 +504,11 @@ src/http/modules/ngx_http_upstream_least_conn_module.c" +HTTP_UPSTREAM_CONS_HASH_MODULE=ngx_http_upstream_cons_hash_module +HTTP_UPSTREAM_CONS_HASH_SRCS=" \ + src/http/modules/ngx_http_upstream_cons_hash_module.c" + + HTTP_UPSTREAM_KEEPALIVE_MODULE=ngx_http_upstream_keepalive_module HTTP_UPSTREAM_KEEPALIVE_SRCS=" \ src/http/modules/ngx_http_upstream_keepalive_module.c" diff -r 48c97d83ab7f -r 063f37f1654e src/http/modules/ngx_http_upstream_cons_hash_module.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/http/modules/ngx_http_upstream_cons_hash_module.c Thu May 08 14:45:25 2014 +0800 @@ -0,0 +1,591 @@ +#include <ngx_core.h> +#include <ngx_http.h> +#include <ngx_config.h> + + +#define NGX_HTTP_UPSTREAM_CH_VNODE_NUM 141 + + +typedef struct { + uint32_t hash; + + ngx_uint_t index; +} ngx_http_upstream_cons_hash_node_t; + + +typedef struct { + ngx_array_t *values; + ngx_array_t *lengths; + + ngx_uint_t node_number; + ngx_http_upstream_cons_hash_node_t *nodes; + + ngx_uint_t *nearest; + + ngx_http_upstream_rr_peers_t *peers; + + ngx_log_t *log; + + ngx_pool_t *pool; +} ngx_http_upstream_cons_hash_conf_t; + + +typedef struct { + /* the round robin data must be first */ + ngx_http_upstream_rr_peer_data_t rrp; + + ngx_uint_t found; + + ngx_http_upstream_cons_hash_conf_t *chcf; + + ngx_event_get_peer_pt get_rr_peer; +} ngx_http_upstream_ch_peer_data_t; + + +static void *ngx_http_upstream_cons_hash_create_conf(ngx_conf_t *cf); +static char *ngx_http_upstream_cons_hash(ngx_conf_t *cf, ngx_command_t *cmd, + void *conf); +static ngx_int_t ngx_http_upstream_init_cons_hash(ngx_conf_t *cf, + ngx_http_upstream_srv_conf_t *us); + +static ngx_int_t ngx_http_upstream_init_cons_hash_peer(ngx_http_request_t *r, + ngx_http_upstream_srv_conf_t *us); +static ngx_int_t ngx_http_upstream_get_cons_hash_peer( + ngx_peer_connection_t *pc, void *data); + +static ngx_uint_t ngx_http_upstream_find_cons_hash_peer( + ngx_http_upstream_cons_hash_conf_t *chcf, uint32_t hash); +static int ngx_http_upstream_cons_hash_cmp_dist(const void *one, + const void *two); +static int ngx_http_upstream_cons_hash_cmp_node(const void *one, + const void *two); +static ngx_int_t ngx_http_upstream_cons_hash_random( + ngx_http_upstream_cons_hash_conf_t *chcf, ngx_str_t value, ngx_uint_t id, + uint32_t *ret); +static ngx_int_t ngx_http_upstream_cons_hash_init_nearest( + ngx_http_upstream_cons_hash_conf_t *chcf); + +inline static ngx_int_t ngx_http_upstream_get_cons_hash_try_peer( + ngx_peer_connection_t *pc, void *data, ngx_uint_t index); + + +static ngx_command_t ngx_http_upstream_cons_hash_commands[] = { + + { ngx_string("cons_hash"), + NGX_HTTP_UPS_CONF | NGX_CONF_TAKE1, + ngx_http_upstream_cons_hash, + 0, + 0, + NULL }, + + ngx_null_command +}; + + +static ngx_http_module_t ngx_http_upstream_cons_hash_module_ctx = { + NULL, /* preconfiguration */ + NULL, /* postconfiguration */ + + NULL, /* create main configuration */ + NULL, /* init main configuration */ + + ngx_http_upstream_cons_hash_create_conf, /* create server configuration*/ + NULL, /* merge server configuration */ + + NULL, /* create location configuration */ + NULL /* merge location configuration */ +}; + + +ngx_module_t ngx_http_upstream_cons_hash_module = { + NGX_MODULE_V1, + &ngx_http_upstream_cons_hash_module_ctx, /* module context */ + ngx_http_upstream_cons_hash_commands, /* module directives */ + NGX_HTTP_MODULE, /* module type */ + NULL, /* init master */ + NULL, /* init module */ + NULL, /* init process */ + NULL, /* init thread */ + NULL, /* exit thread */ + NULL, /* exit process */ + NULL, /* exit master */ + NGX_MODULE_V1_PADDING +}; + + +static void * +ngx_http_upstream_cons_hash_create_conf(ngx_conf_t *cf) +{ + ngx_http_upstream_cons_hash_conf_t *conf; + + conf = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_cons_hash_conf_t)); + if (conf == NULL) { + return NULL; + } + + return conf; +} + + +static ngx_int_t +ngx_http_upstream_init_cons_hash(ngx_conf_t *cf, + ngx_http_upstream_srv_conf_t *us) +{ + uint32_t hash; + ngx_int_t rc; + ngx_str_t name; + ngx_uint_t i, j, k, n, nn, m; + ngx_http_upstream_rr_peers_t *peers; + ngx_http_upstream_cons_hash_conf_t *chcf; + + if (ngx_http_upstream_init_round_robin(cf, us) == NGX_ERROR) { + return NGX_ERROR; + } + + peers = us->peer.data; + + n = peers->number; + nn = 0; + + for (i = 0; i < n; ++i) { + nn += peers->peer[i].weight; + } + + nn *= (NGX_HTTP_UPSTREAM_CH_VNODE_NUM + 1); + + chcf = ngx_http_conf_upstream_srv_conf(us, + ngx_http_upstream_cons_hash_module); + + /* + * to guarantee nn % n == 0, there's no side effect, + * but much more convenient to construct the 'nearest' + */ + + nn = (nn + n - 1) / n * n; + chcf->node_number = nn; + + chcf->log = cf->log; + chcf->pool = cf->pool; + chcf->peers = peers; + + chcf->nodes = ngx_pcalloc(cf->pool, nn * + sizeof(ngx_http_upstream_cons_hash_node_t)); + if (chcf->nodes == NULL) { + return NGX_ERROR; + } + + for (i = 0, k = 0; i < n; ++i) { + + name = peers->peer[i].name; + m = peers->peer[i].weight * (1 + NGX_HTTP_UPSTREAM_CH_VNODE_NUM); + + for (j = 0; j < m; ++j, ++k) { + + chcf->nodes[k].index = i; + + rc = ngx_http_upstream_cons_hash_random(chcf, name, j, &hash); + if (rc == NGX_ERROR) { + return NGX_ERROR; + } + + chcf->nodes[k].hash = hash; + } + } + + for (i = 0; i < nn - k; ++i) { + chcf->nodes[k + i].index = chcf->nodes[0].index; + chcf->nodes[k + i].hash = chcf->nodes[0].hash; + } + + ngx_qsort(chcf->nodes, nn, sizeof(ngx_http_upstream_cons_hash_node_t), + ngx_http_upstream_cons_hash_cmp_node); + + rc = ngx_http_upstream_cons_hash_init_nearest(chcf); + if (rc == NGX_ERROR) { + return NGX_ERROR; + } + + us->peer.init = ngx_http_upstream_init_cons_hash_peer; + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_upstream_init_cons_hash_peer(ngx_http_request_t *r, + ngx_http_upstream_srv_conf_t *us) +{ + uint32_t hash; + ngx_str_t raw_value; + ngx_http_upstream_cons_hash_conf_t *chcf; + ngx_http_upstream_ch_peer_data_t *chp; + + chcf = ngx_http_conf_upstream_srv_conf(us, + ngx_http_upstream_cons_hash_module); + if (chcf == NULL) { + return NGX_ERROR; + } + + chp = ngx_pcalloc(r->pool, sizeof(ngx_http_upstream_ch_peer_data_t)); + if (chp == NULL) { + return NGX_ERROR; + } + + r->upstream->peer.data = &chp->rrp; + + if (ngx_http_upstream_init_round_robin_peer(r, us) != NGX_OK) { + return NGX_ERROR; + } + + if (!chp->rrp.peers->single) { + + /* raw_value.data is allocated in ngx_http_script_run from r->pool */ + + if (ngx_http_script_run(r, &raw_value, + chcf->lengths->elts, 0, chcf->values->elts) == NULL) { + return NGX_ERROR; + } + + hash = ngx_murmur_hash2(raw_value.data, raw_value.len); + + ngx_pfree(r->pool, raw_value.data); + + chp->found = ngx_http_upstream_find_cons_hash_peer(chcf, hash); + } + + r->upstream->peer.get = ngx_http_upstream_get_cons_hash_peer; + + chp->chcf = chcf; + chp->get_rr_peer = ngx_http_upstream_get_round_robin_peer; + + return NGX_OK; +} + + +inline static ngx_int_t +ngx_http_upstream_get_cons_hash_try_peer(ngx_peer_connection_t *pc, void *data, + ngx_uint_t index) +{ + ngx_http_upstream_ch_peer_data_t *chp = data; + + time_t now; + ngx_int_t rc; + ngx_uint_t n, m; + ngx_http_upstream_rr_peer_t *peer; + + n = index / (8 * sizeof(uintptr_t)); + m = (uintptr_t) 1 << index % (8 * sizeof(uintptr_t)); + + if (chp->rrp.tried[n] & m) { + return NGX_AGAIN; + } + + rc = NGX_AGAIN; + + now = ngx_time(); + + peer = &chp->chcf->peers->peer[index]; + + /* ngx_lock_mutex(chp->rrp.peers->mutex); */ + + if (!peer->down) { + if (peer->max_fails == 0 || peer->fails < peer->max_fails) { + rc = NGX_OK; + } + + if (now - peer->checked > peer->fail_timeout) { + peer->checked = now; + rc = NGX_OK; + } + } + + if (rc == NGX_OK) { + chp->rrp.current = index; + + pc->sockaddr = peer->sockaddr; + pc->socklen = peer->socklen; + pc->name = &peer->name; + } + + /* ngx_unlock_mutex(chp->rrp.peers->mutex); */ + + chp->rrp.tried[n] |= m; + + return rc; +} + + +static ngx_int_t +ngx_http_upstream_get_cons_hash_peer(ngx_peer_connection_t *pc, void *data) +{ + ngx_http_upstream_ch_peer_data_t *chp = data; + + ngx_int_t rc; + ngx_uint_t i, j, n, nn; + ngx_uint_t *nearest; + ngx_http_upstream_rr_peers_t *peers; + ngx_http_upstream_cons_hash_node_t *nodes; + + if (chp->rrp.peers->single) { + return chp->get_rr_peer(pc, &chp->rrp); + } + + pc->cached = 0; + pc->connection = NULL; + + peers = chp->chcf->peers; + nodes = chp->chcf->nodes; + nearest = chp->chcf->nearest; + + n = peers->number; + nn = chp->chcf->node_number; + + for (i = chp->found; i % n != 0; i = (i + 1) % nn) { + + rc = ngx_http_upstream_get_cons_hash_try_peer(pc, data, + nodes[i].index); + if (rc == NGX_OK) { + return NGX_OK; + } + } + + for (j = (i + n) % nn; i != j; i = (i + 1) % nn) { + + rc = ngx_http_upstream_get_cons_hash_try_peer(pc, data, + nearest[i]); + if (rc == NGX_OK) { + return NGX_OK; + } + } + + /* all peers failed, mark them as live for quick recovery */ + + for (i = 0; i < peers->number; i++) { + peers->peer[i].fails = 0; + } + + pc->name = peers->name; + + return NGX_BUSY; +} + + +static ngx_uint_t +ngx_http_upstream_find_cons_hash_peer(ngx_http_upstream_cons_hash_conf_t *chcf, + uint32_t hash) +{ + uint32_t mid_hash; + ngx_int_t l, r, mid; + + l = 0; + r = chcf->node_number - 1; + + while (l <= r) { + mid = (l + r) >> 1; + mid_hash = chcf->nodes[mid].hash; + + if (mid_hash < hash) { + l = mid + 1; + } else { + r = mid - 1; + } + } + + if (l == (ngx_int_t)chcf->node_number) { + l = 0; + } + + return l; +} + + +static char * +ngx_http_upstream_cons_hash(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) +{ + ngx_str_t *value; + ngx_http_script_compile_t sc; + ngx_http_upstream_srv_conf_t *uscf; + ngx_http_upstream_cons_hash_conf_t *chcf; + + uscf = ngx_http_conf_get_module_srv_conf(cf, ngx_http_upstream_module); + + chcf = ngx_http_conf_upstream_srv_conf(uscf, + ngx_http_upstream_cons_hash_module); + if (uscf->peer.init_upstream) { + ngx_conf_log_error(NGX_LOG_WARN, cf, 0, + "load balancing method redefined"); + } + uscf->peer.init_upstream = ngx_http_upstream_init_cons_hash; + + value = cf->args->elts; + ngx_memzero(&sc, sizeof(ngx_http_script_compile_t)); + + sc.cf = cf; + sc.source = &value[1]; + sc.lengths = &chcf->lengths;; + sc.values = &chcf->values; + sc.complete_lengths = 1; + sc.complete_values = 1; + + if (ngx_http_script_compile(&sc) != NGX_OK) { + return NGX_CONF_ERROR; + } + + uscf->flags = NGX_HTTP_UPSTREAM_CREATE + |NGX_HTTP_UPSTREAM_WEIGHT + |NGX_HTTP_UPSTREAM_MAX_FAILS + |NGX_HTTP_UPSTREAM_FAIL_TIMEOUT + |NGX_HTTP_UPSTREAM_DOWN; + + return NGX_CONF_OK; +} + + +static ngx_int_t +ngx_http_upstream_cons_hash_random(ngx_http_upstream_cons_hash_conf_t *chcf, + ngx_str_t value, ngx_uint_t id, uint32_t *ret) +{ + /* repeatable random with same (val, id) */ + + u_char *buf, *pos; + ngx_uint_t total; + + total = NGX_INT_T_LEN + value.len; + + buf = ngx_calloc(total, chcf->log); + if (buf == NULL) { + return NGX_ERROR; + } + + pos = ngx_snprintf(buf, total, "%i-%*s", id, value.len, value.data); + + *ret = ngx_murmur_hash2(buf, pos - buf); + + ngx_free(buf); + + return NGX_OK; +} + + +static ngx_int_t +ngx_http_upstream_cons_hash_init_nearest( + ngx_http_upstream_cons_hash_conf_t *chcf) +{ + ngx_int_t k; + ngx_uint_t i, j, n, nn; + ngx_uint_t *nearest, *temp; + ngx_http_upstream_cons_hash_node_t *nodes; + + n = chcf->peers->number; + nn = chcf->node_number; + + nodes = chcf->nodes; + + nearest = ngx_pcalloc(chcf->pool, nn * sizeof(ngx_uint_t)); + if (nearest == NULL) { + return NGX_ERROR; + } + + chcf->nearest = nearest; + + temp = ngx_pcalloc(chcf->pool, n * sizeof(ngx_uint_t)); + if (temp == NULL) { + return NGX_ERROR; + } + + for (i = 0; i < nn; ++i) { + nearest[i] = nn; + } + + for (i = 0; i < nn; i += n) { + for (j = 0; j < n; ++j) { + temp[j] = nn; + } + for (k = n - 1; k >= 0; --k) { + temp[nodes[i + k].index] = i + k; + } + for (j = 0; j < n; ++j) { + nearest[i + j] = temp[j]; + } + } + + ngx_pfree(chcf->pool, temp); + + /* update the 'nearest' twice */ + + for (i = 0; i < 2; ++i) { + for (k = nn - n; k >= 0; k -= n) { + for (j = 0; j < n; ++j) { + if (nearest[k + j] == nn) { + nearest[k + j] = nearest[(k + j + n) % nn]; + } + } + } + } + + for (i = 0; i < nn; i += n) { + + /* there is no elt equals to nn in the 'nearest' now */ + + for (j = 0; j < n; ++j) { + if (nearest[i + j] < i) { + nearest[i + j] += nn; + } + } + + ngx_qsort(nearest + i, n, sizeof(ngx_uint_t), + ngx_http_upstream_cons_hash_cmp_dist); + + for (j = 0; j < n; ++j) { + if (nearest[i + j] >= nn) { + nearest[i + j] -= nn; + } + } + } + + for (i = 0; i < nn; ++i) { + nearest[i] = nodes[nearest[i]].index; + } + + return NGX_OK; +} + + +static int +ngx_http_upstream_cons_hash_cmp_dist(const void *one, const void *two) +{ + ngx_uint_t first, second; + + first = *(ngx_uint_t *)one; + second = *(ngx_uint_t *)two; + + if (first < second) { + return -1; + } + + if (first > second) { + return 1; + } + + return 0; +} + + +static int +ngx_http_upstream_cons_hash_cmp_node(const void *one, const void *two) +{ + ngx_http_upstream_cons_hash_node_t *first, *second; + + first = (ngx_http_upstream_cons_hash_node_t *) one; + second = (ngx_http_upstream_cons_hash_node_t *) two; + + if (first->hash < second->hash) { + return -1; + } + + if (first->hash > second->hash) { + return 1; + } + + return 0; +}
_______________________________________________ nginx-devel mailing list nginx-devel@nginx.org http://mailman.nginx.org/mailman/listinfo/nginx-devel