This is an automated email from the ASF dual-hosted git repository. membphis pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push: new e58a55f fix(chash): ensure retry can try every node (#3651) e58a55f is described below commit e58a55fbc6e5ad2007ff206ac44096945c869390 Author: 罗泽轩 <spacewander...@gmail.com> AuthorDate: Sat Feb 27 22:17:22 2021 +0800 fix(chash): ensure retry can try every node (#3651) Previously the default number of retry is equal to the number of node, but the same node will be tried again according to its weight. Also ensure the same picker will be used in the whole request, especially during the retry. --- apisix/balancer.lua | 8 +++++-- apisix/balancer/chash.lua | 39 ++++++++++++++++++++++++++++-- t/admin/balancer.t | 2 ++ t/node/chash-balance.t | 60 +++++++++++++++++++++++++++++++++++++++++++++++ t/node/healthcheck.t | 2 +- 5 files changed, 106 insertions(+), 5 deletions(-) diff --git a/apisix/balancer.lua b/apisix/balancer.lua index 2540f2f..4be56fc 100644 --- a/apisix/balancer.lua +++ b/apisix/balancer.lua @@ -161,8 +161,12 @@ local function pick_server(route, ctx) version = version .. "#" .. checker.status_ver end - local server_picker = lrucache_server_picker(key, version, - create_server_picker, up_conf, checker) + -- the same picker will be used in the whole request, especially during the retry + local server_picker = ctx.server_picker + if not server_picker then + server_picker = lrucache_server_picker(key, version, + create_server_picker, up_conf, checker) + end if not server_picker then return nil, "failed to fetch server picker" end diff --git a/apisix/balancer/chash.lua b/apisix/balancer/chash.lua index df1568a..f9dbdbb 100644 --- a/apisix/balancer/chash.lua +++ b/apisix/balancer/chash.lua @@ -22,6 +22,9 @@ local str_gsub = string.gsub local pairs = pairs +local CONSISTENT_POINTS = 160 -- points per server, taken from `resty.chash` + + local _M = {} @@ -62,27 +65,59 @@ end function _M.new(up_nodes, upstream) local str_null = str_char(0) + local nodes_count = 0 + local safe_limit = 0 local servers, nodes = {}, {} for serv, weight in pairs(up_nodes) do local id = str_gsub(serv, ":", str_null) + nodes_count = nodes_count + 1 + safe_limit = safe_limit + weight servers[id] = serv nodes[id] = weight end + safe_limit = safe_limit * CONSISTENT_POINTS local picker = resty_chash:new(nodes) return { upstream = upstream, get = function (ctx) local id - if ctx.balancer_try_count > 1 and ctx.chash_last_server_index then - id, ctx.chash_last_server_index = picker:next(ctx.chash_last_server_index) + if ctx.balancer_tried_servers then + if ctx.balancer_tried_servers_count == nodes_count then + return nil, "all upstream servers tried" + end + + -- the 'safe_limit' is a best effort limit to prevent infinite loop caused by bug + for i = 1, safe_limit do + id, ctx.chash_last_server_index = picker:next(ctx.chash_last_server_index) + if not ctx.balancer_tried_servers[servers[id]] then + break + end + end else local chash_key = fetch_chash_hash_key(ctx, upstream) id, ctx.chash_last_server_index = picker:find(chash_key) end -- core.log.warn("chash id: ", id, " val: ", servers[id]) return servers[id] + end, + after_balance = function (ctx, before_retry) + if not before_retry then + if ctx.balancer_tried_servers then + core.tablepool.release("balancer_tried_servers", ctx.balancer_tried_servers) + ctx.balancer_tried_servers = nil + end + + return nil + end + + if not ctx.balancer_tried_servers then + ctx.balancer_tried_servers = core.tablepool.fetch("balancer_tried_servers", 0, 2) + end + + ctx.balancer_tried_servers[ctx.balancer_server] = true + ctx.balancer_tried_servers_count = (ctx.balancer_tried_servers_count or 0) + 1 end } end diff --git a/t/admin/balancer.t b/t/admin/balancer.t index b9a76c5..d1b9027 100644 --- a/t/admin/balancer.t +++ b/t/admin/balancer.t @@ -52,6 +52,8 @@ add_block_preprocessor(sub { for _, key in ipairs(keys) do ngx.say("host: ", key, " count: ", res[key]) end + + ctx.server_picker = nil end _EOC_ $block->set_value("init_by_lua_block", $init_by_lua_block); diff --git a/t/node/chash-balance.t b/t/node/chash-balance.t index 02eee5f..dfde2aa 100644 --- a/t/node/chash-balance.t +++ b/t/node/chash-balance.t @@ -496,3 +496,63 @@ GET /t --- error_code_like: ^(?:50\d)$ --- error_log failed to find valid upstream server, no valid upstream node + + + +=== TEST 13: set route(ensure retry can try every node) +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/server_port", + "upstream": { + "key": "arg_device_id", + "type": "chash", + "nodes": { + "127.0.0.1:1979": 1000, + "127.0.0.1:1980": 1 + } + } + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 14: hit routes +--- config + location /t { + content_by_lua_block { + local http = require "resty.http" + local uri = "http://127.0.0.1:" .. ngx.var.server_port + .. "/server_port?device_id=1" + + local httpc = http.new() + local res, err = httpc:request_uri(uri, {method = "GET"}) + if not res then + ngx.say(err) + return + end + + ngx.say(res.status) + } + } +--- request +GET /t +--- response_body +200 diff --git a/t/node/healthcheck.t b/t/node/healthcheck.t index d9bacb8..9f77a43 100644 --- a/t/node/healthcheck.t +++ b/t/node/healthcheck.t @@ -488,7 +488,7 @@ qr{.*http://127.0.0.1:1960/server_port.* .*http://127.0.0.1:1961/server_port.* .*http://127.0.0.1:1961/server_port.* .*http://127.0.0.1:1961/server_port.* -.*http://127.0.0.1:1960/server_port.*} +.*http://127.0.0.1:1961/server_port.*} --- timeout: 10