shuosiw opened a new issue, #12923:
URL: https://github.com/apache/apisix/issues/12923
### Description
Help: 503 errors triggered when scaling down an upstream whose traffic-split
weight is 0
We use the traffic-split plugin for canary releases in production. The steps
are:
1. Update the route so that all traffic is forwarded to the Service of the
production deployment, and configure traffic-split to send 100% of the traffic
to the route's default upstream (the weighted_upstreams entry with no upstream_id).
2. Start the canary deployment, and use traffic-split to send 30% of the
traffic to the Service of the canary deployment.
3. After verification, perform a rolling update on the production service.
Once it’s completed, adjust traffic-split so that all traffic is routed back to
the production service.
4. Wait for 30 seconds, then scale the replicas of the canary deployment
down to 0.
In step 4 above, we occasionally see several 503 errors in the production
environment, but once all Pods of the canary deployment have finished scaling
down, the 503 errors no longer occur.
```
2026/01/14 16:43:30 [error] 60#60: *154846901 [lua] init.lua:541:
handle_upstream(): failed to set upstream: no valid upstream node: nil, client:
10.x.x.x, server: _, request: "GET /xxx/sdk/server_time HTTP/1.1", host:
"xxx.com"
...
```
The corresponding route configuration is as follows:
```
{
"uri": "/*",
"name": "xxx-prod",
"desc": "xxx项目正式环境路由",
"methods": [
"GET",
"POST",
"PUT",
"DELETE",
"PATCH",
"HEAD",
"OPTIONS",
"CONNECT",
"TRACE",
"PURGE"
],
"host": "xxxx.com",
"plugins": {
"traffic-split": {
"rules": [
{
"weighted_upstreams": [
{
"upstream_id": "00000000000000004385",
"weight": 0
},
{
"weight": 100
}
]
}
]
}
},
"upstream": {
"retries": 2,
"timeout": {
"connect": 6,
"send": 60,
"read": 60
},
"type": "roundrobin",
"checks": {
"active": {
"concurrency": 10,
"healthy": {
"http_statuses": [
200,
302,
404
],
"interval": 5,
"successes": 2
},
"http_path": "/ping",
"https_verify_certificate": true,
"timeout": 3,
"type": "http",
"unhealthy": {
"http_failures": 3,
"http_statuses": [
500,
501,
502,
503,
504,
505
],
"interval": 5,
"tcp_failures": 2,
"timeouts": 3
}
}
},
"hash_on": "vars",
"scheme": "http",
"discovery_type": "kubernetes",
"pass_host": "pass",
"service_name": "ns-xxx-prod/xxx-prod:http",
"keepalive_pool": {
"idle_timeout": 60,
"requests": 1000,
"size": 16
}
},
"labels": {
"apisix": "apisix-prod",
"appinstance": "xxx-prod"
},
"status": 1
}
```
The configuration of upstream 00000000000000004385 is as follows:
```
{
"retries": 2,
"timeout": {
"connect": 6,
"send": 60,
"read": 60
},
"type": "roundrobin",
"hash_on": "vars",
"scheme": "http",
"discovery_type": "kubernetes",
"pass_host": "pass",
"name": "xxxx-prod-canary",
"desc": "xxx生产环境路由——灰度",
"service_name": "ns-xxx-prod/xxx-prod-canary:http",
"keepalive_pool": {
"idle_timeout": 60,
"requests": 1000,
"size": 16
}
}
```
### Environment
- APISIX version : 3.10.0
- Operating system
```
6.1.0-18-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.76-1 (2024-02-01) x86_64
GNU/Linux
```
- OpenResty / Nginx version
```
nginx version: openresty/1.25.3.2
built by gcc 10.2.1 20210110 (Debian 10.2.1-6)
built with OpenSSL 3.2.0 23 Nov 2023
TLS SNI support enabled
configure arguments: --prefix=/usr/local/openresty/nginx --with-cc-opt='-O2
-DAPISIX_RUNTIME_VER=1.2.1 -DNGX_LUA_ABORT_AT_PANIC
-I/usr/local/openresty/zlib/include -I/usr/local/openresty/pcre/include
-I/usr/local/openresty/openssl3/include' --add-module=../ngx_devel_kit-0.3.3
--add-module=../echo-nginx-module-0.63 --add-module=../xss-nginx-module-0.06
--add-module=../ngx_coolkit-0.2 --add-module=../set-misc-nginx-module-0.33
--add-module=../form-input-nginx-module-0.12
--add-module=../encrypted-session-nginx-module-0.09
--add-module=../srcache-nginx-module-0.33 --add-module=../ngx_lua-0.10.26
--add-module=../ngx_lua_upstream-0.07
--add-module=../headers-more-nginx-module-0.37
--add-module=../array-var-nginx-module-0.06
--add-module=../memc-nginx-module-0.20 --add-module=../redis2-nginx-module-0.15
--add-module=../redis-nginx-module-0.3.9 --add-module=../ngx_stream_lua-0.0.14
--with-ld-opt='-Wl,-rpath,/usr/local/openresty/luajit/lib
-Wl,-rpath,/usr/local/openresty/wasmtime-c-api/lib
-L/usr/local/openresty/zlib/lib -L/usr/local/openresty/pcre/lib
-L/usr/local/openresty/openssl3/lib
-Wl,-rpath,/usr/local/openresty/zlib/lib:/usr/local/openresty/pcre/lib:/usr/local/openresty/openssl3/lib'
--add-module=/tmp/tmp.JZFDtqhgYj/openresty-1.25.3.2/../mod_dubbo-1.0.2
--add-module=/tmp/tmp.JZFDtqhgYj/openresty-1.25.3.2/../ngx_multi_upstream_module-1.2.0
--add-module=/tmp/tmp.JZFDtqhgYj/openresty-1.25.3.2/../apisix-nginx-module-1.16.1
--add-module=/tmp/tmp.JZFDtqhgYj/openresty-1.25.3.2/../apisix-nginx-module-1.16.1/src/stream
--add-module=/tmp/tmp.JZFDtqhgYj/openresty-1.25.3.2/../apisix-nginx-module-1.16.1/src/meta
--add-module=/tmp/tmp.JZFDtqhgYj/openresty-1.25.3.2/../wasm-nginx-module-0.7.0
--add-module=/tmp/tmp.JZFDtqhgYj/openresty-1.25.3.2/../lua-var-nginx-module-v0.5.3
--add-module=/tmp/tmp.JZFDtqhgYj/openresty-1.25.3.2/../lua-resty-events-0.2.0
--with-poll_module --with-pcre-jit --with-stream --with-stream_ssl_module
--with-stream_ssl_preread_module --with-http_v2_module
--with-http_v3_module --without-mail_pop3_module
--without-mail_imap_module --without-mail_smtp_module
--with-http_stub_status_module --with-http_realip_module
--with-http_addition_module --with-http_auth_request_module
--with-http_secure_link_module --with-http_random_index_module
--with-http_gzip_static_module --with-http_sub_module --with-http_dav_module
--with-http_flv_module --with-http_mp4_module --with-http_gunzip_module
--with-threads --with-compat --with-stream --without-pcre2
--with-http_ssl_module
```
- etcd version: 3.5.16
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: notifications-unsubscribe@apisix.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org