This is an automated email from the ASF dual-hosted git repository.
nic-6443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 93ace5d5c fix(ai-proxy): preserve upstream 429/5xx error response body (#13565)
93ace5d5c is described below
commit 93ace5d5cdd62074d29228b9637ae18b3c653f65
Author: Nic <[email protected]>
AuthorDate: Wed Jun 17 13:57:14 2026 +0800
fix(ai-proxy): preserve upstream 429/5xx error response body (#13565)
---
apisix/plugins/ai-proxy-multi.lua | 12 ++++--
apisix/plugins/ai-proxy/base.lua | 28 +++++++++++++-
docs/en/latest/plugins/ai-proxy-multi.md | 7 ++++
docs/en/latest/plugins/ai-proxy.md | 4 ++
t/plugin/ai-proxy-multi-retry.t | 7 +++-
t/plugin/ai-proxy.t | 64 ++++++++++++++++++++++++++++++++
6 files changed, 115 insertions(+), 7 deletions(-)
diff --git a/apisix/plugins/ai-proxy-multi.lua b/apisix/plugins/ai-proxy-multi.lua
index 4ba204ecc..9b8f2eab5 100644
--- a/apisix/plugins/ai-proxy-multi.lua
+++ b/apisix/plugins/ai-proxy-multi.lua
@@ -641,7 +641,7 @@ function _M.access(conf, ctx)
end
-local function retry_on_error(ctx, conf, code)
+local function retry_on_error(ctx, conf, code, body)
if not ctx.server_picker then
return code
end
@@ -676,11 +676,17 @@ local function retry_on_error(ctx, conf, code)
end
end
+ local failed_instance = ctx.picked_ai_instance_name
local name, ai_instance, err = pick_ai_instance(ctx, conf)
if err then
core.log.error("failed to pick new AI instance: ", err)
return 502
end
+ -- The failed attempt's body never reaches the client (a later attempt
+ -- responds instead), so surface the upstream error here for diagnostics.
+ core.log.warn("ai instance ", failed_instance, " returned status ", code,
+ ", falling back to ", name, ". upstream error body: ",
+ body or "")
ctx.balancer_ip = name
ctx.picked_ai_instance_name = name
ctx.picked_ai_instance = ai_instance
@@ -751,8 +757,8 @@ end
function _M.before_proxy(conf, ctx)
- return base.before_proxy(conf, ctx, function (ctx, conf, code)
- return retry_on_error(ctx, conf, code)
+ return base.before_proxy(conf, ctx, function (ctx, conf, code, body)
+ return retry_on_error(ctx, conf, code, body)
end)
end
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 80c025f7a..cb258ff30 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -55,6 +55,21 @@ local function resolve_cap(cap_entry, key, conf, ctx)
return val
end
+
+-- Read the upstream error response body (429/5xx) so the provider's error
+-- details are not discarded: they are logged on fallback and returned to the
+-- client when no retry happens. Error bodies are small, so a single read_body()
+-- is enough. Sets res._upstream_bytes for upstream-state accounting.
+local function read_upstream_error_body(res)
+ local body, err = res:read_body()
+ if not body then
+ core.log.warn("failed to read upstream error response body: ", err)
+ return nil
+ end
+ res._upstream_bytes = #body
+ return body
+end
+
function _M.set_logging(ctx, summaries, payloads)
if summaries then
ctx.llm_summary = {
@@ -298,15 +313,24 @@ function _M.before_proxy(conf, ctx, on_error)
core.response.set_response_source(ctx, "upstream")
if res.status == 429 or (res.status >= 500 and res.status < 600) then
+ -- Read the upstream error body before closing so the provider's
+ -- error details survive: logged on fallback (see retry_on_error)
+ -- and returned to the client when no retry happens.
+ local error_body = read_upstream_error_body(res)
+ local content_type = res.headers["Content-Type"]
+ if content_type then
+ core.response.set_header("Content-Type", content_type)
+ end
if res._t0 then
apisix_upstream.update_upstream_state({
response_time = (ngx_now() - res._t0) * 1000,
+ response_length = res._upstream_bytes or 0,
})
end
if res._httpc then
res._httpc:close()
end
- return res.status
+ return res.status, error_body
end
local body_reader = res.body_reader
@@ -380,7 +404,7 @@ function _M.before_proxy(conf, ctx, on_error)
return 500
end
if code_or_err and on_error then
- local abort_code = on_error(ctx, conf, code_or_err)
+ local abort_code = on_error(ctx, conf, code_or_err, body)
if abort_code then
return abort_code, body
end
diff --git a/docs/en/latest/plugins/ai-proxy-multi.md b/docs/en/latest/plugins/ai-proxy-multi.md
index 0368c5467..d95f9c26e 100644
--- a/docs/en/latest/plugins/ai-proxy-multi.md
+++ b/docs/en/latest/plugins/ai-proxy-multi.md
@@ -137,6 +137,13 @@ By default, `ai-proxy-multi` forwards the incoming client
request headers to the
Because the LLM upstream is often a third-party service, be aware that any
header the client sends (for example `Authorization`, `Cookie`, or internal
application headers) is forwarded to that provider unless it is overridden by
`auth.header`. If the client should not expose certain headers to the LLM
provider, strip them before the request reaches `ai-proxy-multi`, for example
with the [`proxy-rewrite`](./proxy-rewrite.md) plugin.
+## Upstream Error Responses
+
+When the selected LLM upstream returns a `429` or `5xx` status, `ai-proxy-multi` reads the upstream error body before deciding whether to fall back:
+
+- If the request is retried on another instance (per `fallback_strategy`, `max_retries`, and `retry_on_failure_within_ms`), the failed instance's error body is recorded in the error log for diagnostics, since a later attempt's response is sent to the client instead.
+- If the request is not retried (no matching `fallback_strategy`, retries exhausted, or the failure took longer than `retry_on_failure_within_ms`), the upstream status code and error body are returned to the client, preserving the upstream `Content-Type`.
+
## Examples
The examples below demonstrate how you can configure `ai-proxy-multi` for
different scenarios.
diff --git a/docs/en/latest/plugins/ai-proxy.md b/docs/en/latest/plugins/ai-proxy.md
index 642652e81..e0d78e102 100644
--- a/docs/en/latest/plugins/ai-proxy.md
+++ b/docs/en/latest/plugins/ai-proxy.md
@@ -145,6 +145,10 @@ By default, `ai-proxy` forwards the incoming client
request headers to the confi
Because the LLM upstream is often a third-party service, be aware that any
header the client sends (for example `Authorization`, `Cookie`, or internal
application headers) is forwarded to that provider unless it is overridden by
`auth.header`. If the client should not expose certain headers to the LLM
provider, strip them before the request reaches `ai-proxy`, for example with
the [`proxy-rewrite`](./proxy-rewrite.md) plugin.
+## Upstream Error Responses
+
+When the LLM upstream returns a `429` or `5xx` status, `ai-proxy` reads the upstream error body and returns it to the client together with the upstream status code and `Content-Type`, so provider-side error details (such as rate-limit information or validation errors) are not discarded.
+
## Examples
The examples below demonstrate how you can configure `ai-proxy` for different
scenarios.
diff --git a/t/plugin/ai-proxy-multi-retry.t b/t/plugin/ai-proxy-multi-retry.t
index a60fd7e10..e09bfc340 100644
--- a/t/plugin/ai-proxy-multi-retry.t
+++ b/t/plugin/ai-proxy-multi-retry.t
@@ -164,13 +164,15 @@ passed
-=== TEST 4: fast failure falls back to the healthy instance
+=== TEST 4: fast failure falls back to the healthy instance and logs the upstream error body
--- request
POST /anything
{ "messages": [ { "role": "user", "content": "What is 1+1?"} ] }
--- response_body chomp
success
--- error_code: 200
+--- error_log
+fast internal error
@@ -207,10 +209,11 @@ passed
-=== TEST 6: slow failure does not fall back and returns the upstream error
+=== TEST 6: slow failure does not fall back and returns the upstream error body to the client
--- request
POST /anything
{ "messages": [ { "role": "user", "content": "What is 1+1?"} ] }
--- error_code: 500
+--- response_body_like: slow internal error
--- error_log
exceeding retry_on_failure_within_ms 200
diff --git a/t/plugin/ai-proxy.t b/t/plugin/ai-proxy.t
index e73e41a86..24658a5b7 100644
--- a/t/plugin/ai-proxy.t
+++ b/t/plugin/ai-proxy.t
@@ -1263,3 +1263,67 @@ got token usage from ai service:
}
--- response_body
OK: auth.query is clean
+
+
+
+=== TEST 38: set route to an upstream that returns 5xx with an error body
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai-compatible",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "custom"
+ },
+ "override": {
+ "endpoint": "http://127.0.0.1:6725/v1/chat/completions"
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 39: single-instance ai-proxy returns the upstream 5xx error body to the client
+--- http_config
+ server {
+ server_name internal_error;
+ listen 6725;
+ default_type 'application/json';
+ location / {
+ content_by_lua_block {
+ ngx.status = 500
+ ngx.say([[{ "error": {"message":"upstream boom"}}]])
+ return
+ }
+ }
+ }
+--- request
+POST /anything
+{ "messages": [ { "role": "user", "content": "What is 1+1?"} ] }
+--- error_code: 500
+--- response_body_like: upstream boom
+--- response_headers
+Content-Type: application/json