This is an automated email from the ASF dual-hosted git repository.

nic-6443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git


The following commit(s) were added to refs/heads/master by this push:
     new 93ace5d5c fix(ai-proxy): preserve upstream 429/5xx error response body (#13565)
93ace5d5c is described below

commit 93ace5d5cdd62074d29228b9637ae18b3c653f65
Author: Nic <[email protected]>
AuthorDate: Wed Jun 17 13:57:14 2026 +0800

    fix(ai-proxy): preserve upstream 429/5xx error response body (#13565)
---
 apisix/plugins/ai-proxy-multi.lua        | 12 ++++--
 apisix/plugins/ai-proxy/base.lua         | 28 +++++++++++++-
 docs/en/latest/plugins/ai-proxy-multi.md |  7 ++++
 docs/en/latest/plugins/ai-proxy.md       |  4 ++
 t/plugin/ai-proxy-multi-retry.t          |  7 +++-
 t/plugin/ai-proxy.t                      | 64 ++++++++++++++++++++++++++++++++
 6 files changed, 115 insertions(+), 7 deletions(-)

diff --git a/apisix/plugins/ai-proxy-multi.lua b/apisix/plugins/ai-proxy-multi.lua
index 4ba204ecc..9b8f2eab5 100644
--- a/apisix/plugins/ai-proxy-multi.lua
+++ b/apisix/plugins/ai-proxy-multi.lua
@@ -641,7 +641,7 @@ function _M.access(conf, ctx)
 end
 
 
-local function retry_on_error(ctx, conf, code)
+local function retry_on_error(ctx, conf, code, body)
     if not ctx.server_picker then
         return code
     end
@@ -676,11 +676,17 @@ local function retry_on_error(ctx, conf, code)
             end
         end
 
+        local failed_instance = ctx.picked_ai_instance_name
         local name, ai_instance, err = pick_ai_instance(ctx, conf)
         if err then
             core.log.error("failed to pick new AI instance: ", err)
             return 502
         end
+        -- The failed attempt's body never reaches the client (a later attempt
+        -- responds instead), so surface the upstream error here for diagnostics.
+        core.log.warn("ai instance ", failed_instance, " returned status ", code,
+                      ", falling back to ", name, ". upstream error body: ",
+                      body or "")
         ctx.balancer_ip = name
         ctx.picked_ai_instance_name = name
         ctx.picked_ai_instance = ai_instance
@@ -751,8 +757,8 @@ end
 
 
 function _M.before_proxy(conf, ctx)
-     return base.before_proxy(conf, ctx, function (ctx, conf, code)
-        return retry_on_error(ctx, conf, code)
+     return base.before_proxy(conf, ctx, function (ctx, conf, code, body)
+        return retry_on_error(ctx, conf, code, body)
     end)
 end
 
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 80c025f7a..cb258ff30 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -55,6 +55,21 @@ local function resolve_cap(cap_entry, key, conf, ctx)
     return val
 end
 
+
+-- Read the upstream error response body (429/5xx) so the provider's error
+-- details are not discarded: they are logged on fallback and returned to the
+-- client when no retry happens. Error bodies are small, so a single read_body()
+-- is enough. Sets res._upstream_bytes for upstream-state accounting.
+local function read_upstream_error_body(res)
+    local body, err = res:read_body()
+    if not body then
+        core.log.warn("failed to read upstream error response body: ", err)
+        return nil
+    end
+    res._upstream_bytes = #body
+    return body
+end
+
 function _M.set_logging(ctx, summaries, payloads)
     if summaries then
         ctx.llm_summary = {
@@ -298,15 +313,24 @@ function _M.before_proxy(conf, ctx, on_error)
             core.response.set_response_source(ctx, "upstream")
 
             if res.status == 429 or (res.status >= 500 and res.status < 600) then
+                -- Read the upstream error body before closing so the provider's
+                -- error details survive: logged on fallback (see retry_on_error)
+                -- and returned to the client when no retry happens.
+                local error_body = read_upstream_error_body(res)
+                local content_type = res.headers["Content-Type"]
+                if content_type then
+                    core.response.set_header("Content-Type", content_type)
+                end
                 if res._t0 then
                     apisix_upstream.update_upstream_state({
                         response_time = (ngx_now() - res._t0) * 1000,
+                        response_length = res._upstream_bytes or 0,
                     })
                 end
                 if res._httpc then
                     res._httpc:close()
                 end
-                return res.status
+                return res.status, error_body
             end
 
             local body_reader = res.body_reader
@@ -380,7 +404,7 @@ function _M.before_proxy(conf, ctx, on_error)
             return 500
         end
         if code_or_err and on_error then
-            local abort_code = on_error(ctx, conf, code_or_err)
+            local abort_code = on_error(ctx, conf, code_or_err, body)
             if abort_code then
                 return abort_code, body
             end
diff --git a/docs/en/latest/plugins/ai-proxy-multi.md b/docs/en/latest/plugins/ai-proxy-multi.md
index 0368c5467..d95f9c26e 100644
--- a/docs/en/latest/plugins/ai-proxy-multi.md
+++ b/docs/en/latest/plugins/ai-proxy-multi.md
@@ -137,6 +137,13 @@ By default, `ai-proxy-multi` forwards the incoming client request headers to the
 
 Because the LLM upstream is often a third-party service, be aware that any header the client sends (for example `Authorization`, `Cookie`, or internal application headers) is forwarded to that provider unless it is overridden by `auth.header`. If the client should not expose certain headers to the LLM provider, strip them before the request reaches `ai-proxy-multi`, for example with the [`proxy-rewrite`](./proxy-rewrite.md) plugin.
 
+## Upstream Error Responses
+
+When the selected LLM upstream returns a `429` or `5xx` status, `ai-proxy-multi` reads the upstream error body before deciding whether to fall back:
+
+- If the request is retried on another instance (per `fallback_strategy`, `max_retries`, and `retry_on_failure_within_ms`), the failed instance's error body is recorded in the error log for diagnostics, since a later attempt's response is sent to the client instead.
+- If the request is not retried (no matching `fallback_strategy`, retries exhausted, or the failure took longer than `retry_on_failure_within_ms`), the upstream status code and error body are returned to the client, preserving the upstream `Content-Type`.
+
 ## Examples
 
 The examples below demonstrate how you can configure `ai-proxy-multi` for different scenarios.
diff --git a/docs/en/latest/plugins/ai-proxy.md b/docs/en/latest/plugins/ai-proxy.md
index 642652e81..e0d78e102 100644
--- a/docs/en/latest/plugins/ai-proxy.md
+++ b/docs/en/latest/plugins/ai-proxy.md
@@ -145,6 +145,10 @@ By default, `ai-proxy` forwards the incoming client request headers to the confi
 
 Because the LLM upstream is often a third-party service, be aware that any header the client sends (for example `Authorization`, `Cookie`, or internal application headers) is forwarded to that provider unless it is overridden by `auth.header`. If the client should not expose certain headers to the LLM provider, strip them before the request reaches `ai-proxy`, for example with the [`proxy-rewrite`](./proxy-rewrite.md) plugin.
 
+## Upstream Error Responses
+
+When the LLM upstream returns a `429` or `5xx` status, `ai-proxy` reads the upstream error body and returns it to the client together with the upstream status code and `Content-Type`, so provider-side error details (such as rate-limit information or validation errors) are not discarded.
+
 ## Examples
 
 The examples below demonstrate how you can configure `ai-proxy` for different scenarios.
diff --git a/t/plugin/ai-proxy-multi-retry.t b/t/plugin/ai-proxy-multi-retry.t
index a60fd7e10..e09bfc340 100644
--- a/t/plugin/ai-proxy-multi-retry.t
+++ b/t/plugin/ai-proxy-multi-retry.t
@@ -164,13 +164,15 @@ passed
 
 
 
-=== TEST 4: fast failure falls back to the healthy instance
+=== TEST 4: fast failure falls back to the healthy instance and logs the upstream error body
 --- request
 POST /anything
 { "messages": [ { "role": "user", "content": "What is 1+1?"} ] }
 --- response_body chomp
 success
 --- error_code: 200
+--- error_log
+fast internal error
 
 
 
@@ -207,10 +209,11 @@ passed
 
 
 
-=== TEST 6: slow failure does not fall back and returns the upstream error
+=== TEST 6: slow failure does not fall back and returns the upstream error body to the client
 --- request
 POST /anything
 { "messages": [ { "role": "user", "content": "What is 1+1?"} ] }
 --- error_code: 500
+--- response_body_like: slow internal error
 --- error_log
 exceeding retry_on_failure_within_ms 200
diff --git a/t/plugin/ai-proxy.t b/t/plugin/ai-proxy.t
index e73e41a86..24658a5b7 100644
--- a/t/plugin/ai-proxy.t
+++ b/t/plugin/ai-proxy.t
@@ -1263,3 +1263,67 @@ got token usage from ai service:
     }
 --- response_body
 OK: auth.query is clean
+
+
+
+=== TEST 38: set route to an upstream that returns 5xx with an error body
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai-compatible",
+                            "auth": {
+                                "header": {
+                                    "Authorization": "Bearer token"
+                                }
+                            },
+                            "options": {
+                                "model": "custom"
+                            },
+                            "override": {
+                                "endpoint": "http://127.0.0.1:6725/v1/chat/completions"
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 39: single-instance ai-proxy returns the upstream 5xx error body to the client
+--- http_config
+    server {
+        server_name internal_error;
+        listen 6725;
+        default_type 'application/json';
+        location / {
+            content_by_lua_block {
+                ngx.status = 500
+                ngx.say([[{ "error": {"message":"upstream boom"}}]])
+                return
+            }
+        }
+    }
+--- request
+POST /anything
+{ "messages": [ { "role": "user", "content": "What is 1+1?"} ] }
+--- error_code: 500
+--- response_body_like: upstream boom
+--- response_headers
+Content-Type: application/json

Reply via email to