This is an automated email from the ASF dual-hosted git repository.
nic-6443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 17b9b56b8 fix(ai-proxy): populate $upstream_response_length for cosocket transport (#13323)
17b9b56b8 is described below
commit 17b9b56b8fcd42a828cab435bbe07a759797571e
Author: Nic <[email protected]>
AuthorDate: Thu Apr 30 18:57:32 2026 +0800
fix(ai-proxy): populate $upstream_response_length for cosocket transport (#13323)
---
apisix/plugins/ai-providers/base.lua | 24 +++++++-----
apisix/plugins/ai-proxy/base.lua | 1 +
docs/en/latest/plugins/ai-proxy.md | 1 +
t/plugin/ai-proxy-upstream-vars.t | 72 ++++++++++++++++++++++++++++++++++++
4 files changed, 89 insertions(+), 9 deletions(-)
diff --git a/apisix/plugins/ai-providers/base.lua b/apisix/plugins/ai-providers/base.lua
index 253e048f6..17d9238da 100644
--- a/apisix/plugins/ai-providers/base.lua
+++ b/apisix/plugins/ai-providers/base.lua
@@ -294,6 +294,7 @@ function _M.parse_response(self, ctx, res, client_proto, converter, conf)
res._httpc:close()
res._httpc = nil
end
+ res._upstream_bytes = total
return nil, "max_response_bytes exceeded", 502
end
parts[#parts + 1] = chunk
@@ -309,6 +310,7 @@ function _M.parse_response(self, ctx, res, client_proto, converter, conf)
core.log.warn("failed to read response body: ", err)
return nil, err
end
+ res._upstream_bytes = #raw_res_body
ngx.status = res.status
ctx.var.llm_time_to_first_token = math.floor((ngx_now() -
ctx.llm_request_start_time) * 1000)
ctx.var.apisix_upstream_response_time = ctx.var.llm_time_to_first_token
@@ -377,6 +379,15 @@ function _M.parse_streaming_response(self, ctx, res, target_proto, converter, co
-- uncommitted and causing nginx to fall through to the balancer phase.
local output_sent = false
+ -- Runaway-upstream safeguards. Both are opt-in; unset means no cap.
+ local max_duration_ms = conf and conf.max_stream_duration_ms
+ local max_bytes = conf and conf.max_response_bytes
+ local deadline
+ if max_duration_ms then
+ deadline = ctx.llm_request_start_time + max_duration_ms / 1000
+ end
+ local bytes_read = 0
+
local function abort_on_disconnect(flush_err)
core.log.info("client disconnected during AI streaming, ",
"aborting upstream read: ", flush_err)
@@ -384,24 +395,17 @@ function _M.parse_streaming_response(self, ctx, res, target_proto, converter, co
res._httpc:close()
res._httpc = nil
end
+ res._upstream_bytes = bytes_read
ctx.var.llm_request_done = true
end
- -- Runaway-upstream safeguards. Both are opt-in; unset means no cap.
- local max_duration_ms = conf and conf.max_stream_duration_ms
- local max_bytes = conf and conf.max_response_bytes
- local deadline
- if max_duration_ms then
- deadline = ctx.llm_request_start_time + max_duration_ms / 1000
- end
- local bytes_read = 0
-
while true do
local chunk, err = body_reader()
ctx.var.apisix_upstream_response_time = math.floor((ngx_now() -
ctx.llm_request_start_time) * 1000)
if err then
core.log.warn("failed to read response chunk: ", err)
+ res._upstream_bytes = bytes_read
return transport_http.handle_error(err)
end
if not chunk then
@@ -410,6 +414,7 @@ function _M.parse_streaming_response(self, ctx, res, target_proto, converter, co
#sse_buf)
end
+ res._upstream_bytes = bytes_read
if converter and not output_sent then
local msg = "streaming response completed without producing "
.. "any output; the upstream likely returned a "
@@ -521,6 +526,7 @@ function _M.parse_streaming_response(self, ctx, res, target_proto, converter, co
-- Signal downstream filters (e.g. moderation plugins that defer
-- work until request completion) that no more content is coming.
ctx.var.llm_request_done = true
+ res._upstream_bytes = bytes_read
if output_sent then
-- Client has already received partial SSE; stop feeding chunks.
-- nginx will close the downstream connection at end of content
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 82153993e..306d5b265 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -303,6 +303,7 @@ function _M.before_proxy(conf, ctx, on_error)
if res._t0 then
apisix_upstream.update_upstream_state({
response_time = (ngx_now() - res._t0) * 1000,
+ response_length = res._upstream_bytes or 0,
})
end
diff --git a/docs/en/latest/plugins/ai-proxy.md b/docs/en/latest/plugins/ai-proxy.md
index adb7add41..b29a6f68f 100644
--- a/docs/en/latest/plugins/ai-proxy.md
+++ b/docs/en/latest/plugins/ai-proxy.md
@@ -2062,6 +2062,7 @@ In addition, the following standard nginx upstream variables are automatically p
* `upstream_response_time`: Total time spent receiving the response from the upstream LLM service, in seconds (e.g., `2.858`).
* `upstream_connect_time`: Time spent establishing the connection to the upstream LLM service, in seconds.
* `upstream_header_time`: Time spent receiving the response headers from the upstream LLM service, in seconds.
+* `upstream_response_length`: Total number of bytes received from the upstream LLM service response body (e.g., `1024`).
* `upstream_host`: Hostname of the upstream LLM service as configured in the endpoint (e.g., `api.openai.com`).
* `upstream_scheme`: Scheme used to connect to the upstream LLM service (e.g., `https`).
* `upstream_uri`: Request URI path sent to the upstream LLM service (e.g., `/v1/chat/completions`).
diff --git a/t/plugin/ai-proxy-upstream-vars.t b/t/plugin/ai-proxy-upstream-vars.t
index 32050976e..44e201939 100644
--- a/t/plugin/ai-proxy-upstream-vars.t
+++ b/t/plugin/ai-proxy-upstream-vars.t
@@ -116,3 +116,75 @@ X-AI-Fixture: openai/chat-basic.json
--- error_code: 200
--- access_log eval
qr{http://127\.0\.0\.1/v1/chat/completions}
+
+
+
+=== TEST 5: set route with serverless plugin to log upstream_response_length
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer test-key"
+ }
+ },
+ "options": {
+ "model": "gpt-4"
+ },
+ "override": {
+ "endpoint": "http://127.0.0.1:1980"
+ },
+ "ssl_verify": false
+ },
+ "serverless-post-function": {
+ "phase": "log",
+ "functions": ["return function(_, ctx) ngx.log(ngx.WARN, 'upstream_response_length: ', ngx.var.upstream_response_length) end"]
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 6: non-streaming request has non-zero upstream_response_length
+--- request
+POST /anything
+{"model":"gpt-4","messages":[{"role":"user","content":"hello"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- error_log eval
+qr/upstream_response_length: [1-9]\d*/
+--- no_error_log
+upstream_response_length: 0
+
+
+
+=== TEST 7: streaming request has non-zero upstream_response_length
+--- request
+POST /anything
+{"model":"gpt-4","messages":[{"role":"user","content":"hello"}],"stream":true}
+--- more_headers
+X-AI-Fixture: openai/chat-streaming.sse
+--- error_code: 200
+--- error_log eval
+qr/upstream_response_length: [1-9]\d*/
+--- no_error_log
+upstream_response_length: 0