This is an automated email from the ASF dual-hosted git repository.
AlinsRan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new bf9091acd feat(ai-proxy): add built-in nginx variables for LLM observability (#13477)
bf9091acd is described below
commit bf9091acd3ec65bd8fd90e24e937131279fdcbce
Author: AlinsRan <[email protected]>
AuthorDate: Wed Jun 10 17:22:25 2026 +0800
feat(ai-proxy): add built-in nginx variables for LLM observability (#13477)
---
apisix/cli/ngx_tpl.lua | 8 +
apisix/core/ctx.lua | 8 +
apisix/plugins/ai-protocols/anthropic-messages.lua | 41 ++++
apisix/plugins/ai-protocols/openai-chat.lua | 68 ++++++-
apisix/plugins/ai-protocols/openai-responses.lua | 69 +++++--
apisix/plugins/ai-providers/base.lua | 26 +++
apisix/plugins/ai-proxy/base.lua | 33 +++-
t/APISIX.pm | 10 +-
.../anthropic/messages-streaming-with-tool-use.sse | 14 ++
t/fixtures/openai/chat-streaming-with-cache.sse | 6 +
.../openai/chat-streaming-with-tool-calls.sse | 7 +
.../openai/responses-streaming-with-cache.sse | 6 +
.../openai/responses-streaming-with-tool-call.sse | 3 +
t/fixtures/openai/responses-with-cache.json | 22 +++
t/plugin/ai-proxy-anthropic.t | 89 +++++++++
t/plugin/ai-proxy3.t | 219 +++++++++++++++++++--
16 files changed, 596 insertions(+), 33 deletions(-)
diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua
index ca6642b9a..c86f83e70 100644
--- a/apisix/cli/ngx_tpl.lua
+++ b/apisix/cli/ngx_tpl.lua
@@ -830,6 +830,14 @@ http {
set $llm_model '';
set $llm_prompt_tokens '0';
set $llm_completion_tokens '0';
+ set $llm_total_tokens '0';
+ set $llm_stream 'false';
+ set $llm_has_tool_calls 'false';
+ set $llm_tool_count '0';
+ set $llm_end_user_id '';
+ set $llm_cache_read_input_tokens '0';
+ set $llm_cache_creation_input_tokens '0';
+ set $llm_reasoning_tokens '0';
{% if use_apisix_base then %}
diff --git a/apisix/core/ctx.lua b/apisix/core/ctx.lua
index 07b30d7dd..58ee7c6b6 100644
--- a/apisix/core/ctx.lua
+++ b/apisix/core/ctx.lua
@@ -204,6 +204,14 @@ do
llm_model = true,
llm_prompt_tokens = true,
llm_completion_tokens = true,
+ llm_total_tokens = true,
+ llm_stream = true,
+ llm_has_tool_calls = true,
+ llm_tool_count = true,
+ llm_end_user_id = true,
+ llm_cache_read_input_tokens = true,
+ llm_cache_creation_input_tokens = true,
+ llm_reasoning_tokens = true,
upstream_mirror_host = true,
upstream_mirror_uri = true,
diff --git a/apisix/plugins/ai-protocols/anthropic-messages.lua
b/apisix/plugins/ai-protocols/anthropic-messages.lua
index a78d0c26e..a777f5136 100644
--- a/apisix/plugins/ai-protocols/anthropic-messages.lua
+++ b/apisix/plugins/ai-protocols/anthropic-messages.lua
@@ -66,6 +66,14 @@ function _M.parse_sse_event(event, ctx, state)
end
return { type = "skip" }
+ elseif event.type == "content_block_start" then
+ local data = core.json.decode(event.data, { null_as_nil = true })
+ if data and type(data.content_block) == "table"
+ and data.content_block.type == "tool_use" then
+ return { type = "skip", has_tool_call = true }
+ end
+ return { type = "skip" }
+
elseif event.type == "message_delta" then
local data, err = core.json.decode(event.data, { null_as_nil = true })
if not data then
@@ -102,6 +110,9 @@ function _M.parse_sse_event(event, ctx, state)
prompt_tokens = usage.input_tokens or 0,
completion_tokens = usage.output_tokens or 0,
total_tokens = (usage.input_tokens or 0) +
(usage.output_tokens or 0),
+ cache_read_input_tokens = usage.cache_read_input_tokens or
0,
+ cache_creation_input_tokens =
usage.cache_creation_input_tokens or 0,
+ reasoning_tokens = 0,
},
raw_usage = usage,
}
@@ -169,10 +180,40 @@ function _M.extract_usage(res_body)
prompt_tokens = prompt,
completion_tokens = completion,
total_tokens = prompt + completion,
+ cache_read_input_tokens = raw.cache_read_input_tokens or 0,
+ cache_creation_input_tokens = raw.cache_creation_input_tokens or 0,
+ reasoning_tokens = 0,
}, raw
end
+--- Detect whether a non-streaming response contains tool calls.
+function _M.has_tool_call(res_body)
+ if type(res_body) ~= "table" or type(res_body.content) ~= "table" then
+ return false
+ end
+ for _, block in ipairs(res_body.content) do
+ if type(block) == "table" and block.type == "tool_use" then
+ return true
+ end
+ end
+ return false
+end
+
+
+--- Extract the end-user identifier from a request body.
+function _M.extract_end_user_id(body)
+ if type(body) ~= "table" then
+ return nil
+ end
+ local meta = body.metadata
+ if type(meta) == "table" and type(meta.user_id) == "string" then
+ return meta.user_id
+ end
+ return nil
+end
+
+
--- Extract all text content from a request body for moderation.
function _M.extract_request_content(body)
local contents = {}
diff --git a/apisix/plugins/ai-protocols/openai-chat.lua
b/apisix/plugins/ai-protocols/openai-chat.lua
index faa48f665..096af76df 100644
--- a/apisix/plugins/ai-protocols/openai-chat.lua
+++ b/apisix/plugins/ai-protocols/openai-chat.lua
@@ -79,14 +79,18 @@ function _M.parse_sse_event(event, ctx, state)
local result = { type = "delta", data = data }
- -- Extract text content from choices
+ -- Extract text content and detect tool calls from choices
if type(data.choices) == "table" and #data.choices > 0 then
local texts = {}
for _, choice in ipairs(data.choices) do
- if type(choice) == "table"
- and type(choice.delta) == "table"
- and type(choice.delta.content) == "string" then
- core.table.insert(texts, choice.delta.content)
+ if type(choice) == "table" and type(choice.delta) == "table"
then
+ if type(choice.delta.content) == "string" then
+ core.table.insert(texts, choice.delta.content)
+ end
+ if type(choice.delta.tool_calls) == "table"
+ and #choice.delta.tool_calls > 0 then
+ result.has_tool_call = true
+ end
end
end
if #texts > 0 then
@@ -96,13 +100,20 @@ function _M.parse_sse_event(event, ctx, state)
-- Extract usage (null for non-final chunks; cjson decodes null as userdata)
if type(data.usage) == "table" then
+ local u = data.usage
+ local pd = type(u.prompt_tokens_details) == "table" and
u.prompt_tokens_details
+ local cd = type(u.completion_tokens_details) == "table" and
u.completion_tokens_details
result.type = "usage"
result.usage = {
- prompt_tokens = data.usage.prompt_tokens or 0,
- completion_tokens = data.usage.completion_tokens or 0,
- total_tokens = data.usage.total_tokens or 0,
+ prompt_tokens = u.prompt_tokens or 0,
+ completion_tokens = u.completion_tokens or 0,
+ total_tokens = u.total_tokens or 0,
+ cache_read_input_tokens = pd and pd.cached_tokens
+ or u.prompt_cache_hit_tokens or 0,
+ cache_creation_input_tokens = pd and
pd.cache_creation_input_tokens or 0,
+ reasoning_tokens = cd and cd.reasoning_tokens or 0,
}
- result.raw_usage = data.usage
+ result.raw_usage = u
end
return result
@@ -160,14 +171,53 @@ function _M.extract_usage(res_body)
return nil, nil
end
local raw = res_body.usage
+ local pdetails = type(raw.prompt_tokens_details) == "table" and
raw.prompt_tokens_details
+ local cdetails = type(raw.completion_tokens_details) == "table"
+ and raw.completion_tokens_details
+ -- OpenAI uses prompt_tokens_details.cached_tokens; DeepSeek uses prompt_cache_hit_tokens
+ local cache_read = pdetails and pdetails.cached_tokens or
raw.prompt_cache_hit_tokens or 0
return {
prompt_tokens = raw.prompt_tokens or 0,
completion_tokens = raw.completion_tokens or 0,
total_tokens = raw.total_tokens or (raw.prompt_tokens or 0) +
(raw.completion_tokens or 0),
+ cache_read_input_tokens = cache_read,
+ cache_creation_input_tokens = pdetails and
pdetails.cache_creation_input_tokens or 0,
+ reasoning_tokens = cdetails and cdetails.reasoning_tokens or 0,
}, raw
end
+--- Detect whether a non-streaming response contains tool calls.
+function _M.has_tool_call(res_body)
+ if type(res_body) ~= "table" or type(res_body.choices) ~= "table" then
+ return false
+ end
+ for _, choice in ipairs(res_body.choices) do
+ if type(choice) == "table" and type(choice.message) == "table"
+ and type(choice.message.tool_calls) == "table"
+ and #choice.message.tool_calls > 0 then
+ return true
+ end
+ end
+ return false
+end
+
+
+--- Extract the end-user identifier from a request body.
+function _M.extract_end_user_id(body)
+ if type(body) ~= "table" then
+ return nil
+ end
+ if type(body.safety_identifier) == "string" then
+ return body.safety_identifier
+ end
+ if type(body.user) == "string" then
+ return body.user
+ end
+ return nil
+end
+
+
--- Extract all text content from a request body for moderation.
function _M.extract_request_content(body)
local contents = {}
diff --git a/apisix/plugins/ai-protocols/openai-responses.lua
b/apisix/plugins/ai-protocols/openai-responses.lua
index 9e05e99a7..9e78b3fb5 100644
--- a/apisix/plugins/ai-protocols/openai-responses.lua
+++ b/apisix/plugins/ai-protocols/openai-responses.lua
@@ -68,16 +68,30 @@ function _M.parse_sse_event(event, ctx, state)
core.log.warn("failed to decode response.completed SSE data: ",
err)
return result
end
- if type(data.response) == "table"
- and type(data.response.usage) == "table" then
- local usage = data.response.usage
- result.type = "usage_and_done"
- result.usage = {
- prompt_tokens = usage.input_tokens or 0,
- completion_tokens = usage.output_tokens or 0,
- total_tokens = usage.total_tokens or 0,
- }
- result.raw_usage = usage
+ if type(data.response) == "table" then
+ local resp = data.response
+ if type(resp.usage) == "table" then
+ local usage = resp.usage
+ result.type = "usage_and_done"
+ result.usage = {
+ prompt_tokens = usage.input_tokens or 0,
+ completion_tokens = usage.output_tokens or 0,
+ total_tokens = usage.total_tokens or 0,
+ cache_read_input_tokens = type(usage.input_tokens_details)
== "table"
+ and usage.input_tokens_details.cached_tokens or 0,
+ reasoning_tokens = type(usage.output_tokens_details) ==
"table"
+ and usage.output_tokens_details.reasoning_tokens or 0,
+ }
+ result.raw_usage = usage
+ end
+ if type(resp.output) == "table" then
+ for _, item in ipairs(resp.output) do
+ if type(item) == "table" and item.type == "function_call"
then
+ result.has_tool_call = true
+ break
+ end
+ end
+ end
end
return result
@@ -135,17 +149,50 @@ function _M.extract_usage(res_body)
return nil, nil
end
local raw = res_body.usage
- -- Responses API uses input_tokens / output_tokens
+ local idetails = type(raw.input_tokens_details) == "table" and
raw.input_tokens_details
+ local odetails = type(raw.output_tokens_details) == "table" and
raw.output_tokens_details
local prompt = raw.input_tokens or 0
local completion = raw.output_tokens or 0
return {
prompt_tokens = prompt,
completion_tokens = completion,
total_tokens = raw.total_tokens or (prompt + completion),
+ cache_read_input_tokens = idetails and idetails.cached_tokens or 0,
+ cache_creation_input_tokens = 0,
+ reasoning_tokens = odetails and odetails.reasoning_tokens or 0,
}, raw
end
+--- Detect whether a non-streaming response contains tool calls.
+function _M.has_tool_call(res_body)
+ if type(res_body) ~= "table" or type(res_body.output) ~= "table" then
+ return false
+ end
+ for _, item in ipairs(res_body.output) do
+ if type(item) == "table" and item.type == "function_call" then
+ return true
+ end
+ end
+ return false
+end
+
+
+--- Extract the end-user identifier from a request body.
+function _M.extract_end_user_id(body)
+ if type(body) ~= "table" then
+ return nil
+ end
+ if type(body.safety_identifier) == "string" then
+ return body.safety_identifier
+ end
+ if type(body.user) == "string" then
+ return body.user
+ end
+ return nil
+end
+
+
--- Extract all text content from a request body for moderation.
function _M.extract_request_content(body)
local contents = {}
diff --git a/apisix/plugins/ai-providers/base.lua
b/apisix/plugins/ai-providers/base.lua
index 407fe0bb8..3021b89cd 100644
--- a/apisix/plugins/ai-providers/base.lua
+++ b/apisix/plugins/ai-providers/base.lua
@@ -77,6 +77,13 @@ local function merge_usage(ctx, parsed)
ctx.ai_token_usage[k] = v
end
end
+ -- Recompute total from accumulated parts (handles split events, e.g. Anthropic
+ -- message_start carries input tokens and message_delta carries output tokens)
+ local computed = (ctx.ai_token_usage.prompt_tokens or 0)
+ + (ctx.ai_token_usage.completion_tokens or 0)
+ if computed > (ctx.ai_token_usage.total_tokens or 0) then
+ ctx.ai_token_usage.total_tokens = computed
+ end
end
local raw = parsed.raw_usage or parsed.usage
@@ -396,12 +403,22 @@ function _M.parse_response(self, ctx, res, client_proto,
converter, conf)
end
ctx.var.llm_prompt_tokens = ctx.ai_token_usage.prompt_tokens or 0
ctx.var.llm_completion_tokens = ctx.ai_token_usage.completion_tokens or 0
+ ctx.var.llm_total_tokens = ctx.ai_token_usage.total_tokens or 0
+ ctx.var.llm_cache_read_input_tokens =
ctx.ai_token_usage.cache_read_input_tokens or 0
+ ctx.var.llm_cache_creation_input_tokens =
ctx.ai_token_usage.cache_creation_input_tokens or 0
+ ctx.var.llm_reasoning_tokens = ctx.ai_token_usage.reasoning_tokens or 0
local response_text = client_proto.extract_response_text(res_body)
if response_text then
ctx.var.llm_response_text = response_text
end
+ -- Detect tool calls (mirrors the streaming path, which sets this from
+ -- parse_sse_event.has_tool_call). Each client protocol knows its own shape.
+ if client_proto.has_tool_call and client_proto.has_tool_call(res_body) then
+ ctx.var.llm_has_tool_calls = "true"
+ end
+
plugin.lua_response_filter(ctx, headers, raw_res_body)
return res_body
end
@@ -591,6 +608,9 @@ function _M.parse_streaming_response(self, ctx, res,
target_proto, converter, co
for _, event in ipairs(events) do
-- Target protocol parses the provider's SSE format
local parsed = target_proto.parse_sse_event(event, ctx, sse_state)
+ if parsed and parsed.has_tool_call then
+ ctx.var.llm_has_tool_calls = "true"
+ end
if not parsed or parsed.type == "skip" then
goto CONTINUE
end
@@ -618,6 +638,12 @@ function _M.parse_streaming_response(self, ctx, res,
target_proto, converter, co
merge_usage(ctx, parsed)
ctx.var.llm_prompt_tokens = ctx.ai_token_usage.prompt_tokens
ctx.var.llm_completion_tokens =
ctx.ai_token_usage.completion_tokens
+ ctx.var.llm_total_tokens = ctx.ai_token_usage.total_tokens or 0
+ ctx.var.llm_cache_read_input_tokens =
+ ctx.ai_token_usage.cache_read_input_tokens or 0
+ ctx.var.llm_cache_creation_input_tokens =
+ ctx.ai_token_usage.cache_creation_input_tokens or 0
+ ctx.var.llm_reasoning_tokens =
ctx.ai_token_usage.reasoning_tokens or 0
ctx.var.llm_response_text = table.concat(contents, "")
end
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index d2b6e67a9..80c025f7a 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -32,6 +32,21 @@ local apisix_upstream = require("resty.apisix.upstream")
local _M = {}
+-- Count tools in the final upstream request body.
+-- OpenAI Chat/Responses: body.tools array
+-- Anthropic Messages: body.tools array
+local function count_request_tools(body)
+ if type(body) ~= "table" then
+ return 0
+ end
+ local tools = body.tools
+ if type(tools) == "table" then
+ return #tools
+ end
+ return 0
+end
+
+
local function resolve_cap(cap_entry, key, conf, ctx)
local val = cap_entry and cap_entry[key]
if type(val) == "function" then
@@ -48,6 +63,7 @@ function _M.set_logging(ctx, summaries, payloads)
duration = ctx.var.llm_time_to_first_token,
prompt_tokens = ctx.var.llm_prompt_tokens,
completion_tokens = ctx.var.llm_completion_tokens,
+ total_tokens = ctx.var.llm_total_tokens,
upstream_response_time = ctx.var.apisix_upstream_response_time,
}
end
@@ -181,6 +197,7 @@ function _M.before_proxy(conf, ctx, on_error)
end
ctx.ai_converter = converter
ctx.ai_target_protocol = target_proto
+ local target_proto_module = protocols.get(target_proto)
-- Step 2: Extract model from request
local request_model = request_body.model
@@ -217,6 +234,18 @@ function _M.before_proxy(conf, ctx, on_error)
return 500, body
end
+ -- Compute built-in AI log fields from the final upstream request
+ local final_body = params.body
+ local is_stream = ctx.var.request_type == "ai_stream"
+ ctx.var.llm_stream = is_stream and "true" or "false"
+ ctx.var.llm_tool_count = count_request_tools(final_body)
+ if target_proto_module and target_proto_module.extract_end_user_id
then
+ local end_user =
target_proto_module.extract_end_user_id(final_body)
+ if end_user then
+ ctx.var.llm_end_user_id = end_user
+ end
+ end
+
core.log.info("sending request to LLM server: ",
core.json.delay_encode(log_sanitize.redact_params(params), true))
@@ -310,14 +339,16 @@ function _M.before_proxy(conf, ctx, on_error)
"application/vnd.amazon.eventstream", 1, true)
)
if is_streaming_resp then
- local target_proto_module = protocols.get(target_proto)
if not target_proto_module then
core.log.error("no protocol module for streaming target:
", target_proto)
return 500
end
+
code, body = ai_provider:parse_streaming_response(
ctx, res, target_proto_module, converter, conf)
else
+ -- Non-streaming: parse_response sets all llm_* token/tool vars
+ -- via the client protocol adapter.
local _, parse_err, parse_status = ai_provider:parse_response(
ctx, res, client_proto, converter, conf)
if parse_err then
diff --git a/t/APISIX.pm b/t/APISIX.pm
index 540c48f0e..cf342f82c 100644
--- a/t/APISIX.pm
+++ b/t/APISIX.pm
@@ -717,7 +717,7 @@ _EOC_
require("apisix").http_exit_worker()
}
- log_format main escape=default '\$remote_addr - \$remote_user
[\$time_local] \$http_host "\$request_line" \$status \$body_bytes_sent
\$request_time "\$http_referer" "\$http_user_agent" \$upstream_addr
\$upstream_status \$apisix_upstream_response_time
"\$upstream_scheme://\$upstream_host\$upstream_uri" \$request_llm_model
\$llm_model \$llm_time_to_first_token \$llm_prompt_tokens
\$llm_completion_tokens "\$rate_limiting_info"';
+ log_format main escape=default '\$remote_addr - \$remote_user
[\$time_local] \$http_host "\$request_line" \$status \$body_bytes_sent
\$request_time "\$http_referer" "\$http_user_agent" \$upstream_addr
\$upstream_status \$apisix_upstream_response_time
"\$upstream_scheme://\$upstream_host\$upstream_uri" \$request_llm_model
\$llm_model \$llm_time_to_first_token \$llm_prompt_tokens
\$llm_completion_tokens \$llm_total_tokens \$llm_stream \$llm_has_tool_calls
\$llm_tool_count \$llm_end_use [...]
# fake server, only for test
server {
@@ -923,6 +923,14 @@ _EOC_
set \$llm_model '';
set \$llm_prompt_tokens '0';
set \$llm_completion_tokens '0';
+ set \$llm_total_tokens '0';
+ set \$llm_stream 'false';
+ set \$llm_has_tool_calls 'false';
+ set \$llm_tool_count '0';
+ set \$llm_end_user_id '';
+ set \$llm_cache_read_input_tokens '0';
+ set \$llm_cache_creation_input_tokens '0';
+ set \$llm_reasoning_tokens '0';
set \$apisix_upstream_response_time \$upstream_response_time;
access_log $apisix_home/t/servroot/logs/access.log main;
diff --git a/t/fixtures/anthropic/messages-streaming-with-tool-use.sse
b/t/fixtures/anthropic/messages-streaming-with-tool-use.sse
new file mode 100644
index 000000000..425626a07
--- /dev/null
+++ b/t/fixtures/anthropic/messages-streaming-with-tool-use.sse
@@ -0,0 +1,14 @@
+event: message_start
+data:
{"type":"message_start","message":{"id":"msg_123","type":"message","role":"assistant","content":[],"usage":{"input_tokens":20,"output_tokens":0}}}
+
+event: content_block_start
+data:
{"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_1","name":"get_weather"}}
+
+event: content_block_stop
+data: {"type":"content_block_stop","index":0}
+
+event: message_delta
+data:
{"type":"message_delta","delta":{"stop_reason":"tool_use"},"usage":{"output_tokens":5}}
+
+event: message_stop
+data: {}
diff --git a/t/fixtures/openai/chat-streaming-with-cache.sse
b/t/fixtures/openai/chat-streaming-with-cache.sse
new file mode 100644
index 000000000..11757f645
--- /dev/null
+++ b/t/fixtures/openai/chat-streaming-with-cache.sse
@@ -0,0 +1,6 @@
+data:
{"id":"chatcmpl-cache1","object":"chat.completion.chunk","choices":[{"delta":{"role":"assistant","content":"Hi"},"index":0,"finish_reason":null}],"usage":null}
+
+data:
{"id":"chatcmpl-cache1","object":"chat.completion.chunk","choices":[{"delta":{},"index":0,"finish_reason":"stop"}],"usage":{"prompt_tokens":30,"completion_tokens":15,"total_tokens":45,"prompt_tokens_details":{"cached_tokens":10,"cache_creation_input_tokens":5},"completion_tokens_details":{"reasoning_tokens":7}}}
+
+data: [DONE]
+
diff --git a/t/fixtures/openai/chat-streaming-with-tool-calls.sse
b/t/fixtures/openai/chat-streaming-with-tool-calls.sse
new file mode 100644
index 000000000..e17710263
--- /dev/null
+++ b/t/fixtures/openai/chat-streaming-with-tool-calls.sse
@@ -0,0 +1,7 @@
+data:
{"id":"chatcmpl-1","object":"chat.completion.chunk","choices":[{"delta":{"role":"assistant","tool_calls":[{"index":0,"id":"call_1","type":"function","function":{"name":"get_weather","arguments":""}}]},"index":0,"finish_reason":null}],"usage":null}
+
+data:
{"id":"chatcmpl-1","object":"chat.completion.chunk","choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{}"}}]},"index":0,"finish_reason":"tool_calls"}],"usage":null}
+
+data:
{"id":"chatcmpl-1","object":"chat.completion.chunk","choices":[{"delta":{},"index":0,"finish_reason":null}],"usage":{"prompt_tokens":15,"completion_tokens":10,"total_tokens":25}}
+
+data: [DONE]
diff --git a/t/fixtures/openai/responses-streaming-with-cache.sse
b/t/fixtures/openai/responses-streaming-with-cache.sse
new file mode 100644
index 000000000..67953da93
--- /dev/null
+++ b/t/fixtures/openai/responses-streaming-with-cache.sse
@@ -0,0 +1,6 @@
+event: response.output_text.delta
+data: {"type":"response.output_text.delta","delta":"Hello"}
+
+event: response.completed
+data:
{"type":"response.completed","response":{"output":[{"type":"message","content":[{"type":"output_text","text":"Hello"}]}],"usage":{"input_tokens":20,"output_tokens":5,"total_tokens":25,"input_tokens_details":{"cached_tokens":10},"output_tokens_details":{"reasoning_tokens":3}}}}
+
diff --git a/t/fixtures/openai/responses-streaming-with-tool-call.sse
b/t/fixtures/openai/responses-streaming-with-tool-call.sse
new file mode 100644
index 000000000..4c9dbc125
--- /dev/null
+++ b/t/fixtures/openai/responses-streaming-with-tool-call.sse
@@ -0,0 +1,3 @@
+event: response.completed
+data:
{"type":"response.completed","response":{"output":[{"type":"function_call","id":"call_1","name":"get_weather","arguments":"{}"}],"usage":{"input_tokens":20,"output_tokens":5,"total_tokens":25}}}
+
diff --git a/t/fixtures/openai/responses-with-cache.json
b/t/fixtures/openai/responses-with-cache.json
new file mode 100644
index 000000000..39c72846f
--- /dev/null
+++ b/t/fixtures/openai/responses-with-cache.json
@@ -0,0 +1,22 @@
+{
+ "id": "resp_cache1",
+ "object": "response",
+ "created_at": 1723780938,
+ "model": "{{model}}",
+ "output": [
+ {
+ "type": "message",
+ "role": "assistant",
+ "content": [
+ { "type": "output_text", "text": "Hello" }
+ ]
+ }
+ ],
+ "usage": {
+ "input_tokens": 40,
+ "output_tokens": 20,
+ "total_tokens": 60,
+ "input_tokens_details": { "cached_tokens": 12 },
+ "output_tokens_details": { "reasoning_tokens": 8 }
+ }
+}
diff --git a/t/plugin/ai-proxy-anthropic.t b/t/plugin/ai-proxy-anthropic.t
index e5912d192..67e435bbb 100644
--- a/t/plugin/ai-proxy-anthropic.t
+++ b/t/plugin/ai-proxy-anthropic.t
@@ -36,6 +36,7 @@ add_block_preprocessor(sub {
my $user_yaml_config = <<_EOC_;
plugins:
+ - ai-proxy
- ai-proxy-multi
_EOC_
$block->set_value("extra_yaml_config", $user_yaml_config);
@@ -1666,3 +1667,91 @@ OK
OK
--- no_error_log
[error]
+
+
+
+=== TEST 47: set route for native Anthropic protocol built-in var tests
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/v1/messages",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "anthropic",
+ "auth": {
+ "header": {
+ "x-api-key": "test-key"
+ }
+ },
+ "options": {
+ "model": "claude-3-5-sonnet-20241022"
+ },
+ "override": {
+ "endpoint": "http://127.0.0.1:1980"
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 48: Anthropic streaming accumulates total_tokens from split message_start and message_delta events
+--- request
+POST /v1/messages
+{"messages":[{"role":"user","content":"Hello"}],"model":"claude-3-5-sonnet-20241022","max_tokens":1024,"stream":true}
+--- more_headers
+X-AI-Fixture: anthropic/messages-streaming.sse
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" claude-3-5-sonnet-20241022
claude-3-5-sonnet-20241022 [\d.]+ 10 8 18 true false 0 /
+
+
+
+=== TEST 49: Anthropic streaming detects tool_use in content_block_start as tool call
+--- request
+POST /v1/messages
+{"messages":[{"role":"user","content":"What is the
weather?"}],"model":"claude-3-5-sonnet-20241022","max_tokens":1024,"stream":true}
+--- more_headers
+X-AI-Fixture: anthropic/messages-streaming-with-tool-use.sse
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" claude-3-5-sonnet-20241022
claude-3-5-sonnet-20241022 [\d.]+ 20 5 25 true true 0 /
+
+
+
+=== TEST 50: Anthropic non-streaming writes cache tokens to access log
+--- request
+POST /v1/messages
+{"messages":[{"role":"user","content":"Hello"}],"model":"claude-3-5-sonnet-20241022","max_tokens":1024}
+--- more_headers
+X-AI-Fixture: anthropic/messages-with-cache.json
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" claude-3-5-sonnet-20241022
claude-3-5-sonnet-20241022 [\d.]+ 50 30 80 false false 0 \S* 200 100 0/
+
+
+
+=== TEST 51: Anthropic streaming writes cache tokens to access log
+--- request
+POST /v1/messages
+{"messages":[{"role":"user","content":"Hello"}],"model":"claude-3-5-sonnet-20241022","max_tokens":1024,"stream":true}
+--- more_headers
+X-AI-Fixture: anthropic/messages-streaming-with-cache.sse
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" claude-3-5-sonnet-20241022
claude-3-5-sonnet-20241022 [\d.]+ 50 30 80 true false 0 \S* 200 100 0/
diff --git a/t/plugin/ai-proxy3.t b/t/plugin/ai-proxy3.t
index 3110ee04b..501c84952 100644
--- a/t/plugin/ai-proxy3.t
+++ b/t/plugin/ai-proxy3.t
@@ -23,15 +23,6 @@ no_long_string();
no_root_location();
-my $resp_file = 't/assets/ai-proxy-response.json';
-open(my $fh, '<', $resp_file) or die "Could not open file '$resp_file' $!";
-my $resp = do { local $/; <$fh> };
-close($fh);
-
-print "Hello, World!\n";
-print $resp;
-
-
add_block_preprocessor(sub {
my ($block) = @_;
@@ -96,7 +87,7 @@ X-AI-Fixture: openai/chat-basic.json
--- response_body eval
qr/.*completion_tokens.*/
--- access_log eval
-qr/127\.0\.0\.1:1980 200 [\d.]+
\"http:\/\/127\.0\.0\.1\/v1\/chat\/completions\" gpt-4 gpt-3.5-turbo [\d.]+ 23
8.*/
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"http:\/\/\S+\/v1\/chat\/completions\" gpt-4
gpt-3.5-turbo [\d.]+ 23 8.*/
@@ -256,4 +247,210 @@ passed
--- response_body_like eval
qr/6data: \[DONE\]\n\n/
--- access_log eval
-qr/localhost:7737 200 [\d.]+ \"http:\/\/localhost\/v1\/chat\/completions\"
gpt-4 gpt-3.5-turbo 2\d\d 15 20.*/
+qr/localhost:7737 200 [\d.]+ \"http:\/\/\S+\/v1\/chat\/completions\" gpt-4
gpt-3.5-turbo 2\d\d 15 20.*/
+
+
+
+=== TEST 7: set route for built-in access log variable test
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/2',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/log-vars",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer test-key"
+ }
+ },
+ "options": {
+ "model": "gpt-4o"
+ },
+ "override": {
+ "endpoint": "http://127.0.0.1:1980"
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 8: non-streaming request writes llm built-in vars to access log
+--- request
+POST /log-vars
+{"messages":[{"role":"user","content":"What is
1+1?"}],"model":"gpt-4o","user":"alice"}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- response_body eval
+qr/.*completion_tokens.*/
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" gpt-4o gpt-4o [\d.]+ 23 8 31 false
false 0 alice/
+
+
+
+=== TEST 9: streaming request writes llm built-in vars to access log
+--- request
+POST /log-vars
+{"messages":[{"role":"user","content":"Hello"}],"model":"gpt-4o","stream":true}
+--- more_headers
+X-AI-Fixture: openai/chat-multi-chunk.sse
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" gpt-4o gpt-4o [\d.]+ 10 2 12 true
false 0 /
+
+
+
+=== TEST 10: response with cached tokens writes llm_cache_read_input_tokens to access log
+--- request
+POST /log-vars
+{"messages":[{"role":"user","content":"Solve this"}],"model":"gpt-4o"}
+--- more_headers
+X-AI-Fixture: openai/chat-with-reasoning.json
+--- error_code: 200
+--- response_body eval
+qr/.*completion_tokens.*/
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" gpt-4o gpt-4o [\d.]+ 30 15 45 false
false 0 \S* 10 5 0/
+
+
+
+=== TEST 11: response with tool calls sets llm_has_tool_calls=true and llm_tool_count
+--- request
+POST /log-vars
+{"messages":[{"role":"user","content":"What is the
weather?"}],"model":"gpt-4o","tools":[{"type":"function","function":{"name":"get_weather","parameters":{}}}]}
+--- more_headers
+X-AI-Fixture: openai/chat-with-tool-calls.json
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" gpt-4o gpt-4o [\d.]+ 50 20 70 false
true 1 /
+
+
+
+=== TEST 12: safety_identifier field is used as llm_end_user_id
+--- request
+POST /log-vars
+{"messages":[{"role":"user","content":"Hello"}],"model":"gpt-4o","safety_identifier":"user-xyz"}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" gpt-4o gpt-4o [\d.]+ 23 8 31 false
false 0 user-xyz/
+
+
+
+=== TEST 13: OpenAI Chat streaming detects tool_calls delta and sets llm_has_tool_calls=true
+--- request
+POST /log-vars
+{"messages":[{"role":"user","content":"What is the
weather?"}],"model":"gpt-4o","stream":true}
+--- more_headers
+X-AI-Fixture: openai/chat-streaming-with-tool-calls.sse
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" gpt-4o gpt-4o [\d.]+ 15 10 25 true
true 0 /
+
+
+
+=== TEST 14: set route for Responses API built-in var tests
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/3',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/ai/v1/responses",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer test-key"
+ }
+ },
+ "options": {
+ "model": "gpt-4o-mini"
+ },
+ "override": {
+ "endpoint": "http://127.0.0.1:1980"
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 15: Responses API streaming sets llm_cache_read_input_tokens and llm_reasoning_tokens
+--- request
+POST /ai/v1/responses
+{"input":"Hello","model":"gpt-4o-mini","stream":true}
+--- more_headers
+X-AI-Fixture: openai/responses-streaming-with-cache.sse
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" gpt-4o-mini gpt-4o-mini [\d.]+ 20 5 25
true false 0 \S* 10 0 3/
+
+
+
+=== TEST 16: Responses API streaming detects function_call in response.output as tool call
+--- request
+POST /ai/v1/responses
+{"input":"What is the weather?","model":"gpt-4o-mini","stream":true}
+--- more_headers
+X-AI-Fixture: openai/responses-streaming-with-tool-call.sse
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" gpt-4o-mini gpt-4o-mini [\d.]+ 20 5 25
true true 0 /
+
+
+
+=== TEST 17: OpenAI Chat streaming writes cache and reasoning tokens to access log
+--- request
+POST /log-vars
+{"messages":[{"role":"user","content":"Solve
this"}],"model":"gpt-4o","stream":true}
+--- more_headers
+X-AI-Fixture: openai/chat-streaming-with-cache.sse
+--- error_code: 200
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" gpt-4o gpt-4o [\d.]+ 30 15 45 true
false 0 \S* 10 5 7/
+
+
+
+=== TEST 18: Responses API non-streaming writes cache and reasoning tokens to access log
+--- request
+POST /ai/v1/responses
+{"input":"Solve this","model":"gpt-4o-mini"}
+--- more_headers
+X-AI-Fixture: openai/responses-with-cache.json
+--- error_code: 200
+--- response_body eval
+qr/.*output.*/
+--- access_log eval
+qr/127\.0\.0\.1:1980 200 [\d.]+ \"\S+\" gpt-4o-mini gpt-4o-mini [\d.]+ 40 20
60 false false 0 \S* 12 0 8/