This is an automated email from the ASF dual-hosted git repository.
nic-6443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 6dd6a3477 perf(ai): reuse raw request body when unchanged (#13406)
6dd6a3477 is described below
commit 6dd6a347729aec39829d856a75e6c1ab0010a166
Author: Nic <[email protected]>
AuthorDate: Thu May 21 16:19:53 2026 +0800
perf(ai): reuse raw request body when unchanged (#13406)
---
apisix/plugins/ai-protocols/bedrock-converse.lua | 4 +
apisix/plugins/ai-protocols/openai-chat.lua | 2 +
apisix/plugins/ai-protocols/passthrough.lua | 1 +
apisix/plugins/ai-providers/base.lua | 34 ++++-
apisix/plugins/ai-request-rewrite.lua | 1 +
t/plugin/ai-proxy-request-body-override.t | 150 ++++++++++++++++++++---
6 files changed, 175 insertions(+), 17 deletions(-)
diff --git a/apisix/plugins/ai-protocols/bedrock-converse.lua b/apisix/plugins/ai-protocols/bedrock-converse.lua
index 5c7112c06..b0259b6c8 100644
--- a/apisix/plugins/ai-protocols/bedrock-converse.lua
+++ b/apisix/plugins/ai-protocols/bedrock-converse.lua
@@ -52,7 +52,11 @@ end
-- Strip our gateway-side `stream` flag; Bedrock rejects unknown body fields
-- and decides streaming purely by URL (/converse vs /converse-stream).
function _M.prepare_outgoing_request(body)
+ if body.stream == nil then
+ return false
+ end
body.stream = nil
+ return true
end
diff --git a/apisix/plugins/ai-protocols/openai-chat.lua b/apisix/plugins/ai-protocols/openai-chat.lua
index da9191736..faa48f665 100644
--- a/apisix/plugins/ai-protocols/openai-chat.lua
+++ b/apisix/plugins/ai-protocols/openai-chat.lua
@@ -48,7 +48,9 @@ end
function _M.prepare_outgoing_request(body)
if body.stream then
body.stream_options = { include_usage = true }
+ return true
end
+ return false
end
diff --git a/apisix/plugins/ai-protocols/passthrough.lua b/apisix/plugins/ai-protocols/passthrough.lua
index 9df3ab10e..dad3c15c9 100644
--- a/apisix/plugins/ai-protocols/passthrough.lua
+++ b/apisix/plugins/ai-protocols/passthrough.lua
@@ -37,6 +37,7 @@ end
function _M.prepare_outgoing_request(_)
+ return false
end
diff --git a/apisix/plugins/ai-providers/base.lua b/apisix/plugins/ai-providers/base.lua
index 944be2632..8cad693b3 100644
--- a/apisix/plugins/ai-providers/base.lua
+++ b/apisix/plugins/ai-providers/base.lua
@@ -45,6 +45,7 @@ local pairs = pairs
local type = type
local math = math
local ipairs = ipairs
+local next = next
local setmetatable = setmetatable
local tostring = tostring
@@ -95,6 +96,8 @@ end
-- @return table params HTTP parameters ready for transport_http.request()
-- @return string|nil err Error message
function _M.build_request(self, ctx, conf, request_body, opts)
+ local body_changed = false
+
-- Protocol conversion (when a converter bridges client→target protocol)
local converter = ctx.ai_converter
if converter and converter.convert_request then
@@ -103,6 +106,7 @@ function _M.build_request(self, ctx, conf, request_body, opts)
return nil, err or "invalid protocol", 400
end
request_body = converted
+ body_changed = true
end
-- Inject target-protocol-specific parameters (e.g. stream_options for OpenAI).
@@ -111,7 +115,9 @@ function _M.build_request(self, ctx, conf, request_body, opts)
if target_protocol then
local target_proto = protocols.get(target_protocol)
if target_proto and target_proto.prepare_outgoing_request then
- target_proto.prepare_outgoing_request(request_body)
+ if target_proto.prepare_outgoing_request(request_body) then
+ body_changed = true
+ end
end
end
@@ -205,6 +211,7 @@ function _M.build_request(self, ctx, conf, request_body, opts)
core.log.info("model_options overwriting request field '",
opt, "'")
end
request_body[opt] = val
+ body_changed = true
end
end
@@ -213,6 +220,9 @@ function _M.build_request(self, ctx, conf, request_body, opts)
local cap = self.capabilities and
self.capabilities[ctx.ai_target_protocol]
if cap and cap.rewrite_request_body then
cap.rewrite_request_body(request_body, opts.override_llm_options,
true)
+ if next(opts.override_llm_options) ~= nil then
+ body_changed = true
+ end
end
end
@@ -223,12 +233,30 @@ function _M.build_request(self, ctx, conf, request_body, opts)
core.log.info("applying request_body override for target protocol '",
ctx.ai_target_protocol, "'")
request_body = deep_merge(request_body, patch,
opts.request_body_force_override)
+ body_changed = true
end
end
- params.body = request_body
- if self.remove_model then
+ if self.remove_model and request_body.model ~= nil then
request_body.model = nil
+ body_changed = true
+ end
+
+ if body_changed then
+ ctx.ai_request_body_changed = true
+ end
+
+ if not ctx.ai_request_body_changed then
+ if ctx.ai_raw_request_body == nil then
+ ctx.ai_raw_request_body = core.request.get_body()
+ end
+ if type(ctx.ai_raw_request_body) == "string" then
+ params.body = ctx.ai_raw_request_body
+ else
+ params.body = request_body
+ end
+ else
+ params.body = request_body
end
-- AWS SigV4 signing (must be last — signs the finalized body)
diff --git a/apisix/plugins/ai-request-rewrite.lua b/apisix/plugins/ai-request-rewrite.lua
index 900f70083..5bef20771 100644
--- a/apisix/plugins/ai-request-rewrite.lua
+++ b/apisix/plugins/ai-request-rewrite.lua
@@ -127,6 +127,7 @@ local function request_to_llm(conf, request_table, ctx, target_path)
}
ctx.llm_request_start_time = ngx.now()
ctx.var.llm_request_body = request_table
+ ctx.ai_request_body_changed = true
return ai_provider:request(ctx, conf, request_table, extra_opts)
end
diff --git a/t/plugin/ai-proxy-request-body-override.t b/t/plugin/ai-proxy-request-body-override.t
index 088123beb..1bac6c31b 100644
--- a/t/plugin/ai-proxy-request-body-override.t
+++ b/t/plugin/ai-proxy-request-body-override.t
@@ -174,7 +174,129 @@ __DATA__
-=== TEST 3: llm_options: openai provider maps max_tokens to
max_completion_tokens
+=== TEST 3a: ai-proxy forwards the original body when it is not rewritten
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/chat",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai-compatible",
+ "auth": { "header": { "Authorization": "Bearer t"
} },
+ "override": {
+ "endpoint": "http://localhost:6732"
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end
+
+ local raw = '{ "messages" : [ { "role" : "user", "content" : "hi"
} ], "temperature" : 0.7 }'
+ local http = require("resty.http").new()
+ local res = assert(http:request_uri("http://127.0.0.1:" ..
ngx.var.server_port .. "/chat", {
+ method = "POST",
+ body = raw,
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ local cjson = require("cjson.safe")
+ local body = cjson.decode(res.body)
+ ngx.say(body.choices[1].message.content == raw and "same body" or
"body changed")
+ }
+ }
+--- response_body
+same body
+
+
+
+=== TEST 4b: ai-proxy-multi forwards the original body when it is not rewritten
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/chat",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [{
+ "name": "only",
+ "provider": "openai-compatible",
+ "weight": 1,
+ "auth": { "header": { "Authorization": "Bearer
t" } },
+ "override": {
+ "endpoint": "http://localhost:6732"
+ }
+ }],
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end
+
+ local raw = '{ "messages" : [ { "role" : "user", "content" :
"hello" } ], "top_p" : 0.9 }'
+ local http = require("resty.http").new()
+ local res = assert(http:request_uri("http://127.0.0.1:" ..
ngx.var.server_port .. "/chat", {
+ method = "POST",
+ body = raw,
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ local cjson = require("cjson.safe")
+ local body = cjson.decode(res.body)
+ ngx.say(body.choices[1].message.content == raw and "same body" or
"body changed")
+ }
+ }
+--- response_body
+same body
+
+
+
+=== TEST 5c: build_request does not reuse raw body after an earlier rewrite
+--- config
+ location /t {
+ content_by_lua_block {
+ local base = require("apisix.plugins.ai-providers.base")
+ local provider = base.new({
+ capabilities = {
+ ["openai-chat"] = {
+ path = "/v1/chat/completions",
+ host = "localhost",
+ },
+ },
+ })
+ local ctx = {
+ ai_target_protocol = "openai-chat",
+ ai_request_body_changed = true,
+ var = {},
+ }
+ local opts = {
+ auth = {},
+ conf = {},
+ raw_request_body = '{"messages":[]}',
+ target_path = "/v1/chat/completions",
+ }
+
+ local request_body = {messages = {{role = "user", content =
"changed"}}}
+ local params = assert(provider:build_request(ctx, {ssl_verify =
false},
+ request_body, opts))
+ ngx.say(type(params.body))
+ ngx.say(params.body == request_body and "table body" or "raw body")
+ }
+ }
+--- response_body
+table
+table body
+
+
+
+=== TEST 6: llm_options: openai provider maps max_tokens to
max_completion_tokens
--- config
location /t {
content_by_lua_block {
@@ -217,7 +339,7 @@ max_completion_tokens=555
-=== TEST 4: llm_options: openai-compatible provider maps max_tokens to
max_tokens
+=== TEST 7: llm_options: openai-compatible provider maps max_tokens to
max_tokens
--- config
location /t {
content_by_lua_block {
@@ -260,7 +382,7 @@ max_tokens=444
-=== TEST 5: llm_options: openai responses API maps max_tokens to
max_output_tokens
+=== TEST 8: llm_options: openai responses API maps max_tokens to
max_output_tokens
--- config
location /t {
content_by_lua_block {
@@ -303,7 +425,7 @@ max_output_tokens=333
-=== TEST 6: llm_options: ai-proxy-multi per-instance override
+=== TEST 9: llm_options: ai-proxy-multi per-instance override
--- config
location /t {
content_by_lua_block {
@@ -350,7 +472,7 @@ max_completion_tokens=222
-=== TEST 7: llm_options always force-overwrites client value
+=== TEST 10: llm_options always force-overwrites client value
--- config
location /t {
content_by_lua_block {
@@ -395,7 +517,7 @@ max_tokens=555
-=== TEST 8: request_body: openai-chat override writes fields on outgoing body
+=== TEST 11: request_body: openai-chat override writes fields on outgoing body
--- config
location /t {
content_by_lua_block {
@@ -443,7 +565,7 @@ max_tokens=555 temperature=0.1
-=== TEST 9: request_body: non-force deep merge fills missing nested keys
without overwriting existing
+=== TEST 12: request_body: non-force deep merge fills missing nested keys
without overwriting existing
--- config
location /t {
content_by_lua_block {
@@ -492,7 +614,7 @@ include_usage=true extra=1
-=== TEST 10: request_body: array values are replaced wholesale (stop sequences)
+=== TEST 13: request_body: array values are replaced wholesale (stop sequences)
--- config
location /t {
content_by_lua_block {
@@ -538,7 +660,7 @@ stop=["a","b"]
-=== TEST 11: request_body: override keyed by non-matching target protocol is
ignored
+=== TEST 14: request_body: override keyed by non-matching target protocol is
ignored
--- config
location /t {
content_by_lua_block {
@@ -581,7 +703,7 @@ max_tokens=nil
-=== TEST 12: request_body: default mode - client value takes priority
+=== TEST 15: request_body: default mode - client value takes priority
--- config
location /t {
content_by_lua_block {
@@ -626,7 +748,7 @@ max_tokens=999
-=== TEST 13: request_body: force_override mode - override overwrites client
fields
+=== TEST 16: request_body: force_override mode - override overwrites client
fields
--- config
location /t {
content_by_lua_block {
@@ -672,7 +794,7 @@ max_tokens=555
-=== TEST 14: request_body: override applies to target protocol after converter
+=== TEST 17: request_body: override applies to target protocol after converter
--- config
location /t {
content_by_lua_block {
@@ -719,7 +841,7 @@ max_tokens=77 has_messages=true
-=== TEST 15: ai-proxy-multi per-instance request_body override
+=== TEST 18: ai-proxy-multi per-instance request_body override
--- config
location /t {
content_by_lua_block {
@@ -767,7 +889,7 @@ max_tokens=321
-=== TEST 16: both llm_options and request_body coexist, request_body wins
+=== TEST 19: both llm_options and request_body coexist, request_body wins
--- config
location /t {
content_by_lua_block {