(apisix) branch master updated: perf(ai): reuse raw request body when unchanged (#13406)

nic443 Thu, 21 May 2026 01:20:09 -0700

This is an automated email from the ASF dual-hosted git repository.

nic-6443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git



The following commit(s) were added to refs/heads/master by this push:
     new 6dd6a3477 perf(ai): reuse raw request body when unchanged (#13406)
6dd6a3477 is described below

commit 6dd6a347729aec39829d856a75e6c1ab0010a166
Author: Nic <[email protected]>
AuthorDate: Thu May 21 16:19:53 2026 +0800

    perf(ai): reuse raw request body when unchanged (#13406)
---
 apisix/plugins/ai-protocols/bedrock-converse.lua |   4 +
 apisix/plugins/ai-protocols/openai-chat.lua      |   2 +
 apisix/plugins/ai-protocols/passthrough.lua      |   1 +
 apisix/plugins/ai-providers/base.lua             |  34 ++++-
 apisix/plugins/ai-request-rewrite.lua            |   1 +
 t/plugin/ai-proxy-request-body-override.t        | 150 ++++++++++++++++++++---
 6 files changed, 175 insertions(+), 17 deletions(-)

diff --git a/apisix/plugins/ai-protocols/bedrock-converse.lua 
b/apisix/plugins/ai-protocols/bedrock-converse.lua
index 5c7112c06..b0259b6c8 100644
--- a/apisix/plugins/ai-protocols/bedrock-converse.lua
+++ b/apisix/plugins/ai-protocols/bedrock-converse.lua
@@ -52,7 +52,11 @@ end
 -- Strip our gateway-side `stream` flag; Bedrock rejects unknown body fields
 -- and decides streaming purely by URL (/converse vs /converse-stream).
 function _M.prepare_outgoing_request(body)
+    if body.stream == nil then
+        return false
+    end
     body.stream = nil
+    return true
 end
 
 
diff --git a/apisix/plugins/ai-protocols/openai-chat.lua 
b/apisix/plugins/ai-protocols/openai-chat.lua
index da9191736..faa48f665 100644
--- a/apisix/plugins/ai-protocols/openai-chat.lua
+++ b/apisix/plugins/ai-protocols/openai-chat.lua
@@ -48,7 +48,9 @@ end
 function _M.prepare_outgoing_request(body)
     if body.stream then
         body.stream_options = { include_usage = true }
+        return true
     end
+    return false
 end
 
 
diff --git a/apisix/plugins/ai-protocols/passthrough.lua 
b/apisix/plugins/ai-protocols/passthrough.lua
index 9df3ab10e..dad3c15c9 100644
--- a/apisix/plugins/ai-protocols/passthrough.lua
+++ b/apisix/plugins/ai-protocols/passthrough.lua
@@ -37,6 +37,7 @@ end
 
 
 function _M.prepare_outgoing_request(_)
+    return false
 end
 
 
diff --git a/apisix/plugins/ai-providers/base.lua 
b/apisix/plugins/ai-providers/base.lua
index 944be2632..8cad693b3 100644
--- a/apisix/plugins/ai-providers/base.lua
+++ b/apisix/plugins/ai-providers/base.lua
@@ -45,6 +45,7 @@ local pairs = pairs
 local type  = type
 local math  = math
 local ipairs = ipairs
+local next = next
 local setmetatable = setmetatable
 local tostring = tostring
 
@@ -95,6 +96,8 @@ end
 -- @return table params HTTP parameters ready for transport_http.request()
 -- @return string|nil err Error message
 function _M.build_request(self, ctx, conf, request_body, opts)
+    local body_changed = false
+
     -- Protocol conversion (when a converter bridges client→target protocol)
     local converter = ctx.ai_converter
     if converter and converter.convert_request then
@@ -103,6 +106,7 @@ function _M.build_request(self, ctx, conf, request_body, 
opts)
             return nil, err or "invalid protocol", 400
         end
         request_body = converted
+        body_changed = true
     end
 
     -- Inject target-protocol-specific parameters (e.g. stream_options for 
OpenAI).
@@ -111,7 +115,9 @@ function _M.build_request(self, ctx, conf, request_body, 
opts)
     if target_protocol then
         local target_proto = protocols.get(target_protocol)
         if target_proto and target_proto.prepare_outgoing_request then
-            target_proto.prepare_outgoing_request(request_body)
+            if target_proto.prepare_outgoing_request(request_body) then
+                body_changed = true
+            end
         end
     end
 
@@ -205,6 +211,7 @@ function _M.build_request(self, ctx, conf, request_body, 
opts)
                 core.log.info("model_options overwriting request field '", 
opt, "'")
             end
             request_body[opt] = val
+            body_changed = true
         end
     end
 
@@ -213,6 +220,9 @@ function _M.build_request(self, ctx, conf, request_body, 
opts)
         local cap = self.capabilities and 
self.capabilities[ctx.ai_target_protocol]
         if cap and cap.rewrite_request_body then
             cap.rewrite_request_body(request_body, opts.override_llm_options, 
true)
+            if next(opts.override_llm_options) ~= nil then
+                body_changed = true
+            end
         end
     end
 
@@ -223,12 +233,30 @@ function _M.build_request(self, ctx, conf, request_body, 
opts)
             core.log.info("applying request_body override for target protocol 
'",
                           ctx.ai_target_protocol, "'")
             request_body = deep_merge(request_body, patch, 
opts.request_body_force_override)
+            body_changed = true
         end
     end
-    params.body = request_body
 
-    if self.remove_model then
+    if self.remove_model and request_body.model ~= nil then
         request_body.model = nil
+        body_changed = true
+    end
+
+    if body_changed then
+        ctx.ai_request_body_changed = true
+    end
+
+    if not ctx.ai_request_body_changed then
+        if ctx.ai_raw_request_body == nil then
+            ctx.ai_raw_request_body = core.request.get_body()
+        end
+        if type(ctx.ai_raw_request_body) == "string" then
+            params.body = ctx.ai_raw_request_body
+        else
+            params.body = request_body
+        end
+    else
+        params.body = request_body
     end
 
     -- AWS SigV4 signing (must be last — signs the finalized body)
diff --git a/apisix/plugins/ai-request-rewrite.lua 
b/apisix/plugins/ai-request-rewrite.lua
index 900f70083..5bef20771 100644
--- a/apisix/plugins/ai-request-rewrite.lua
+++ b/apisix/plugins/ai-request-rewrite.lua
@@ -127,6 +127,7 @@ local function request_to_llm(conf, request_table, ctx, 
target_path)
     }
     ctx.llm_request_start_time = ngx.now()
     ctx.var.llm_request_body = request_table
+    ctx.ai_request_body_changed = true
     return ai_provider:request(ctx, conf, request_table, extra_opts)
 end
 
diff --git a/t/plugin/ai-proxy-request-body-override.t 
b/t/plugin/ai-proxy-request-body-override.t
index 088123beb..1bac6c31b 100644
--- a/t/plugin/ai-proxy-request-body-override.t
+++ b/t/plugin/ai-proxy-request-body-override.t
@@ -174,7 +174,129 @@ __DATA__
 
 
 
-=== TEST 3: llm_options: openai provider maps max_tokens to 
max_completion_tokens
+=== TEST 3a: ai-proxy forwards the original body when it is not rewritten
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai-compatible",
+                            "auth": { "header": { "Authorization": "Bearer t" 
} },
+                            "override": {
+                                "endpoint": "http://localhost:6732"
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local raw = '{ "messages" : [ { "role" : "user", "content" : "hi" 
} ], "temperature" : 0.7 }'
+            local http = require("resty.http").new()
+            local res = assert(http:request_uri("http://127.0.0.1:" .. 
ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = raw,
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            ngx.say(body.choices[1].message.content == raw and "same body" or 
"body changed")
+        }
+    }
+--- response_body
+same body
+
+
+
+=== TEST 4b: ai-proxy-multi forwards the original body when it is not rewritten
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy-multi": {
+                            "instances": [{
+                                "name": "only",
+                                "provider": "openai-compatible",
+                                "weight": 1,
+                                "auth": { "header": { "Authorization": "Bearer 
t" } },
+                                "override": {
+                                    "endpoint": "http://localhost:6732"
+                                }
+                            }],
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local raw = '{ "messages" : [ { "role" : "user", "content" : 
"hello" } ], "top_p" : 0.9 }'
+            local http = require("resty.http").new()
+            local res = assert(http:request_uri("http://127.0.0.1:" .. 
ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = raw,
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            ngx.say(body.choices[1].message.content == raw and "same body" or 
"body changed")
+        }
+    }
+--- response_body
+same body
+
+
+
+=== TEST 5c: build_request does not reuse raw body after an earlier rewrite
+--- config
+    location /t {
+        content_by_lua_block {
+            local base = require("apisix.plugins.ai-providers.base")
+            local provider = base.new({
+                capabilities = {
+                    ["openai-chat"] = {
+                        path = "/v1/chat/completions",
+                        host = "localhost",
+                    },
+                },
+            })
+            local ctx = {
+                ai_target_protocol = "openai-chat",
+                ai_request_body_changed = true,
+                var = {},
+            }
+            local opts = {
+                auth = {},
+                conf = {},
+                raw_request_body = '{"messages":[]}',
+                target_path = "/v1/chat/completions",
+            }
+
+            local request_body = {messages = {{role = "user", content = 
"changed"}}}
+            local params = assert(provider:build_request(ctx, {ssl_verify = 
false},
+                                                         request_body, opts))
+            ngx.say(type(params.body))
+            ngx.say(params.body == request_body and "table body" or "raw body")
+        }
+    }
+--- response_body
+table
+table body
+
+
+
+=== TEST 6: llm_options: openai provider maps max_tokens to 
max_completion_tokens
 --- config
     location /t {
         content_by_lua_block {
@@ -217,7 +339,7 @@ max_completion_tokens=555
 
 
 
-=== TEST 4: llm_options: openai-compatible provider maps max_tokens to 
max_tokens
+=== TEST 7: llm_options: openai-compatible provider maps max_tokens to 
max_tokens
 --- config
     location /t {
         content_by_lua_block {
@@ -260,7 +382,7 @@ max_tokens=444
 
 
 
-=== TEST 5: llm_options: openai responses API maps max_tokens to 
max_output_tokens
+=== TEST 8: llm_options: openai responses API maps max_tokens to 
max_output_tokens
 --- config
     location /t {
         content_by_lua_block {
@@ -303,7 +425,7 @@ max_output_tokens=333
 
 
 
-=== TEST 6: llm_options: ai-proxy-multi per-instance override
+=== TEST 9: llm_options: ai-proxy-multi per-instance override
 --- config
     location /t {
         content_by_lua_block {
@@ -350,7 +472,7 @@ max_completion_tokens=222
 
 
 
-=== TEST 7: llm_options always force-overwrites client value
+=== TEST 10: llm_options always force-overwrites client value
 --- config
     location /t {
         content_by_lua_block {
@@ -395,7 +517,7 @@ max_tokens=555
 
 
 
-=== TEST 8: request_body: openai-chat override writes fields on outgoing body
+=== TEST 11: request_body: openai-chat override writes fields on outgoing body
 --- config
     location /t {
         content_by_lua_block {
@@ -443,7 +565,7 @@ max_tokens=555 temperature=0.1
 
 
 
-=== TEST 9: request_body: non-force deep merge fills missing nested keys 
without overwriting existing
+=== TEST 12: request_body: non-force deep merge fills missing nested keys 
without overwriting existing
 --- config
     location /t {
         content_by_lua_block {
@@ -492,7 +614,7 @@ include_usage=true extra=1
 
 
 
-=== TEST 10: request_body: array values are replaced wholesale (stop sequences)
+=== TEST 13: request_body: array values are replaced wholesale (stop sequences)
 --- config
     location /t {
         content_by_lua_block {
@@ -538,7 +660,7 @@ stop=["a","b"]
 
 
 
-=== TEST 11: request_body: override keyed by non-matching target protocol is 
ignored
+=== TEST 14: request_body: override keyed by non-matching target protocol is 
ignored
 --- config
     location /t {
         content_by_lua_block {
@@ -581,7 +703,7 @@ max_tokens=nil
 
 
 
-=== TEST 12: request_body: default mode - client value takes priority
+=== TEST 15: request_body: default mode - client value takes priority
 --- config
     location /t {
         content_by_lua_block {
@@ -626,7 +748,7 @@ max_tokens=999
 
 
 
-=== TEST 13: request_body: force_override mode - override overwrites client 
fields
+=== TEST 16: request_body: force_override mode - override overwrites client 
fields
 --- config
     location /t {
         content_by_lua_block {
@@ -672,7 +794,7 @@ max_tokens=555
 
 
 
-=== TEST 14: request_body: override applies to target protocol after converter
+=== TEST 17: request_body: override applies to target protocol after converter
 --- config
     location /t {
         content_by_lua_block {
@@ -719,7 +841,7 @@ max_tokens=77 has_messages=true
 
 
 
-=== TEST 15: ai-proxy-multi per-instance request_body override
+=== TEST 18: ai-proxy-multi per-instance request_body override
 --- config
     location /t {
         content_by_lua_block {
@@ -767,7 +889,7 @@ max_tokens=321
 
 
 
-=== TEST 16: both llm_options and request_body coexist, request_body wins
+=== TEST 19: both llm_options and request_body coexist, request_body wins
 --- config
     location /t {
         content_by_lua_block {

(apisix) branch master updated: perf(ai): reuse raw request body when unchanged (#13406)

Reply via email to