This is an automated email from the ASF dual-hosted git repository.

Baoyuantop pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c0fa801a feat(ai-proxy): add provider-aware max_tokens override with 
priority control (#13251)
0c0fa801a is described below

commit 0c0fa801a1a0aa953cb69ebe700c4a69727338b1
Author: Nic <[email protected]>
AuthorDate: Tue Apr 21 11:14:16 2026 +0800

    feat(ai-proxy): add provider-aware max_tokens override with priority 
control (#13251)
---
 apisix/plugins/ai-protocols/init.lua              |   1 +
 apisix/plugins/ai-providers/aimlapi.lua           |  13 +-
 apisix/plugins/ai-providers/anthropic.lua         |  27 +-
 apisix/plugins/ai-providers/azure-openai.lua      |  13 +-
 apisix/plugins/ai-providers/base.lua              |  11 +-
 apisix/plugins/ai-providers/deepseek.lua          |  13 +-
 apisix/plugins/ai-providers/gemini.lua            |  13 +-
 apisix/plugins/ai-providers/openai-compatible.lua |  27 +-
 apisix/plugins/ai-providers/openai.lua            |  28 +-
 apisix/plugins/ai-providers/openrouter.lua        |  13 +-
 apisix/plugins/ai-providers/vertex-ai.lua         |   9 +
 apisix/plugins/ai-proxy/base.lua                  |   5 +-
 apisix/plugins/ai-proxy/schema.lua                |  51 +-
 docs/en/latest/plugins/ai-proxy-multi.md          |   3 +
 docs/en/latest/plugins/ai-proxy.md                |  28 ++
 docs/zh/latest/plugins/ai-proxy-multi.md          |   3 +
 docs/zh/latest/plugins/ai-proxy.md                |  28 ++
 t/plugin/ai-proxy-request-body-override.t         | 585 ++++++++++++++++++++++
 18 files changed, 840 insertions(+), 31 deletions(-)

diff --git a/apisix/plugins/ai-protocols/init.lua 
b/apisix/plugins/ai-protocols/init.lua
index c5db8742c..67b5ddc73 100644
--- a/apisix/plugins/ai-protocols/init.lua
+++ b/apisix/plugins/ai-protocols/init.lua
@@ -65,6 +65,7 @@ function _M.get(name)
 end
 
 
+
 --- Find a converter that can bridge from client_protocol to a protocol
 -- supported by the driver. Delegates to the converters registry.
 -- @param client_protocol string The detected client protocol
diff --git a/apisix/plugins/ai-providers/aimlapi.lua 
b/apisix/plugins/ai-providers/aimlapi.lua
index 24cf2bbc5..cffb2f223 100644
--- a/apisix/plugins/ai-providers/aimlapi.lua
+++ b/apisix/plugins/ai-providers/aimlapi.lua
@@ -15,12 +15,23 @@
 -- limitations under the License.
 --
 
+local function rewrite_chat_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_tokens == nil then
+            body.max_tokens = override.max_tokens
+        end
+    end
+end
+
 return require("apisix.plugins.ai-providers.base").new(
     {
         host = "api.aimlapi.com",
         port = 443,
         capabilities = {
-            ["openai-chat"] = { path = "/chat/completions" },
+            ["openai-chat"] = {
+                path = "/chat/completions",
+                rewrite_request_body = rewrite_chat_request_body,
+            },
         },
     }
 )
diff --git a/apisix/plugins/ai-providers/anthropic.lua 
b/apisix/plugins/ai-providers/anthropic.lua
index 2e79e5da8..9c5f270f0 100644
--- a/apisix/plugins/ai-providers/anthropic.lua
+++ b/apisix/plugins/ai-providers/anthropic.lua
@@ -15,13 +15,36 @@
 -- limitations under the License.
 --
 
+local function rewrite_chat_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_tokens == nil then
+            body.max_tokens = override.max_tokens
+        end
+    end
+end
+
+
+local function rewrite_messages_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_tokens == nil then
+            body.max_tokens = override.max_tokens
+        end
+    end
+end
+
 return require("apisix.plugins.ai-providers.base").new(
     {
         host = "api.anthropic.com",
         port = 443,
         capabilities = {
-            ["openai-chat"] = { path = "/v1/chat/completions" },
-            ["anthropic-messages"] = { path = "/v1/messages" },
+            ["openai-chat"] = {
+                path = "/v1/chat/completions",
+                rewrite_request_body = rewrite_chat_request_body,
+            },
+            ["anthropic-messages"] = {
+                path = "/v1/messages",
+                rewrite_request_body = rewrite_messages_request_body,
+            },
         },
     }
 )
diff --git a/apisix/plugins/ai-providers/azure-openai.lua 
b/apisix/plugins/ai-providers/azure-openai.lua
index a4c6db00f..0b9684ad3 100644
--- a/apisix/plugins/ai-providers/azure-openai.lua
+++ b/apisix/plugins/ai-providers/azure-openai.lua
@@ -15,12 +15,23 @@
 -- limitations under the License.
 --
 
+local function rewrite_chat_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_tokens == nil then
+            body.max_tokens = override.max_tokens
+        end
+    end
+end
+
 return require("apisix.plugins.ai-providers.base").new(
     {
         port = 443,
         remove_model = true,
         capabilities = {
-            ["openai-chat"] = { path = "/completions" },
+            ["openai-chat"] = {
+                path = "/completions",
+                rewrite_request_body = rewrite_chat_request_body,
+            },
         },
     }
 )
diff --git a/apisix/plugins/ai-providers/base.lua 
b/apisix/plugins/ai-providers/base.lua
index dc2d67f67..b83717e93 100644
--- a/apisix/plugins/ai-providers/base.lua
+++ b/apisix/plugins/ai-providers/base.lua
@@ -173,7 +173,7 @@ function _M.build_request(self, ctx, conf, request_body, 
opts)
                           or opts.target_host or self.host,
     }
 
-    -- Inject model options
+    -- Inject model options (flat overwrite)
     if opts.model_options then
         for opt, val in pairs(opts.model_options) do
             if request_body[opt] ~= nil then
@@ -182,6 +182,15 @@ function _M.build_request(self, ctx, conf, request_body, 
opts)
             request_body[opt] = val
         end
     end
+
+    -- Apply request body override via provider capability hook
+    if opts.override_request_body then
+        local cap = self.capabilities and 
self.capabilities[ctx.ai_target_protocol]
+        if cap and cap.rewrite_request_body then
+            cap.rewrite_request_body(request_body, opts.override_request_body,
+                                     opts.request_body_force_override)
+        end
+    end
     params.body = request_body
 
     if self.remove_model then
diff --git a/apisix/plugins/ai-providers/deepseek.lua 
b/apisix/plugins/ai-providers/deepseek.lua
index 47e835678..bd09fef48 100644
--- a/apisix/plugins/ai-providers/deepseek.lua
+++ b/apisix/plugins/ai-providers/deepseek.lua
@@ -15,12 +15,23 @@
 -- limitations under the License.
 --
 
+local function rewrite_chat_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_tokens == nil then
+            body.max_tokens = override.max_tokens
+        end
+    end
+end
+
 return require("apisix.plugins.ai-providers.base").new(
     {
         host = "api.deepseek.com",
         port = 443,
         capabilities = {
-            ["openai-chat"] = { path = "/chat/completions" },
+            ["openai-chat"] = {
+                path = "/chat/completions",
+                rewrite_request_body = rewrite_chat_request_body,
+            },
         },
     }
 )
diff --git a/apisix/plugins/ai-providers/gemini.lua 
b/apisix/plugins/ai-providers/gemini.lua
index f86be4eed..eb7c23b29 100644
--- a/apisix/plugins/ai-providers/gemini.lua
+++ b/apisix/plugins/ai-providers/gemini.lua
@@ -15,12 +15,23 @@
 -- limitations under the License.
 --
 
+local function rewrite_chat_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_completion_tokens == nil then
+            body.max_completion_tokens = override.max_tokens
+        end
+    end
+end
+
 return require("apisix.plugins.ai-providers.base").new(
     {
         host = "generativelanguage.googleapis.com",
         port = 443,
         capabilities = {
-            ["openai-chat"] = { path = "/v1beta/openai/chat/completions" },
+            ["openai-chat"] = {
+                path = "/v1beta/openai/chat/completions",
+                rewrite_request_body = rewrite_chat_request_body,
+            },
         },
     }
 )
diff --git a/apisix/plugins/ai-providers/openai-compatible.lua 
b/apisix/plugins/ai-providers/openai-compatible.lua
index 3f8a927bd..10d04315b 100644
--- a/apisix/plugins/ai-providers/openai-compatible.lua
+++ b/apisix/plugins/ai-providers/openai-compatible.lua
@@ -15,10 +15,33 @@
 -- limitations under the License.
 --
 
+local function rewrite_chat_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_tokens == nil then
+            body.max_tokens = override.max_tokens
+        end
+    end
+end
+
+
+local function rewrite_responses_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_output_tokens == nil then
+            body.max_output_tokens = override.max_tokens
+        end
+    end
+end
+
 return require("apisix.plugins.ai-providers.base").new({
     capabilities = {
-        ["openai-chat"]       = { path = "/v1/chat/completions" },
-        ["openai-responses"]  = { path = "/v1/responses" },
+        ["openai-chat"]       = {
+            path = "/v1/chat/completions",
+            rewrite_request_body = rewrite_chat_request_body,
+        },
+        ["openai-responses"]  = {
+            path = "/v1/responses",
+            rewrite_request_body = rewrite_responses_request_body,
+        },
         ["openai-embeddings"] = { path = "/v1/embeddings" },
     },
 })
diff --git a/apisix/plugins/ai-providers/openai.lua 
b/apisix/plugins/ai-providers/openai.lua
index a2c1af67f..0b597df2c 100644
--- a/apisix/plugins/ai-providers/openai.lua
+++ b/apisix/plugins/ai-providers/openai.lua
@@ -15,13 +15,37 @@
 -- limitations under the License.
 --
 
+local function rewrite_chat_request_body(body, override, force)
+    if override.max_tokens then
+        if force or (body.max_completion_tokens == nil and body.max_tokens == 
nil) then
+            body.max_completion_tokens = override.max_tokens
+            body.max_tokens = nil
+        end
+    end
+end
+
+
+local function rewrite_responses_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_output_tokens == nil then
+            body.max_output_tokens = override.max_tokens
+        end
+    end
+end
+
 return require("apisix.plugins.ai-providers.base").new(
     {
         host = "api.openai.com",
         port = 443,
         capabilities = {
-            ["openai-chat"]       = { path = "/v1/chat/completions" },
-            ["openai-responses"]  = { path = "/v1/responses" },
+            ["openai-chat"]       = {
+                path = "/v1/chat/completions",
+                rewrite_request_body = rewrite_chat_request_body,
+            },
+            ["openai-responses"]  = {
+                path = "/v1/responses",
+                rewrite_request_body = rewrite_responses_request_body,
+            },
             ["openai-embeddings"] = { path = "/v1/embeddings" },
         },
     }
diff --git a/apisix/plugins/ai-providers/openrouter.lua 
b/apisix/plugins/ai-providers/openrouter.lua
index 7627ea7f7..76075a499 100644
--- a/apisix/plugins/ai-providers/openrouter.lua
+++ b/apisix/plugins/ai-providers/openrouter.lua
@@ -15,12 +15,23 @@
 -- limitations under the License.
 --
 
+local function rewrite_chat_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_tokens == nil then
+            body.max_tokens = override.max_tokens
+        end
+    end
+end
+
 return require("apisix.plugins.ai-providers.base").new(
     {
         host = "openrouter.ai",
         port = 443,
         capabilities = {
-            ["openai-chat"] = { path = "/api/v1/chat/completions" },
+            ["openai-chat"] = {
+                path = "/api/v1/chat/completions",
+                rewrite_request_body = rewrite_chat_request_body,
+            },
         },
     }
 )
diff --git a/apisix/plugins/ai-providers/vertex-ai.lua 
b/apisix/plugins/ai-providers/vertex-ai.lua
index 58a181dc5..30a38f675 100644
--- a/apisix/plugins/ai-providers/vertex-ai.lua
+++ b/apisix/plugins/ai-providers/vertex-ai.lua
@@ -54,6 +54,14 @@ local function get_node(instance_conf)
 end
 
 
+local function rewrite_chat_request_body(body, override, force)
+    if override.max_tokens then
+        if force or body.max_completion_tokens == nil then
+            body.max_completion_tokens = override.max_tokens
+        end
+    end
+end
+
 return require("apisix.plugins.ai-providers.base").new({
     get_node = get_node,
     capabilities = {
@@ -66,6 +74,7 @@ return require("apisix.plugins.ai-providers.base").new({
                     return get_chat_completions_path(conf.project_id, 
conf.region)
                 end
             end,
+            rewrite_request_body = rewrite_chat_request_body,
         },
         ["vertex-predict"] = {
             host = function(conf)
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 5ce2658e4..7ba143b10 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -125,8 +125,11 @@ function _M.before_proxy(conf, ctx, on_error)
             model_options = ai_instance.options,
             conf = ai_instance.provider_conf or {},
             auth = ai_instance.auth,
+            override_request_body =
+                core.table.try_read_attr(ai_instance, "override", 
"request_body"),
+            request_body_force_override =
+                core.table.try_read_attr(ai_instance, "override", 
"request_body_force_override"),
         }
-
         -- Step 1: Route client protocol to driver capability
         local client_protocol = ctx.ai_client_protocol
         local client_proto = protocols.get(client_protocol)
diff --git a/apisix/plugins/ai-proxy/schema.lua 
b/apisix/plugins/ai-proxy/schema.lua
index 6c9dc7037..5dd1ded26 100644
--- a/apisix/plugins/ai-proxy/schema.lua
+++ b/apisix/plugins/ai-proxy/schema.lua
@@ -72,6 +72,37 @@ local model_options_schema = {
     additionalProperties = true,
 }
 
+local override_schema = {
+    type = "object",
+    properties = {
+        endpoint = {
+            type = "string",
+            description = "To be specified to override the endpoint of the AI 
Instance",
+        },
+        request_body = {
+            type = "object",
+            properties = {
+                max_tokens = {
+                    type = "integer",
+                    minimum = 1,
+                    description = "Maximum number of output tokens. APISIX 
automatically "
+                        .. "maps this to the correct field name for the target 
provider "
+                        .. "(e.g. max_completion_tokens for OpenAI, 
max_output_tokens "
+                        .. "for Responses API).",
+                },
+            },
+            additionalProperties = false,
+        },
+        request_body_force_override = {
+            type = "boolean",
+            default = false,
+            description = "When false (default), client request body fields 
take "
+                .. "priority and override values only fill in missing fields. "
+                .. "When true, override values forcefully overwrite client 
fields.",
+        },
+    },
+}
+
 local provider_vertex_ai_schema = {
     type = "object",
     properties = {
@@ -115,15 +146,7 @@ local ai_instance_schema = {
             },
             auth = auth_schema,
             options = model_options_schema,
-            override = {
-                type = "object",
-                properties = {
-                    endpoint = {
-                        type = "string",
-                        description = "To be specified to override the 
endpoint of the AI Instance",
-                    },
-                },
-            },
+            override = override_schema,
             checks = {
                 type = "object",
                 properties = {
@@ -208,15 +231,7 @@ _M.ai_proxy_schema = {
         },
         keepalive_pool = {type = "integer", minimum = 1, default = 30},
         ssl_verify = {type = "boolean", default = true },
-        override = {
-            type = "object",
-            properties = {
-                endpoint = {
-                    type = "string",
-                    description = "To be specified to override the endpoint of 
the AI Instance",
-                },
-            },
-        },
+        override = override_schema,
     },
     required = {"provider", "auth"},
     encrypt_fields = {"auth.header", "auth.query", 
"auth.gcp.service_account_json"},
diff --git a/docs/en/latest/plugins/ai-proxy-multi.md 
b/docs/en/latest/plugins/ai-proxy-multi.md
index 86f3ba9fc..7905bb921 100644
--- a/docs/en/latest/plugins/ai-proxy-multi.md
+++ b/docs/en/latest/plugins/ai-proxy-multi.md
@@ -81,6 +81,9 @@ In addition, the Plugin also supports logging LLM request 
information in the acc
 | logging.payloads                    | boolean        | False    | false      
                     |              | If true, log request and response 
payload. |
 | instances.override                    | object         | False    |          
                         |              | Override setting. |
 | instances.override.endpoint           | string         | False    |          
                         |              | LLM provider endpoint to replace the 
default endpoint with. If not configured, the Plugin uses the default OpenAI 
endpoint `https://api.openai.com/v1/chat/completions`. |
+| instances.override.request_body       | object         | False    |          
                         |              | Request body overrides. See 
[Provider-aware `max_tokens` 
mapping](./ai-proxy.md#provider-aware-max_tokens-mapping) in the `ai-proxy` 
documentation for how the contained fields are forwarded to each provider. |
+| instances.override.request_body.max_tokens | integer    | False    |         
                          | ≥ 1          | Maximum number of output tokens. 
APISIX automatically maps this to the provider-specific field name (e.g. 
`max_completion_tokens` for OpenAI Chat Completions, `max_output_tokens` for 
OpenAI Responses API, `max_tokens` for most other providers). By default, 
client request fields take priority and the override value only fills in when 
the client did not set it; set `instances.override.request_body_force_override` 
to `true` to forcefully overwrite the client value. |
+| instances.override.request_body_force_override | boolean | False | false |   
                         | When `false` (default), client request body fields 
take priority and `instances.override.request_body` values only fill in missing 
fields. When `true`, `instances.override.request_body` values forcefully 
overwrite client request body fields. |
 | instances.checks                              | object         | False    |  
                                 |              | Health check configurations. 
Note that at the moment, OpenAI, DeepSeek, and AIMLAPI do not provide an 
official health check endpoint. Other LLM services that you can configure under 
`openai-compatible` provider may have available health check endpoints. |
 | instances.checks.active                       | object         | True     |  
                                 |              | Active health check 
configurations. |
 | instances.checks.active.type                  | string         | False    | 
http                            | [http, https, tcp] | Type of health check 
connection. |
diff --git a/docs/en/latest/plugins/ai-proxy.md 
b/docs/en/latest/plugins/ai-proxy.md
index b2aed0c0a..53564a064 100644
--- a/docs/en/latest/plugins/ai-proxy.md
+++ b/docs/en/latest/plugins/ai-proxy.md
@@ -66,6 +66,9 @@ In addition, the Plugin also supports logging LLM request 
information in the acc
 | options.model   | string  | False    |         |                             
             | Name of the LLM model, such as `gpt-4` or `gpt-3.5`. Refer to 
the LLM provider's API documentation for available models. |
 | override        | object  | False    |         |                             
             | Override setting. |
 | override.endpoint | string | False    |         |                            
              | Custom LLM provider endpoint, required when `provider` is 
`openai-compatible`. |
+| override.request_body | object | False  |         |                          
                | Request body overrides. See [Provider-aware `max_tokens` 
mapping](#provider-aware-max_tokens-mapping) for how the contained fields are 
forwarded to each provider. |
+| override.request_body.max_tokens | integer | False  |         | ≥ 1          
                      | Maximum number of output tokens. APISIX automatically 
maps this to the provider-specific field name (e.g. `max_completion_tokens` for 
OpenAI Chat Completions, `max_output_tokens` for OpenAI Responses API, 
`max_tokens` for most other providers). By default, client request fields take 
priority and the override value only fills in when the client did not set it; 
set `override.request_body_force_override` to `true` to forcefully overwrite 
the client value. |
+| override.request_body_force_override | boolean | False | false |             
                       | When `false` (default), client request body fields 
take priority and `override.request_body` values only fill in missing fields. 
When `true`, `override.request_body` values forcefully overwrite client request 
body fields. |
 | logging        | object  | False    |         |                              
            | Logging configurations. Does not affect `error.log`. |
 | logging.summaries | boolean | False | false |                                
          | If true, logs request LLM model, duration, request, and response 
tokens. |
 | logging.payloads  | boolean | False | false |                                
          | If true, logs request and response payload. |
@@ -77,6 +80,31 @@ In addition, the Plugin also supports logging LLM request 
information in the acc
 | keepalive_pool | integer | False    | 30       |                             
             | Keepalive pool size for the LLM service connection. |
 | ssl_verify     | boolean | False    | true   |                               
           | If true, verifies the LLM service's certificate. |
 
+## Provider-aware `max_tokens` mapping
+
+LLM providers and API endpoints disagree on the field name used to cap the 
number of output tokens. Configuring `override.request_body.max_tokens` lets 
you set a single value in APISIX and have it forwarded under the field name 
expected by each provider/endpoint.
+
+The table below shows, for each `provider` and target API endpoint, the 
upstream field name APISIX rewrites `max_tokens` to. A `—` means the provider 
does not expose that endpoint.
+
+| Provider            | OpenAI Chat Completions      | OpenAI Responses API   
| Anthropic Messages |
+| ------------------- | ---------------------------- | ---------------------- 
| ------------------ |
+| `openai`            | `max_completion_tokens` ¹    | `max_output_tokens`    
| —                  |
+| `openai-compatible` | `max_tokens`                 | `max_output_tokens`    
| —                  |
+| `azure-openai`      | `max_tokens`                 | —                      
| —                  |
+| `deepseek`          | `max_tokens`                 | —                      
| —                  |
+| `aimlapi`           | `max_tokens`                 | —                      
| —                  |
+| `openrouter`        | `max_tokens`                 | —                      
| —                  |
+| `gemini`            | `max_completion_tokens`      | —                      
| —                  |
+| `vertex-ai`         | `max_completion_tokens`      | —                      
| —                  |
+| `anthropic`         | `max_tokens`                 | —                      
| `max_tokens`       |
+
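+¹ When `provider` is `openai` and the target is the Chat Completions endpoint, 
whenever the override is applied (the client set neither `max_tokens` nor 
`max_completion_tokens`, or `override.request_body_force_override` is `true`), 
APISIX writes `max_completion_tokens` and removes any `max_tokens` field from 
the request body — `max_tokens` has been deprecated in favor of 
`max_completion_tokens` by OpenAI. If the client set either field and force 
override is `false`, the client's field is left as is.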
+
+Priority between client request and override is controlled by 
`override.request_body_force_override`:
+
+- `false` (default): if the client request body already sets the corresponding 
field, it is preserved; the override value only fills in when the field is 
missing.
+- `true`: the override value forcefully overwrites the field in the client 
request body.
+
 ## Examples
 
 The examples below demonstrate how you can configure `ai-proxy` for different 
scenarios.
diff --git a/docs/zh/latest/plugins/ai-proxy-multi.md 
b/docs/zh/latest/plugins/ai-proxy-multi.md
index d7f57d5df..137fe9ab2 100644
--- a/docs/zh/latest/plugins/ai-proxy-multi.md
+++ b/docs/zh/latest/plugins/ai-proxy-multi.md
@@ -78,6 +78,9 @@ import TabItem from '@theme/TabItem';
 | instances.options.model             | string         | 否    |                
                   |              | LLM 模型的名称,如 `gpt-4` 或 
`gpt-3.5`。有关更多可用模型,请参阅您的 LLM 提供商的 API 文档。 |
 | instances.override                  | object         | 否    |                
                   |              | 覆盖设置。 |
 | instances.override.endpoint         | string         | 否    |                
                   |              | 用于替换默认端点的 LLM 提供商端点。如果未配置,插件使用默认的 OpenAI 端点 
`https://api.openai.com/v1/chat/completions`。 |
+| instances.override.request_body     | object         | 否    |                
                   |              | 请求体覆盖配置。请参阅 `ai-proxy` 文档中的 [`max_tokens` 
字段映射](./ai-proxy.md#provider-aware-max_tokens-mapping) 章节,了解其内部字段如何转发到各个上游服务商。 |
+| instances.override.request_body.max_tokens | integer | 否    |                
                   | ≥ 1          | 最大输出 token 数。APISIX 
会自动将该值映射为各上游服务商对应的字段名(例如 OpenAI Chat Completions 使用 
`max_completion_tokens`、OpenAI Responses API 使用 `max_output_tokens`、其他大多数服务商使用 
`max_tokens`)。默认情况下客户端请求中的对应字段优先,仅当客户端未设置时该 override 值才会生效;将 
`instances.override.request_body_force_override` 设置为 `true` 可强制覆盖客户端值。 |
+| instances.override.request_body_force_override | boolean | 否 | false |       
                     | 为 
`false`(默认)时,客户端请求体中的字段优先,`instances.override.request_body` 仅补充缺失字段。为 `true` 
时,`instances.override.request_body` 的值强制覆盖客户端请求体中的同名字段。 |
 | logging                             | object         | 否    |                
                   |              | 日志配置。不影响 `error.log`。 |
 | logging.summaries                   | boolean        | 否    | false          
                 |              | 如果为 true,记录请求 LLM 模型、持续时间、请求和响应令牌。 |
 | logging.payloads                    | boolean        | 否    | false          
                 |              | 如果为 true,记录请求和响应负载。 |
diff --git a/docs/zh/latest/plugins/ai-proxy.md 
b/docs/zh/latest/plugins/ai-proxy.md
index 3d2354ada..3b7f7ed94 100644
--- a/docs/zh/latest/plugins/ai-proxy.md
+++ b/docs/zh/latest/plugins/ai-proxy.md
@@ -66,6 +66,9 @@ description: ai-proxy 插件通过将插件配置转换为所需的请求格式
 | options.model   | string  | 否    |         |                                 
         | LLM 模型的名称,如 `gpt-4` 或 `gpt-3.5`。请参阅 LLM 提供商的 API 文档以了解可用模型。 |
 | override        | object  | 否    |         |                                 
         | 覆盖设置。 |
 | override.endpoint | string | 否    |         |                                
          | 自定义 LLM 提供商端点,当 `provider` 为 `openai-compatible` 时必需。 |
+| override.request_body | object | 否  |         |                              
            | 请求体覆盖配置。请参阅 [`max_tokens` 
字段映射](#provider-aware-max_tokens-mapping) 章节,了解其内部字段如何转发到各个上游服务商。 |
+| override.request_body.max_tokens | integer | 否  |         | ≥ 1              
                  | 最大输出 token 数。APISIX 会自动将该值映射为各上游服务商对应的字段名(例如 OpenAI Chat 
Completions 使用 `max_completion_tokens`、OpenAI Responses API 使用 
`max_output_tokens`、其他大多数服务商使用 `max_tokens`)。默认情况下客户端请求中的对应字段优先,仅当客户端未设置时该 
override 值才会生效;将 `override.request_body_force_override` 设置为 `true` 可强制覆盖客户端值。 |
+| override.request_body_force_override | boolean | 否 | false |                 
                   | 为 `false`(默认)时,客户端请求体中的字段优先,`override.request_body` 
仅补充缺失字段。为 `true` 时,`override.request_body` 的值强制覆盖客户端请求体中的同名字段。 |
 | logging        | object  | 否    |         |                                  
        | 日志配置。不影响 `error.log`。 |
 | logging.summaries | boolean | 否 | false |                                    
      | 如果为 true,记录请求 LLM 模型、持续时间、请求和响应令牌。 |
 | logging.payloads  | boolean | 否 | false |                                    
      | 如果为 true,记录请求和响应负载。 |
@@ -77,6 +80,31 @@ description: ai-proxy 插件通过将插件配置转换为所需的请求格式
 | keepalive_pool | integer | 否    | 30       |                                 
         | LLM 服务连接的保活池大小。 |
 | ssl_verify     | boolean | 否    | true   |                                   
       | 如果为 true,验证 LLM 服务的证书。 |
 
+## Provider-aware `max_tokens` mapping
+
+不同的 LLM 服务商和 API 端点使用不同的字段名来限制输出 token 数。通过配置 
`override.request_body.max_tokens`,您只需在 APISIX 中设置一个统一的值,APISIX 会根据上游服务商和 API 
端点将其转发为对应的字段名。
+
+下表展示了在不同 `provider` 和目标 API 端点下,APISIX 会将 `max_tokens` 映射为哪个上游字段名。`—` 
表示该服务商不支持对应的端点。
+
+| Provider            | OpenAI Chat Completions      | OpenAI Responses API   
| Anthropic Messages |
+| ------------------- | ---------------------------- | ---------------------- 
| ------------------ |
+| `openai`            | `max_completion_tokens` ¹    | `max_output_tokens`    
| —                  |
+| `openai-compatible` | `max_tokens`                 | `max_output_tokens`    
| —                  |
+| `azure-openai`      | `max_tokens`                 | —                      
| —                  |
+| `deepseek`          | `max_tokens`                 | —                      
| —                  |
+| `aimlapi`           | `max_tokens`                 | —                      
| —                  |
+| `openrouter`        | `max_tokens`                 | —                      
| —                  |
+| `gemini`            | `max_completion_tokens`      | —                      
| —                  |
+| `vertex-ai`         | `max_completion_tokens`      | —                      
| —                  |
+| `anthropic`         | `max_tokens`                 | —                      
| `max_tokens`       |
+
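+¹ 当 `provider` 为 `openai` 且目标为 Chat Completions 端点时,只要 override 生效(客户端既未设置 
`max_tokens` 也未设置 `max_completion_tokens`,或 `override.request_body_force_override` 为 
`true`),APISIX 会写入 `max_completion_tokens`,并删除请求体中已有的 `max_tokens` 字段——OpenAI 已弃用 
`max_tokens`,改用 `max_completion_tokens`。如果客户端已设置其中任一字段且未开启强制覆盖,则保留客户端的字段不变。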
+
+客户端请求与 override 之间的优先级由 `override.request_body_force_override` 控制:
+
+- `false`(默认):如果客户端请求体已经设置了对应字段,则保留客户端值;override 值仅在该字段缺失时补充。
+- `true`:override 值强制覆盖客户端请求体中的同名字段。
+
 ## 示例
 
 以下示例演示了如何为不同场景配置 `ai-proxy`。
diff --git a/t/plugin/ai-proxy-request-body-override.t 
b/t/plugin/ai-proxy-request-body-override.t
new file mode 100644
index 000000000..779ead5c3
--- /dev/null
+++ b/t/plugin/ai-proxy-request-body-override.t
@@ -0,0 +1,585 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# BEGIN runs as soon as it is compiled, so this environment variable is set
+# before the following `use t::APISIX` is processed.
+# NOTE(review): presumably "0" disables the v1 control API for this test
+# file -- confirm against t/APISIX.pm.
+BEGIN {
+    $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
+use t::APISIX 'no_plan';
+
+# Test harness settings: info-level log, each test block executed once.
+log_level("info");
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+# Fill in per-block defaults before the blocks are executed:
+#   * request     -> "GET /t" when the block does not declare one
+#   * http_config -> a local echo upstream on port 6732 when the block does
+#                    not supply its own
+# Each echo location wraps the raw request body it received in a response
+# shaped like the matching API (chat completions, responses, messages), so a
+# test can decode that response and inspect what ai-proxy forwarded.
+add_block_preprocessor(sub {
+    my ($block) = @_;
+
+    if (!defined $block->request) {
+        $block->set_value("request", "GET /t");
+    }
+
+    my $http_config = $block->http_config // <<_EOC_;
+        # Upstream that echoes the request body it receives so the test can
+        # assert exactly what was forwarded by ai-proxy.
+        server {
+            server_name echo-openai;
+            listen 6732;
+            default_type 'application/json';
+
+            location /v1/chat/completions {
+                content_by_lua_block {
+                    local json = require("cjson.safe")
+                    ngx.req.read_body()
+                    local raw = ngx.req.get_body_data() or ""
+                    ngx.status = 200
+                    ngx.say(json.encode({
+                        id = "chatcmpl-1",
+                        object = "chat.completion",
+                        model = "echo",
+                        choices = {{
+                            index = 0,
+                            message = { role = "assistant", content = raw },
+                            finish_reason = "stop",
+                        }},
+                        usage = { prompt_tokens = 1, completion_tokens = 1, total_tokens = 2 },
+                    }))
+                }
+            }
+
+            location /v1/responses {
+                content_by_lua_block {
+                    local json = require("cjson.safe")
+                    ngx.req.read_body()
+                    local raw = ngx.req.get_body_data() or ""
+                    ngx.status = 200
+                    ngx.say(json.encode({
+                        id = "resp_1",
+                        object = "response",
+                        created_at = 1,
+                        model = "echo",
+                        output = {{
+                            type = "message",
+                            role = "assistant",
+                            content = {{ type = "output_text", text = raw }},
+                        }},
+                        usage = { input_tokens = 1, output_tokens = 1, total_tokens = 2 },
+                    }))
+                }
+            }
+
+            location /v1/messages {
+                content_by_lua_block {
+                    local json = require("cjson.safe")
+                    ngx.req.read_body()
+                    local raw = ngx.req.get_body_data() or ""
+                    ngx.status = 200
+                    ngx.say(json.encode({
+                        id = "msg_1",
+                        type = "message",
+                        role = "assistant",
+                        model = "echo",
+                        content = {{ type = "text", text = raw }},
+                        stop_reason = "end_turn",
+                        usage = { input_tokens = 1, output_tokens = 1 },
+                    }))
+                }
+            }
+        }
+_EOC_
+
+    $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: schema rejects unknown fields in request_body
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- request_body carries a temperature key instead of max_tokens;
+            -- the Admin API is expected to refuse the route (see error_code).
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body": {
+                                    "temperature": 0.5
+                                }
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            -- Relay the Admin API status and body so the sections below can match them.
+            ngx.status = code
+            ngx.print(body)
+        }
+    }
+--- error_code: 400
+--- response_body_like: .*additional properties forbidden.*
+
+
+
+=== TEST 2: openai provider maps max_tokens to max_completion_tokens
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- Route through the openai provider with a max_tokens override of 555.
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body": {
+                                    "max_tokens": 555
+                                }
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            -- Client body carries no token limit of its own.
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = '{"messages":[{"role":"user","content":"hi"}]}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            -- The echo upstream returns the forwarded request body as the
+            -- assistant message content; decode it to inspect what was sent.
+            local echoed = cjson.decode(body.choices[1].message.content)
+            ngx.say("max_completion_tokens=", echoed.max_completion_tokens)
+        }
+    }
+--- response_body
+max_completion_tokens=555
+
+
+
+=== TEST 3: openai-compatible provider maps max_tokens to max_tokens
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- Route through the openai-compatible provider with a max_tokens override of 444.
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai-compatible",
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body": {
+                                    "max_tokens": 444
+                                }
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            -- Client body carries no token limit of its own.
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = '{"messages":[{"role":"user","content":"hi"}]}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            -- The echo upstream returns the forwarded request body as the
+            -- assistant message content; decode it to inspect what was sent.
+            local echoed = cjson.decode(body.choices[1].message.content)
+            ngx.say("max_tokens=", echoed.max_tokens)
+        }
+    }
+--- response_body
+max_tokens=444
+
+
+
+=== TEST 4: openai responses API maps max_tokens to max_output_tokens
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- Route on the /v1/responses URI with a max_tokens override of 333.
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/responses",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body": {
+                                    "max_tokens": 333
+                                }
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            -- Responses-style client body without any token limit.
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/v1/responses", {
+                method = "POST",
+                body = '{"model":"gpt-4o","input":"hello"}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            -- The echo upstream returns the forwarded request body as the
+            -- output_text of the first output item; decode it to inspect it.
+            local echoed = cjson.decode(body.output[1].content[1].text)
+            ngx.say("max_output_tokens=", echoed.max_output_tokens)
+        }
+    }
+--- response_body
+max_output_tokens=333
+
+
+
+=== TEST 5: ai-proxy-multi per-instance override
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- Single ai-proxy-multi instance whose own override sets max_tokens to 222.
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy-multi": {
+                            "instances": [{
+                                "name": "test",
+                                "provider": "openai",
+                                "weight": 1,
+                                "auth": { "header": { "Authorization": "Bearer t" } },
+                                "override": {
+                                    "endpoint": "http://localhost:6732",
+                                    "request_body": {
+                                        "max_tokens": 222
+                                    }
+                                }
+                            }],
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            -- Client body carries no token limit of its own.
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = '{"messages":[{"role":"user","content":"hi"}]}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            -- The echo upstream returns the forwarded request body as the
+            -- assistant message content; decode it to inspect what was sent.
+            local echoed = cjson.decode(body.choices[1].message.content)
+            ngx.say("max_completion_tokens=", echoed.max_completion_tokens)
+        }
+    }
+--- response_body
+max_completion_tokens=222
+
+
+
+=== TEST 6: cross-protocol: anthropic client to openai provider, override applies to target protocol
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- Route on the Anthropic-style URI, backed by the openai provider,
+            -- with a forced max_tokens override of 77.
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body_force_override": true,
+                                "request_body": {
+                                    "max_tokens": 77
+                                }
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            -- Anthropic-style client body that already sets max_tokens=10.
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/v1/messages", {
+                method = "POST",
+                body = '{"model":"claude-3","max_tokens":10,"messages":[{"role":"user","content":"hi"}]}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            ngx.status = res.status
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            -- The client reads an Anthropic-shaped response; the forwarded
+            -- request body is carried in the first text content item.
+            local echoed = cjson.decode(body.content[1].text)
+            -- openai provider maps to max_completion_tokens
+            ngx.say("max_completion_tokens=", echoed.max_completion_tokens,
+                    " has_messages=", tostring(type(echoed.messages) == "table"))
+        }
+    }
+--- response_body
+max_completion_tokens=77 has_messages=true
+
+
+
+=== TEST 7: default mode - client value takes priority
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- Override of 555 without request_body_force_override.
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai-compatible",
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body": {
+                                    "max_tokens": 555
+                                }
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            -- Client sends max_tokens=999 which should NOT be overwritten
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = '{"messages":[{"role":"user","content":"hi"}],"max_tokens":999}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            -- The echo upstream returns the forwarded request body as the
+            -- assistant message content; decode it to inspect what was sent.
+            local echoed = cjson.decode(body.choices[1].message.content)
+            -- max_tokens from client (999) wins
+            ngx.say("max_tokens=", echoed.max_tokens)
+        }
+    }
+--- response_body
+max_tokens=999
+
+
+
+=== TEST 8: force_override mode - override forcefully overwrites client params
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- Override of 555 with request_body_force_override enabled.
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai-compatible",
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body_force_override": true,
+                                "request_body": {
+                                    "max_tokens": 555
+                                }
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            -- Client sends max_tokens=999 which SHOULD be overwritten
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = '{"messages":[{"role":"user","content":"hi"}],"max_tokens":999}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            -- The echo upstream returns the forwarded request body as the
+            -- assistant message content; decode it to inspect what was sent.
+            local echoed = cjson.decode(body.choices[1].message.content)
+            -- max_tokens from override (555) wins over client (999)
+            ngx.say("max_tokens=", echoed.max_tokens)
+        }
+    }
+--- response_body
+max_tokens=555
+
+
+
+=== TEST 9: default mode fills missing field
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            -- Override of 555 without request_body_force_override.
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai-compatible",
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body": {
+                                    "max_tokens": 555
+                                }
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            -- Client does NOT send max_tokens; override should fill it in
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = '{"messages":[{"role":"user","content":"hi"}]}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local cjson = require("cjson.safe")
+            local body = cjson.decode(res.body)
+            -- The echo upstream returns the forwarded request body as the
+            -- assistant message content; decode it to inspect what was sent.
+            local echoed = cjson.decode(body.choices[1].message.content)
+            ngx.say("max_tokens=", echoed.max_tokens)
+        }
+    }
+--- response_body
+max_tokens=555
+
+
+
+=== TEST 10: openai chat - deprecated max_tokens in body is respected in default mode and cleared in force mode
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+
+            -- Route with default mode (no force)
+            local code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "model": { "name": "gpt-4" },
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body": {
+                                    "max_tokens": 999
+                                }
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            local http = require("resty.http").new()
+            local cjson = require("cjson.safe")
+
+            -- Client sends deprecated max_tokens=200; default mode should NOT override
+            local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = '{"messages":[{"role":"user","content":"hi"}],"max_tokens":200}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            local body = cjson.decode(res.body)
+            -- The echo upstream returns the forwarded request body as the
+            -- assistant message content; decode it to inspect what was sent.
+            local echoed = cjson.decode(body.choices[1].message.content)
+            ngx.say("default: max_tokens=", echoed.max_tokens,
+                    " max_completion_tokens=", echoed.max_completion_tokens)
+
+            -- Switch to force mode
+            code = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/chat",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "model": { "name": "gpt-4" },
+                            "auth": { "header": { "Authorization": "Bearer t" } },
+                            "override": {
+                                "endpoint": "http://localhost:6732",
+                                "request_body": {
+                                    "max_tokens": 999
+                                },
+                                "request_body_force_override": true
+                            },
+                            "ssl_verify": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then ngx.status = code; return end
+
+            -- NOTE(review): presumably gives the updated route time to take
+            -- effect before the second request -- confirm.
+            ngx.sleep(0.5)
+
+            -- Client sends deprecated max_tokens=200; force mode should clear it and set max_completion_tokens
+            res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+                method = "POST",
+                body = '{"messages":[{"role":"user","content":"hi"}],"max_tokens":200}',
+                headers = { ["Content-Type"] = "application/json" },
+            }))
+            body = cjson.decode(res.body)
+            echoed = cjson.decode(body.choices[1].message.content)
+            ngx.say("force: max_tokens=", echoed.max_tokens,
+                    " max_completion_tokens=", echoed.max_completion_tokens)
+        }
+    }
+--- response_body
+default: max_tokens=200 max_completion_tokens=nil
+force: max_tokens=nil max_completion_tokens=999

Reply via email to