This is an automated email from the ASF dual-hosted git repository.
Baoyuantop pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 0c0fa801a feat(ai-proxy): add provider-aware max_tokens override with
priority control (#13251)
0c0fa801a is described below
commit 0c0fa801a1a0aa953cb69ebe700c4a69727338b1
Author: Nic <[email protected]>
AuthorDate: Tue Apr 21 11:14:16 2026 +0800
feat(ai-proxy): add provider-aware max_tokens override with priority
control (#13251)
---
apisix/plugins/ai-protocols/init.lua | 1 +
apisix/plugins/ai-providers/aimlapi.lua | 13 +-
apisix/plugins/ai-providers/anthropic.lua | 27 +-
apisix/plugins/ai-providers/azure-openai.lua | 13 +-
apisix/plugins/ai-providers/base.lua | 11 +-
apisix/plugins/ai-providers/deepseek.lua | 13 +-
apisix/plugins/ai-providers/gemini.lua | 13 +-
apisix/plugins/ai-providers/openai-compatible.lua | 27 +-
apisix/plugins/ai-providers/openai.lua | 28 +-
apisix/plugins/ai-providers/openrouter.lua | 13 +-
apisix/plugins/ai-providers/vertex-ai.lua | 9 +
apisix/plugins/ai-proxy/base.lua | 5 +-
apisix/plugins/ai-proxy/schema.lua | 51 +-
docs/en/latest/plugins/ai-proxy-multi.md | 3 +
docs/en/latest/plugins/ai-proxy.md | 28 ++
docs/zh/latest/plugins/ai-proxy-multi.md | 3 +
docs/zh/latest/plugins/ai-proxy.md | 28 ++
t/plugin/ai-proxy-request-body-override.t | 585 ++++++++++++++++++++++
18 files changed, 840 insertions(+), 31 deletions(-)
diff --git a/apisix/plugins/ai-protocols/init.lua
b/apisix/plugins/ai-protocols/init.lua
index c5db8742c..67b5ddc73 100644
--- a/apisix/plugins/ai-protocols/init.lua
+++ b/apisix/plugins/ai-protocols/init.lua
@@ -65,6 +65,7 @@ function _M.get(name)
end
+
--- Find a converter that can bridge from client_protocol to a protocol
-- supported by the driver. Delegates to the converters registry.
-- @param client_protocol string The detected client protocol
diff --git a/apisix/plugins/ai-providers/aimlapi.lua
b/apisix/plugins/ai-providers/aimlapi.lua
index 24cf2bbc5..cffb2f223 100644
--- a/apisix/plugins/ai-providers/aimlapi.lua
+++ b/apisix/plugins/ai-providers/aimlapi.lua
@@ -15,12 +15,23 @@
-- limitations under the License.
--
+local function rewrite_chat_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_tokens == nil then
+ body.max_tokens = override.max_tokens
+ end
+ end
+end
+
return require("apisix.plugins.ai-providers.base").new(
{
host = "api.aimlapi.com",
port = 443,
capabilities = {
- ["openai-chat"] = { path = "/chat/completions" },
+ ["openai-chat"] = {
+ path = "/chat/completions",
+ rewrite_request_body = rewrite_chat_request_body,
+ },
},
}
)
diff --git a/apisix/plugins/ai-providers/anthropic.lua
b/apisix/plugins/ai-providers/anthropic.lua
index 2e79e5da8..9c5f270f0 100644
--- a/apisix/plugins/ai-providers/anthropic.lua
+++ b/apisix/plugins/ai-providers/anthropic.lua
@@ -15,13 +15,36 @@
-- limitations under the License.
--
+local function rewrite_chat_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_tokens == nil then
+ body.max_tokens = override.max_tokens
+ end
+ end
+end
+
+
+local function rewrite_messages_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_tokens == nil then
+ body.max_tokens = override.max_tokens
+ end
+ end
+end
+
return require("apisix.plugins.ai-providers.base").new(
{
host = "api.anthropic.com",
port = 443,
capabilities = {
- ["openai-chat"] = { path = "/v1/chat/completions" },
- ["anthropic-messages"] = { path = "/v1/messages" },
+ ["openai-chat"] = {
+ path = "/v1/chat/completions",
+ rewrite_request_body = rewrite_chat_request_body,
+ },
+ ["anthropic-messages"] = {
+ path = "/v1/messages",
+ rewrite_request_body = rewrite_messages_request_body,
+ },
},
}
)
diff --git a/apisix/plugins/ai-providers/azure-openai.lua
b/apisix/plugins/ai-providers/azure-openai.lua
index a4c6db00f..0b9684ad3 100644
--- a/apisix/plugins/ai-providers/azure-openai.lua
+++ b/apisix/plugins/ai-providers/azure-openai.lua
@@ -15,12 +15,23 @@
-- limitations under the License.
--
+local function rewrite_chat_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_tokens == nil then
+ body.max_tokens = override.max_tokens
+ end
+ end
+end
+
return require("apisix.plugins.ai-providers.base").new(
{
port = 443,
remove_model = true,
capabilities = {
- ["openai-chat"] = { path = "/completions" },
+ ["openai-chat"] = {
+ path = "/completions",
+ rewrite_request_body = rewrite_chat_request_body,
+ },
},
}
)
diff --git a/apisix/plugins/ai-providers/base.lua
b/apisix/plugins/ai-providers/base.lua
index dc2d67f67..b83717e93 100644
--- a/apisix/plugins/ai-providers/base.lua
+++ b/apisix/plugins/ai-providers/base.lua
@@ -173,7 +173,7 @@ function _M.build_request(self, ctx, conf, request_body,
opts)
or opts.target_host or self.host,
}
- -- Inject model options
+ -- Inject model options (flat overwrite)
if opts.model_options then
for opt, val in pairs(opts.model_options) do
if request_body[opt] ~= nil then
@@ -182,6 +182,15 @@ function _M.build_request(self, ctx, conf, request_body,
opts)
request_body[opt] = val
end
end
+
+ -- Apply request body override via provider capability hook
+ if opts.override_request_body then
+ local cap = self.capabilities and
self.capabilities[ctx.ai_target_protocol]
+ if cap and cap.rewrite_request_body then
+ cap.rewrite_request_body(request_body, opts.override_request_body,
+ opts.request_body_force_override)
+ end
+ end
params.body = request_body
if self.remove_model then
diff --git a/apisix/plugins/ai-providers/deepseek.lua
b/apisix/plugins/ai-providers/deepseek.lua
index 47e835678..bd09fef48 100644
--- a/apisix/plugins/ai-providers/deepseek.lua
+++ b/apisix/plugins/ai-providers/deepseek.lua
@@ -15,12 +15,23 @@
-- limitations under the License.
--
+local function rewrite_chat_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_tokens == nil then
+ body.max_tokens = override.max_tokens
+ end
+ end
+end
+
return require("apisix.plugins.ai-providers.base").new(
{
host = "api.deepseek.com",
port = 443,
capabilities = {
- ["openai-chat"] = { path = "/chat/completions" },
+ ["openai-chat"] = {
+ path = "/chat/completions",
+ rewrite_request_body = rewrite_chat_request_body,
+ },
},
}
)
diff --git a/apisix/plugins/ai-providers/gemini.lua
b/apisix/plugins/ai-providers/gemini.lua
index f86be4eed..eb7c23b29 100644
--- a/apisix/plugins/ai-providers/gemini.lua
+++ b/apisix/plugins/ai-providers/gemini.lua
@@ -15,12 +15,23 @@
-- limitations under the License.
--
+local function rewrite_chat_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_completion_tokens == nil then
+ body.max_completion_tokens = override.max_tokens
+ end
+ end
+end
+
return require("apisix.plugins.ai-providers.base").new(
{
host = "generativelanguage.googleapis.com",
port = 443,
capabilities = {
- ["openai-chat"] = { path = "/v1beta/openai/chat/completions" },
+ ["openai-chat"] = {
+ path = "/v1beta/openai/chat/completions",
+ rewrite_request_body = rewrite_chat_request_body,
+ },
},
}
)
diff --git a/apisix/plugins/ai-providers/openai-compatible.lua
b/apisix/plugins/ai-providers/openai-compatible.lua
index 3f8a927bd..10d04315b 100644
--- a/apisix/plugins/ai-providers/openai-compatible.lua
+++ b/apisix/plugins/ai-providers/openai-compatible.lua
@@ -15,10 +15,33 @@
-- limitations under the License.
--
+local function rewrite_chat_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_tokens == nil then
+ body.max_tokens = override.max_tokens
+ end
+ end
+end
+
+
+local function rewrite_responses_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_output_tokens == nil then
+ body.max_output_tokens = override.max_tokens
+ end
+ end
+end
+
return require("apisix.plugins.ai-providers.base").new({
capabilities = {
- ["openai-chat"] = { path = "/v1/chat/completions" },
- ["openai-responses"] = { path = "/v1/responses" },
+ ["openai-chat"] = {
+ path = "/v1/chat/completions",
+ rewrite_request_body = rewrite_chat_request_body,
+ },
+ ["openai-responses"] = {
+ path = "/v1/responses",
+ rewrite_request_body = rewrite_responses_request_body,
+ },
["openai-embeddings"] = { path = "/v1/embeddings" },
},
})
diff --git a/apisix/plugins/ai-providers/openai.lua
b/apisix/plugins/ai-providers/openai.lua
index a2c1af67f..0b597df2c 100644
--- a/apisix/plugins/ai-providers/openai.lua
+++ b/apisix/plugins/ai-providers/openai.lua
@@ -15,13 +15,37 @@
-- limitations under the License.
--
+local function rewrite_chat_request_body(body, override, force)
+ if override.max_tokens then
+ if force or (body.max_completion_tokens == nil and body.max_tokens ==
nil) then
+ body.max_completion_tokens = override.max_tokens
+ body.max_tokens = nil
+ end
+ end
+end
+
+
+local function rewrite_responses_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_output_tokens == nil then
+ body.max_output_tokens = override.max_tokens
+ end
+ end
+end
+
return require("apisix.plugins.ai-providers.base").new(
{
host = "api.openai.com",
port = 443,
capabilities = {
- ["openai-chat"] = { path = "/v1/chat/completions" },
- ["openai-responses"] = { path = "/v1/responses" },
+ ["openai-chat"] = {
+ path = "/v1/chat/completions",
+ rewrite_request_body = rewrite_chat_request_body,
+ },
+ ["openai-responses"] = {
+ path = "/v1/responses",
+ rewrite_request_body = rewrite_responses_request_body,
+ },
["openai-embeddings"] = { path = "/v1/embeddings" },
},
}
diff --git a/apisix/plugins/ai-providers/openrouter.lua
b/apisix/plugins/ai-providers/openrouter.lua
index 7627ea7f7..76075a499 100644
--- a/apisix/plugins/ai-providers/openrouter.lua
+++ b/apisix/plugins/ai-providers/openrouter.lua
@@ -15,12 +15,23 @@
-- limitations under the License.
--
+local function rewrite_chat_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_tokens == nil then
+ body.max_tokens = override.max_tokens
+ end
+ end
+end
+
return require("apisix.plugins.ai-providers.base").new(
{
host = "openrouter.ai",
port = 443,
capabilities = {
- ["openai-chat"] = { path = "/api/v1/chat/completions" },
+ ["openai-chat"] = {
+ path = "/api/v1/chat/completions",
+ rewrite_request_body = rewrite_chat_request_body,
+ },
},
}
)
diff --git a/apisix/plugins/ai-providers/vertex-ai.lua
b/apisix/plugins/ai-providers/vertex-ai.lua
index 58a181dc5..30a38f675 100644
--- a/apisix/plugins/ai-providers/vertex-ai.lua
+++ b/apisix/plugins/ai-providers/vertex-ai.lua
@@ -54,6 +54,14 @@ local function get_node(instance_conf)
end
+local function rewrite_chat_request_body(body, override, force)
+ if override.max_tokens then
+ if force or body.max_completion_tokens == nil then
+ body.max_completion_tokens = override.max_tokens
+ end
+ end
+end
+
return require("apisix.plugins.ai-providers.base").new({
get_node = get_node,
capabilities = {
@@ -66,6 +74,7 @@ return require("apisix.plugins.ai-providers.base").new({
return get_chat_completions_path(conf.project_id,
conf.region)
end
end,
+ rewrite_request_body = rewrite_chat_request_body,
},
["vertex-predict"] = {
host = function(conf)
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 5ce2658e4..7ba143b10 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -125,8 +125,11 @@ function _M.before_proxy(conf, ctx, on_error)
model_options = ai_instance.options,
conf = ai_instance.provider_conf or {},
auth = ai_instance.auth,
+ override_request_body =
+ core.table.try_read_attr(ai_instance, "override",
"request_body"),
+ request_body_force_override =
+ core.table.try_read_attr(ai_instance, "override",
"request_body_force_override"),
}
-
-- Step 1: Route client protocol to driver capability
local client_protocol = ctx.ai_client_protocol
local client_proto = protocols.get(client_protocol)
diff --git a/apisix/plugins/ai-proxy/schema.lua
b/apisix/plugins/ai-proxy/schema.lua
index 6c9dc7037..5dd1ded26 100644
--- a/apisix/plugins/ai-proxy/schema.lua
+++ b/apisix/plugins/ai-proxy/schema.lua
@@ -72,6 +72,37 @@ local model_options_schema = {
additionalProperties = true,
}
+local override_schema = {
+ type = "object",
+ properties = {
+ endpoint = {
+ type = "string",
+ description = "To be specified to override the endpoint of the AI
Instance",
+ },
+ request_body = {
+ type = "object",
+ properties = {
+ max_tokens = {
+ type = "integer",
+ minimum = 1,
+ description = "Maximum number of output tokens. APISIX
automatically "
+ .. "maps this to the correct field name for the target
provider "
+ .. "(e.g. max_completion_tokens for OpenAI,
max_output_tokens "
+ .. "for Responses API).",
+ },
+ },
+ additionalProperties = false,
+ },
+ request_body_force_override = {
+ type = "boolean",
+ default = false,
+ description = "When false (default), client request body fields
take "
+ .. "priority and override values only fill in missing fields. "
+ .. "When true, override values forcefully overwrite client
fields.",
+ },
+ },
+}
+
local provider_vertex_ai_schema = {
type = "object",
properties = {
@@ -115,15 +146,7 @@ local ai_instance_schema = {
},
auth = auth_schema,
options = model_options_schema,
- override = {
- type = "object",
- properties = {
- endpoint = {
- type = "string",
- description = "To be specified to override the
endpoint of the AI Instance",
- },
- },
- },
+ override = override_schema,
checks = {
type = "object",
properties = {
@@ -208,15 +231,7 @@ _M.ai_proxy_schema = {
},
keepalive_pool = {type = "integer", minimum = 1, default = 30},
ssl_verify = {type = "boolean", default = true },
- override = {
- type = "object",
- properties = {
- endpoint = {
- type = "string",
- description = "To be specified to override the endpoint of
the AI Instance",
- },
- },
- },
+ override = override_schema,
},
required = {"provider", "auth"},
encrypt_fields = {"auth.header", "auth.query",
"auth.gcp.service_account_json"},
diff --git a/docs/en/latest/plugins/ai-proxy-multi.md
b/docs/en/latest/plugins/ai-proxy-multi.md
index 86f3ba9fc..7905bb921 100644
--- a/docs/en/latest/plugins/ai-proxy-multi.md
+++ b/docs/en/latest/plugins/ai-proxy-multi.md
@@ -81,6 +81,9 @@ In addition, the Plugin also supports logging LLM request
information in the acc
| logging.payloads | boolean | False | false
| | If true, log request and response
payload. |
| instances.override | object | False |
| | Override setting. |
| instances.override.endpoint | string | False |
| | LLM provider endpoint to replace the
default endpoint with. If not configured, the Plugin uses the default OpenAI
endpoint `https://api.openai.com/v1/chat/completions`. |
+| instances.override.request_body | object | False |
| | Request body overrides. See
[Provider-aware `max_tokens`
mapping](./ai-proxy.md#provider-aware-max_tokens-mapping) in the `ai-proxy`
documentation for how the contained fields are forwarded to each provider. |
+| instances.override.request_body.max_tokens | integer | False | | ≥ 1 | Maximum number of output tokens. APISIX automatically maps this to the provider-specific field name (e.g. `max_completion_tokens` for OpenAI Chat Completions, `max_output_tokens` for OpenAI Responses API, `max_tokens` for most other providers). By default, client request fields take priority and the override value only fills in when the client did not set it; set `instances.override.request_body_force_override` to `true` to forcefully overwrite the client value. |
+| instances.override.request_body_force_override | boolean | False | false |
| When `false` (default), client request body fields
take priority and `instances.override.request_body` values only fill in missing
fields. When `true`, `instances.override.request_body` values forcefully
overwrite client request body fields. |
| instances.checks | object | False |
| | Health check configurations.
Note that at the moment, OpenAI, DeepSeek, and AIMLAPI do not provide an
official health check endpoint. Other LLM services that you can configure under
`openai-compatible` provider may have available health check endpoints. |
| instances.checks.active | object | True |
| | Active health check
configurations. |
| instances.checks.active.type | string | False |
http | [http, https, tcp] | Type of health check
connection. |
diff --git a/docs/en/latest/plugins/ai-proxy.md
b/docs/en/latest/plugins/ai-proxy.md
index b2aed0c0a..53564a064 100644
--- a/docs/en/latest/plugins/ai-proxy.md
+++ b/docs/en/latest/plugins/ai-proxy.md
@@ -66,6 +66,9 @@ In addition, the Plugin also supports logging LLM request
information in the acc
| options.model | string | False | |
| Name of the LLM model, such as `gpt-4` or `gpt-3.5`. Refer to
the LLM provider's API documentation for available models. |
| override | object | False | |
| Override setting. |
| override.endpoint | string | False | |
| Custom LLM provider endpoint, required when `provider` is
`openai-compatible`. |
+| override.request_body | object | False | |
| Request body overrides. See [Provider-aware `max_tokens`
mapping](#provider-aware-max_tokens-mapping) for how the contained fields are
forwarded to each provider. |
+| override.request_body.max_tokens | integer | False | | ≥ 1 | Maximum number of output tokens. APISIX automatically maps this to the provider-specific field name (e.g. `max_completion_tokens` for OpenAI Chat Completions, `max_output_tokens` for OpenAI Responses API, `max_tokens` for most other providers). By default, client request fields take priority and the override value only fills in when the client did not set it; set `override.request_body_force_override` to `true` to forcefully overwrite the client value. |
+| override.request_body_force_override | boolean | False | false |
| When `false` (default), client request body fields
take priority and `override.request_body` values only fill in missing fields.
When `true`, `override.request_body` values forcefully overwrite client request
body fields. |
| logging | object | False | |
| Logging configurations. Does not affect `error.log`. |
| logging.summaries | boolean | False | false |
| If true, logs request LLM model, duration, request, and response
tokens. |
| logging.payloads | boolean | False | false |
| If true, logs request and response payload. |
@@ -77,6 +80,31 @@ In addition, the Plugin also supports logging LLM request
information in the acc
| keepalive_pool | integer | False | 30 |
| Keepalive pool size for the LLM service connection. |
| ssl_verify | boolean | False | true |
| If true, verifies the LLM service's certificate. |
+## Provider-aware `max_tokens` mapping
+
+LLM providers and API endpoints disagree on the field name used to cap the
number of output tokens. Configuring `override.request_body.max_tokens` lets
you set a single value in APISIX and have it forwarded under the field name
expected by each provider/endpoint.
+
+The table below shows, for each `provider` and target API endpoint, the
upstream field name APISIX rewrites `max_tokens` to. A `—` means the provider
does not expose that endpoint.
+
+| Provider | OpenAI Chat Completions | OpenAI Responses API
| Anthropic Messages |
+| ------------------- | ---------------------------- | ----------------------
| ------------------ |
+| `openai` | `max_completion_tokens` ¹ | `max_output_tokens`
| — |
+| `openai-compatible` | `max_tokens` | `max_output_tokens`
| — |
+| `azure-openai` | `max_tokens` | —
| — |
+| `deepseek` | `max_tokens` | —
| — |
+| `aimlapi` | `max_tokens` | —
| — |
+| `openrouter` | `max_tokens` | —
| — |
+| `gemini` | `max_completion_tokens` | —
| — |
+| `vertex-ai` | `max_completion_tokens` | —
| — |
+| `anthropic` | `max_tokens` | —
| `max_tokens` |
+
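+¹ When `provider` is `openai` and the target is the Chat Completions endpoint, the override is applied only if the client request sets neither `max_tokens` nor `max_completion_tokens`, or if `override.request_body_force_override` is `true`. When it is applied, APISIX writes the value to `max_completion_tokens` and removes any `max_tokens` field from the request body — `max_tokens` has been deprecated in favor of `max_completion_tokens` by OpenAI. Otherwise the client's fields are forwarded unchanged.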
+
+Priority between client request and override is controlled by
`override.request_body_force_override`:
+
+- `false` (default): if the client request body already sets the corresponding
field, it is preserved; the override value only fills in when the field is
missing.
+- `true`: the override value forcefully overwrites the field in the client
request body.
+
## Examples
The examples below demonstrate how you can configure `ai-proxy` for different
scenarios.
diff --git a/docs/zh/latest/plugins/ai-proxy-multi.md
b/docs/zh/latest/plugins/ai-proxy-multi.md
index d7f57d5df..137fe9ab2 100644
--- a/docs/zh/latest/plugins/ai-proxy-multi.md
+++ b/docs/zh/latest/plugins/ai-proxy-multi.md
@@ -78,6 +78,9 @@ import TabItem from '@theme/TabItem';
| instances.options.model | string | 否 |
| | LLM 模型的名称,如 `gpt-4` 或
`gpt-3.5`。有关更多可用模型,请参阅您的 LLM 提供商的 API 文档。 |
| instances.override | object | 否 |
| | 覆盖设置。 |
| instances.override.endpoint | string | 否 |
| | 用于替换默认端点的 LLM 提供商端点。如果未配置,插件使用默认的 OpenAI 端点
`https://api.openai.com/v1/chat/completions`。 |
+| instances.override.request_body | object | 否 |
| | 请求体覆盖配置。请参阅 `ai-proxy` 文档中的 [`max_tokens`
字段映射](./ai-proxy.md#provider-aware-max_tokens-mapping) 章节,了解其内部字段如何转发到各个上游服务商。 |
+| instances.override.request_body.max_tokens | integer | 否 |
| ≥ 1 | 最大输出 token 数。APISIX
会自动将该值映射为各上游服务商对应的字段名(例如 OpenAI Chat Completions 使用
`max_completion_tokens`、OpenAI Responses API 使用 `max_output_tokens`、其他大多数服务商使用
`max_tokens`)。默认情况下客户端请求中的对应字段优先,仅当客户端未设置时该 override 值才会生效;将
`instances.override.request_body_force_override` 设置为 `true` 可强制覆盖客户端值。 |
+| instances.override.request_body_force_override | boolean | 否 | false |
| 为
`false`(默认)时,客户端请求体中的字段优先,`instances.override.request_body` 仅补充缺失字段。为 `true`
时,`instances.override.request_body` 的值强制覆盖客户端请求体中的同名字段。 |
| logging | object | 否 |
| | 日志配置。不影响 `error.log`。 |
| logging.summaries | boolean | 否 | false
| | 如果为 true,记录请求 LLM 模型、持续时间、请求和响应令牌。 |
| logging.payloads | boolean | 否 | false
| | 如果为 true,记录请求和响应负载。 |
diff --git a/docs/zh/latest/plugins/ai-proxy.md
b/docs/zh/latest/plugins/ai-proxy.md
index 3d2354ada..3b7f7ed94 100644
--- a/docs/zh/latest/plugins/ai-proxy.md
+++ b/docs/zh/latest/plugins/ai-proxy.md
@@ -66,6 +66,9 @@ description: ai-proxy 插件通过将插件配置转换为所需的请求格式
| options.model | string | 否 | |
| LLM 模型的名称,如 `gpt-4` 或 `gpt-3.5`。请参阅 LLM 提供商的 API 文档以了解可用模型。 |
| override | object | 否 | |
| 覆盖设置。 |
| override.endpoint | string | 否 | |
| 自定义 LLM 提供商端点,当 `provider` 为 `openai-compatible` 时必需。 |
+| override.request_body | object | 否 | |
| 请求体覆盖配置。请参阅 [`max_tokens`
字段映射](#provider-aware-max_tokens-mapping) 章节,了解其内部字段如何转发到各个上游服务商。 |
+| override.request_body.max_tokens | integer | 否 | | ≥ 1
| 最大输出 token 数。APISIX 会自动将该值映射为各上游服务商对应的字段名(例如 OpenAI Chat
Completions 使用 `max_completion_tokens`、OpenAI Responses API 使用
`max_output_tokens`、其他大多数服务商使用 `max_tokens`)。默认情况下客户端请求中的对应字段优先,仅当客户端未设置时该
override 值才会生效;将 `override.request_body_force_override` 设置为 `true` 可强制覆盖客户端值。 |
+| override.request_body_force_override | boolean | 否 | false |
| 为 `false`(默认)时,客户端请求体中的字段优先,`override.request_body`
仅补充缺失字段。为 `true` 时,`override.request_body` 的值强制覆盖客户端请求体中的同名字段。 |
| logging | object | 否 | |
| 日志配置。不影响 `error.log`。 |
| logging.summaries | boolean | 否 | false |
| 如果为 true,记录请求 LLM 模型、持续时间、请求和响应令牌。 |
| logging.payloads | boolean | 否 | false |
| 如果为 true,记录请求和响应负载。 |
@@ -77,6 +80,31 @@ description: ai-proxy 插件通过将插件配置转换为所需的请求格式
| keepalive_pool | integer | 否 | 30 |
| LLM 服务连接的保活池大小。 |
| ssl_verify | boolean | 否 | true |
| 如果为 true,验证 LLM 服务的证书。 |
+## Provider-aware `max_tokens` mapping
+
+不同的 LLM 服务商和 API 端点使用不同的字段名来限制输出 token 数。通过配置
`override.request_body.max_tokens`,您只需在 APISIX 中设置一个统一的值,APISIX 会根据上游服务商和 API
端点将其转发为对应的字段名。
+
+下表展示了在不同 `provider` 和目标 API 端点下,APISIX 会将 `max_tokens` 映射为哪个上游字段名。`—`
表示该服务商不支持对应的端点。
+
+| Provider | OpenAI Chat Completions | OpenAI Responses API
| Anthropic Messages |
+| ------------------- | ---------------------------- | ----------------------
| ------------------ |
+| `openai` | `max_completion_tokens` ¹ | `max_output_tokens`
| — |
+| `openai-compatible` | `max_tokens` | `max_output_tokens`
| — |
+| `azure-openai` | `max_tokens` | —
| — |
+| `deepseek` | `max_tokens` | —
| — |
+| `aimlapi` | `max_tokens` | —
| — |
+| `openrouter` | `max_tokens` | —
| — |
+| `gemini` | `max_completion_tokens` | —
| — |
+| `vertex-ai` | `max_completion_tokens` | —
| — |
+| `anthropic` | `max_tokens` | —
| `max_tokens` |
+
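+¹ 当 `provider` 为 `openai` 且目标为 Chat Completions 端点时,仅当客户端请求既未设置 `max_tokens` 也未设置 `max_completion_tokens`,或 `override.request_body_force_override` 为 `true` 时,override 才会生效。生效时,APISIX 会将该值写入 `max_completion_tokens`,并删除请求体中已有的 `max_tokens` 字段——OpenAI 已弃用 `max_tokens`,改用 `max_completion_tokens`。否则客户端请求中的字段将原样转发。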
+
+客户端请求与 override 之间的优先级由 `override.request_body_force_override` 控制:
+
+- `false`(默认):如果客户端请求体已经设置了对应字段,则保留客户端值;override 值仅在该字段缺失时补充。
+- `true`:override 值强制覆盖客户端请求体中的同名字段。
+
## 示例
以下示例演示了如何为不同场景配置 `ai-proxy`。
diff --git a/t/plugin/ai-proxy-request-body-override.t
b/t/plugin/ai-proxy-request-body-override.t
new file mode 100644
index 000000000..779ead5c3
--- /dev/null
+++ b/t/plugin/ai-proxy-request-body-override.t
@@ -0,0 +1,585 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+BEGIN {
+ $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
+use t::APISIX 'no_plan';
+
+log_level("info");
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+add_block_preprocessor(sub {
+ my ($block) = @_;
+
+ if (!defined $block->request) {
+ $block->set_value("request", "GET /t");
+ }
+
+ my $http_config = $block->http_config // <<_EOC_;
+ # Upstream that echoes the request body it receives so the test can
+ # assert exactly what was forwarded by ai-proxy.
+ server {
+ server_name echo-openai;
+ listen 6732;
+ default_type 'application/json';
+
+ location /v1/chat/completions {
+ content_by_lua_block {
+ local json = require("cjson.safe")
+ ngx.req.read_body()
+ local raw = ngx.req.get_body_data() or ""
+ ngx.status = 200
+ ngx.say(json.encode({
+ id = "chatcmpl-1",
+ object = "chat.completion",
+ model = "echo",
+ choices = {{
+ index = 0,
+ message = { role = "assistant", content = raw },
+ finish_reason = "stop",
+ }},
+ usage = { prompt_tokens = 1, completion_tokens = 1,
total_tokens = 2 },
+ }))
+ }
+ }
+
+ location /v1/responses {
+ content_by_lua_block {
+ local json = require("cjson.safe")
+ ngx.req.read_body()
+ local raw = ngx.req.get_body_data() or ""
+ ngx.status = 200
+ ngx.say(json.encode({
+ id = "resp_1",
+ object = "response",
+ created_at = 1,
+ model = "echo",
+ output = {{
+ type = "message",
+ role = "assistant",
+ content = {{ type = "output_text", text = raw }},
+ }},
+ usage = { input_tokens = 1, output_tokens = 1,
total_tokens = 2 },
+ }))
+ }
+ }
+
+ location /v1/messages {
+ content_by_lua_block {
+ local json = require("cjson.safe")
+ ngx.req.read_body()
+ local raw = ngx.req.get_body_data() or ""
+ ngx.status = 200
+ ngx.say(json.encode({
+ id = "msg_1",
+ type = "message",
+ role = "assistant",
+ model = "echo",
+ content = {{ type = "text", text = raw }},
+ stop_reason = "end_turn",
+ usage = { input_tokens = 1, output_tokens = 1 },
+ }))
+ }
+ }
+ }
+_EOC_
+
+ $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: schema rejects unknown fields in request_body
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/anything",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": { "header": { "Authorization": "Bearer t"
} },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body": {
+ "temperature": 0.5
+ }
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ ngx.status = code
+ ngx.print(body)
+ }
+ }
+--- error_code: 400
+--- response_body_like: .*additional properties forbidden.*
+
+
+
+=== TEST 2: openai provider maps max_tokens to max_completion_tokens
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/chat",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": { "header": { "Authorization": "Bearer t"
} },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body": {
+ "max_tokens": 555
+ }
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end
+
+ local http = require("resty.http").new()
+ local res = assert(http:request_uri("http://127.0.0.1:" ..
ngx.var.server_port .. "/chat", {
+ method = "POST",
+ body = '{"messages":[{"role":"user","content":"hi"}]}',
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ local cjson = require("cjson.safe")
+ local body = cjson.decode(res.body)
+ local echoed = cjson.decode(body.choices[1].message.content)
+ ngx.say("max_completion_tokens=", echoed.max_completion_tokens)
+ }
+ }
+--- response_body
+max_completion_tokens=555
+
+
+
+=== TEST 3: openai-compatible provider maps max_tokens to max_tokens
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/chat",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai-compatible",
+ "auth": { "header": { "Authorization": "Bearer t"
} },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body": {
+ "max_tokens": 444
+ }
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end
+
+ local http = require("resty.http").new()
+ local res = assert(http:request_uri("http://127.0.0.1:" ..
ngx.var.server_port .. "/chat", {
+ method = "POST",
+ body = '{"messages":[{"role":"user","content":"hi"}]}',
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ local cjson = require("cjson.safe")
+ local body = cjson.decode(res.body)
+ local echoed = cjson.decode(body.choices[1].message.content)
+ ngx.say("max_tokens=", echoed.max_tokens)
+ }
+ }
+--- response_body
+max_tokens=444
+
+
+
+=== TEST 4: openai responses API maps max_tokens to max_output_tokens
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/v1/responses",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": { "header": { "Authorization": "Bearer t"
} },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body": {
+ "max_tokens": 333
+ }
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end
+
+ local http = require("resty.http").new()
+ local res = assert(http:request_uri("http://127.0.0.1:" ..
ngx.var.server_port .. "/v1/responses", {
+ method = "POST",
+ body = '{"model":"gpt-4o","input":"hello"}',
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ local cjson = require("cjson.safe")
+ local body = cjson.decode(res.body)
+ local echoed = cjson.decode(body.output[1].content[1].text)
+ ngx.say("max_output_tokens=", echoed.max_output_tokens)
+ }
+ }
+--- response_body
+max_output_tokens=333
+
+
+
+=== TEST 5: ai-proxy-multi per-instance override
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/chat",
+ "plugins": {
+ "ai-proxy-multi": {
+ "instances": [{
+ "name": "test",
+ "provider": "openai",
+ "weight": 1,
+ "auth": { "header": { "Authorization": "Bearer t" } },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body": {
+ "max_tokens": 222
+ }
+ }
+ }],
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end -- bail out when the admin API call did not succeed
+
+ local http = require("resty.http").new()
+ local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+ method = "POST",
+ body = '{"messages":[{"role":"user","content":"hi"}]}',
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ local cjson = require("cjson.safe")
+ local body = cjson.decode(res.body)
+ local echoed = cjson.decode(body.choices[1].message.content) -- the message content is itself JSON
+ ngx.say("max_completion_tokens=", echoed.max_completion_tokens)
+ }
+ }
+--- response_body
+max_completion_tokens=222
+
+
+
+=== TEST 6: cross-protocol: anthropic client to openai provider, override applies to target protocol
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/v1/messages",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": { "header": { "Authorization": "Bearer t" } },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body_force_override": true,
+ "request_body": {
+ "max_tokens": 77
+ }
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end -- bail out when the admin API call did not succeed
+
+ local http = require("resty.http").new()
+ local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/v1/messages", {
+ method = "POST",
+ body = '{"model":"claude-3","max_tokens":10,"messages":[{"role":"user","content":"hi"}]}',
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ ngx.status = res.status -- surface the proxied status in the test response
+ local cjson = require("cjson.safe")
+ local body = cjson.decode(res.body)
+ local echoed = cjson.decode(body.content[1].text) -- the content text is itself JSON
+ -- openai provider maps to max_completion_tokens
+ ngx.say("max_completion_tokens=", echoed.max_completion_tokens,
+ " has_messages=", tostring(type(echoed.messages) == "table"))
+ }
+ }
+--- response_body
+max_completion_tokens=77 has_messages=true
+
+
+
+=== TEST 7: default mode - client value takes priority
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/chat",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai-compatible",
+ "auth": { "header": { "Authorization": "Bearer t" } },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body": {
+ "max_tokens": 555
+ }
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end -- bail out when the admin API call did not succeed
+
+ local http = require("resty.http").new()
+ -- Client sends max_tokens=999 which should NOT be overwritten
+ local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+ method = "POST",
+ body = '{"messages":[{"role":"user","content":"hi"}],"max_tokens":999}',
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ local cjson = require("cjson.safe")
+ local body = cjson.decode(res.body)
+ local echoed = cjson.decode(body.choices[1].message.content) -- the message content is itself JSON
+ -- max_tokens from client (999) wins
+ ngx.say("max_tokens=", echoed.max_tokens)
+ }
+ }
+--- response_body
+max_tokens=999
+
+
+
+=== TEST 8: force_override mode - override forcefully overwrites client params
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/chat",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai-compatible",
+ "auth": { "header": { "Authorization": "Bearer t" } },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body_force_override": true,
+ "request_body": {
+ "max_tokens": 555
+ }
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end -- bail out when the admin API call did not succeed
+
+ local http = require("resty.http").new()
+ -- Client sends max_tokens=999 which SHOULD be overwritten
+ local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+ method = "POST",
+ body = '{"messages":[{"role":"user","content":"hi"}],"max_tokens":999}',
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ local cjson = require("cjson.safe")
+ local body = cjson.decode(res.body)
+ local echoed = cjson.decode(body.choices[1].message.content) -- the message content is itself JSON
+ -- max_tokens from override (555) wins over client (999)
+ ngx.say("max_tokens=", echoed.max_tokens)
+ }
+ }
+--- response_body
+max_tokens=555
+
+
+
+=== TEST 9: default mode fills missing field
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/chat",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai-compatible",
+ "auth": { "header": { "Authorization": "Bearer t" } },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body": {
+ "max_tokens": 555
+ }
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end -- bail out when the admin API call did not succeed
+
+ local http = require("resty.http").new()
+ -- Client does NOT send max_tokens; override should fill it in
+ local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+ method = "POST",
+ body = '{"messages":[{"role":"user","content":"hi"}]}',
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ local cjson = require("cjson.safe")
+ local body = cjson.decode(res.body)
+ local echoed = cjson.decode(body.choices[1].message.content) -- the message content is itself JSON
+ ngx.say("max_tokens=", echoed.max_tokens)
+ }
+ }
+--- response_body
+max_tokens=555
+
+
+
+=== TEST 10: openai chat - deprecated max_tokens in body is respected in default mode and cleared in force mode
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+
+ -- Route with default mode (no force)
+ local code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/chat",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "model": { "name": "gpt-4" },
+ "auth": { "header": { "Authorization": "Bearer t" } },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body": {
+ "max_tokens": 999
+ }
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end -- bail out when the admin API call did not succeed
+
+ local http = require("resty.http").new()
+ local cjson = require("cjson.safe")
+
+ -- Client sends deprecated max_tokens=200; default mode should NOT override
+ local res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+ method = "POST",
+ body = '{"messages":[{"role":"user","content":"hi"}],"max_tokens":200}',
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ local body = cjson.decode(res.body)
+ local echoed = cjson.decode(body.choices[1].message.content) -- the message content is itself JSON
+ ngx.say("default: max_tokens=", echoed.max_tokens,
+ " max_completion_tokens=", echoed.max_completion_tokens)
+
+ -- Switch to force mode
+ code = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/chat",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "model": { "name": "gpt-4" },
+ "auth": { "header": { "Authorization": "Bearer t" } },
+ "override": {
+ "endpoint": "http://localhost:6732",
+ "request_body": {
+ "max_tokens": 999
+ },
+ "request_body_force_override": true
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+ if code >= 300 then ngx.status = code; return end -- bail out when the admin API call did not succeed
+
+ ngx.sleep(0.5) -- presumably lets the updated route take effect before the next request (confirm)
+
+ -- Client sends deprecated max_tokens=200; force mode should clear it and set max_completion_tokens
+ res = assert(http:request_uri("http://127.0.0.1:" .. ngx.var.server_port .. "/chat", {
+ method = "POST",
+ body = '{"messages":[{"role":"user","content":"hi"}],"max_tokens":200}',
+ headers = { ["Content-Type"] = "application/json" },
+ }))
+ body = cjson.decode(res.body)
+ echoed = cjson.decode(body.choices[1].message.content)
+ ngx.say("force: max_tokens=", echoed.max_tokens,
+ " max_completion_tokens=", echoed.max_completion_tokens)
+ }
+ }
+--- response_body
+default: max_tokens=200 max_completion_tokens=nil
+force: max_tokens=nil max_completion_tokens=999