This is an automated email from the ASF dual-hosted git repository.
nic443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 35122557c feat(ai): add OpenAI Responses API (/v1/responses) support
(#13186)
35122557c is described below
commit 35122557cfaa81bf6f9d77c56a6882cc7c5b137c
Author: Nic <[email protected]>
AuthorDate: Fri Apr 10 10:48:39 2026 +0800
feat(ai): add OpenAI Responses API (/v1/responses) support (#13186)
---
apisix/plugins/ai-prompt-guard.lua | 7 +-
apisix/plugins/ai-protocols/init.lua | 7 +-
apisix/plugins/ai-protocols/openai-responses.lua | 315 ++++++++++++++++++++
apisix/plugins/ai-providers/openai-compatible.lua | 4 +-
apisix/plugins/ai-providers/openai.lua | 1 +
t/lib/server.lua | 2 +
t/plugin/ai-aliyun-content-moderation.t | 186 ++++++++++++
t/plugin/ai-prompt-decorator.t | 283 +++++++++++++++++-
t/plugin/ai-prompt-guard.t | 219 +++++++++++++-
t/plugin/ai-proxy.t | 338 +++++++++++++++++++++-
t/plugin/ai-rag.t | 61 ++++
11 files changed, 1411 insertions(+), 12 deletions(-)
diff --git a/apisix/plugins/ai-prompt-guard.lua
b/apisix/plugins/ai-prompt-guard.lua
index 6560e60dc..fbeac979b 100644
--- a/apisix/plugins/ai-prompt-guard.lua
+++ b/apisix/plugins/ai-prompt-guard.lua
@@ -107,9 +107,14 @@ function _M.access(conf, ctx)
return 400, {message = err}
end
+ local proto_name = protocols.detect(json_body, ctx)
local messages = protocols.get_messages(json_body, ctx)
- messages = get_content_to_check(conf, messages)
+ -- Responses API: instructions + input are parallel fields, not
conversation history,
+ -- so skip the "last message only" filtering of get_content_to_check.
+ if proto_name ~= "openai-responses" then
+ messages = get_content_to_check(conf, messages)
+ end
if not conf.match_all_roles then
-- filter to only user messages
local new_messages = {}
diff --git a/apisix/plugins/ai-protocols/init.lua
b/apisix/plugins/ai-protocols/init.lua
index 583c7e4aa..c5db8742c 100644
--- a/apisix/plugins/ai-protocols/init.lua
+++ b/apisix/plugins/ai-protocols/init.lua
@@ -28,13 +28,15 @@ local _M = {}
local registered = {
["openai-chat"] = require("apisix.plugins.ai-protocols.openai-chat"),
+ ["openai-responses"] =
require("apisix.plugins.ai-protocols.openai-responses"),
["openai-embeddings"] =
require("apisix.plugins.ai-protocols.openai-embeddings"),
["anthropic-messages"] =
require("apisix.plugins.ai-protocols.anthropic-messages"),
}
--- Detection order: URL+body first (anthropic), then body-only (chat,
embeddings).
+-- Detection order: URL+body first (anthropic, responses), then body-only
(chat, embeddings).
local detection_order = {
{ name = "anthropic-messages", protocol = registered["anthropic-messages"]
},
+ { name = "openai-responses", protocol = registered["openai-responses"] },
{ name = "openai-chat", protocol = registered["openai-chat"] },
{ name = "openai-embeddings", protocol = registered["openai-embeddings"] },
}
@@ -43,7 +45,8 @@ local detection_order = {
--- Detect the client protocol by asking each protocol if it matches.
-- @param body table The parsed request body
-- @param ctx table The request context
--- @return string Protocol name: "openai-chat" | "openai-embeddings" |
"anthropic-messages"
+-- @return string Protocol name: "openai-chat" | "openai-responses"
+-- | "openai-embeddings" | "anthropic-messages"
function _M.detect(body, ctx)
for _, entry in ipairs(detection_order) do
if entry.protocol.matches(body, ctx) then
diff --git a/apisix/plugins/ai-protocols/openai-responses.lua
b/apisix/plugins/ai-protocols/openai-responses.lua
new file mode 100644
index 000000000..2c2165d00
--- /dev/null
+++ b/apisix/plugins/ai-protocols/openai-responses.lua
@@ -0,0 +1,315 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+--- OpenAI Responses API protocol adapter.
+-- Handles the Responses API format with its different SSE event model
+-- and response structure (output[] instead of choices[]).
+
+local core = require("apisix.core")
+local uuid = require("resty.jit-uuid")
+local sse = require("apisix.plugins.ai-transport.sse")
+local type = type
+local ipairs = ipairs
+local table = table
+local string_sub = string.sub
+
+local _M = {}
+
+
+--- Report whether a request targets the OpenAI Responses API.
+-- A request qualifies when its URI ends with "/v1/responses" and the parsed
+-- body is a table carrying a non-nil `input` field.
+-- @param body The parsed request body
+-- @param ctx table The request context; the URI is read from ctx.var.uri
+-- @return true on a match, otherwise a falsy value (the falsy URI lookup
+--         result when no URI is available, false in every other case)
+function _M.matches(body, ctx)
+    local uri = ctx.var and ctx.var.uri
+    if not uri then
+        return uri
+    end
+    -- 13 is the length of the "/v1/responses" suffix
+    if string_sub(uri, -13) ~= "/v1/responses" then
+        return false
+    end
+    if type(body) ~= "table" then
+        return false
+    end
+    return body.input ~= nil
+end
+
+
+--- Tell whether the client asked for a streamed response.
+-- Only a literal boolean `true` in `body.stream` counts as streaming.
+-- @param body table The parsed request body
+-- @return boolean
+function _M.is_streaming(body)
+    local stream_flag = body.stream
+    return stream_flag == true
+end
+
+
+
+--- Translate one upstream Responses API SSE event into a parse result.
+-- @param event table SSE event; `event.type` is the event name and
+--        `event.data` the JSON payload string
+-- @param ctx Not used by this adapter
+-- @param state Not used by this adapter
+-- @return table A result whose `type` field is one of:
+--   "delta"          response.output_text.delta with a string delta
+--                    (the text is returned in `texts`)
+--   "usage_and_done" response.completed carrying a usage table
+--                    (`usage` is normalized, `raw_usage` is the original)
+--   "done"           response.completed without usable usage, or
+--                    response.failed / response.incomplete / error
+--   "skip"           anything else, including undecodable delta payloads
+function _M.parse_sse_event(event, ctx, state)
+    local event_type = event.type
+
+    if event_type == "response.output_text.delta" then
+        local data, err = core.json.decode(event.data)
+        if not data then
+            core.log.warn("failed to decode SSE data: ", err)
+            return { type = "skip" }
+        end
+        if type(data.delta) == "string" then
+            return {
+                type = "delta",
+                texts = { data.delta },
+            }
+        end
+        return { type = "skip" }
+
+    elseif event_type == "response.completed" then
+        local result = { type = "done" }
+        local data, err = core.json.decode(event.data)
+        if not data then
+            core.log.warn("failed to decode response.completed SSE data: ", err)
+            return result
+        end
+        if type(data.response) == "table"
+           and type(data.response.usage) == "table" then
+            local usage = data.response.usage
+            local prompt = usage.input_tokens or 0
+            local completion = usage.output_tokens or 0
+            -- Same fallback as extract_usage: when the upstream omits
+            -- total_tokens, derive it from input + output instead of
+            -- reporting 0. The sum is only taken for real numbers so a
+            -- malformed payload cannot raise here.
+            local total = usage.total_tokens
+            if not total then
+                if type(prompt) == "number" and type(completion) == "number" then
+                    total = prompt + completion
+                else
+                    total = 0
+                end
+            end
+            result.type = "usage_and_done"
+            result.usage = {
+                prompt_tokens = prompt,
+                completion_tokens = completion,
+                total_tokens = total,
+            }
+            result.raw_usage = usage
+        end
+        return result
+
+    elseif event_type == "response.failed"
+           or event_type == "response.incomplete"
+           or event_type == "error" then
+        return { type = "done" }
+    end
+
+    -- All other Responses API events are silently passed through
+    return { type = "skip" }
+end
+
+
+--- Collect the assistant text from a non-streaming Responses API body.
+-- Walks `res_body.output` and, for every item of type "message", gathers the
+-- `text` of each content part whose type is "output_text".
+-- @param res_body table The decoded response body
+-- @return string|nil The collected texts joined by a single space, or nil
+--         when the body has no `output` table or no text part is found
+function _M.extract_response_text(res_body)
+    -- Guard the body itself as extract_usage does, so a non-table body
+    -- yields nil instead of raising on the field access.
+    if type(res_body) ~= "table" or type(res_body.output) ~= "table" then
+        return nil
+    end
+    local texts = {}
+    for _, item in ipairs(res_body.output) do
+        if type(item) == "table" and item.type == "message"
+           and type(item.content) == "table" then
+            for _, part in ipairs(item.content) do
+                -- A part that is not a table (e.g. a number or boolean)
+                -- cannot be indexed; skip it rather than raise.
+                if type(part) == "table" and part.type == "output_text"
+                   and type(part.text) == "string" then
+                    core.table.insert(texts, part.text)
+                end
+            end
+        end
+    end
+    if #texts > 0 then
+        return table.concat(texts, " ")
+    end
+    return nil
+end
+
+
+--- Assemble a minimal non-streaming Responses API request body.
+-- @param system_prompt Optional; stored as `instructions` when truthy
+-- @param user_content Stored as `input`
+-- @param opts table Optional; `opts.model` is copied to `model` when truthy
+-- @return table The request body, always with `stream = false`
+function _M.build_simple_request(system_prompt, user_content, opts)
+    local model = opts and opts.model
+    local request = {
+        input = user_content,
+        stream = false,
+    }
+    if system_prompt then
+        request.instructions = system_prompt
+    end
+    if model then
+        request.model = model
+    end
+    return request
+end
+
+
+--- Read token usage from a non-streaming Responses API body.
+-- The Responses API reports `input_tokens` / `output_tokens`; they are
+-- mapped to `prompt_tokens` / `completion_tokens` here.
+-- @param res_body The decoded response body
+-- @return table|nil Normalized usage {prompt_tokens, completion_tokens,
+--         total_tokens}; total falls back to prompt + completion
+-- @return table|nil The untouched upstream usage table
+-- Both values are nil when the body or its `usage` field is not a table.
+function _M.extract_usage(res_body)
+    local raw_usage = type(res_body) == "table" and res_body.usage
+    if type(raw_usage) ~= "table" then
+        return nil, nil
+    end
+
+    local input_tokens = raw_usage.input_tokens or 0
+    local output_tokens = raw_usage.output_tokens or 0
+    local total = raw_usage.total_tokens
+    if not total then
+        total = input_tokens + output_tokens
+    end
+
+    local usage = {
+        prompt_tokens = input_tokens,
+        completion_tokens = output_tokens,
+        total_tokens = total,
+    }
+    return usage, raw_usage
+end
+
+
+--- Extract all text content from a request body for moderation.
+-- Collected, in order: the `input` string, or for an `input` array every
+-- string item, every string `content`, and the `text` of every content
+-- part; finally `instructions`.
+-- Only string values are collected, so a malformed client body (for example
+-- a table-valued `instructions` or `text`) cannot put non-string entries
+-- into the returned list.
+-- @param body table The parsed request body
+-- @return table Array of strings (possibly empty)
+function _M.extract_request_content(body)
+    local contents = {}
+    local input = body.input
+    if type(input) == "string" then
+        core.table.insert(contents, input)
+    elseif type(input) == "table" then
+        for _, item in ipairs(input) do
+            if type(item) == "string" then
+                core.table.insert(contents, item)
+            elseif type(item) == "table" then
+                local content = item.content
+                if type(content) == "string" then
+                    core.table.insert(contents, content)
+                elseif type(content) == "table" then
+                    for _, part in ipairs(content) do
+                        if type(part) == "table"
+                           and type(part.text) == "string" then
+                            core.table.insert(contents, part.text)
+                        end
+                    end
+                end
+            end
+        end
+    end
+    -- Same string check that get_messages applies to instructions
+    if type(body.instructions) == "string" then
+        core.table.insert(contents, body.instructions)
+    end
+    return contents
+end
+
+
+--- Get messages in canonical {role, content} format.
+-- Converts instructions + input into a messages-style list:
+--   * a string `instructions` becomes a "system" message;
+--   * a string `input`, or a string item of an `input` array, becomes a
+--     "user" message;
+--   * a table item uses `item.role` (default "user") with `item.content`
+--     or, failing that, `item.text` as its content.
+-- When an item's content is an array of parts, the string `text` of each
+-- table part is joined with "\n" into one message, so structured content is
+-- visible to consumers of this list just as it is to
+-- extract_request_content. Items that yield no string content are skipped.
+-- @param body table The parsed request body
+-- @return table Array of {role = string, content = string}
+function _M.get_messages(body)
+    local messages = {}
+    if type(body.instructions) == "string" then
+        core.table.insert(messages, {role = "system", content = body.instructions})
+    end
+    local input = body.input
+    if type(input) == "string" then
+        core.table.insert(messages, {role = "user", content = input})
+    elseif type(input) == "table" then
+        for _, item in ipairs(input) do
+            if type(item) == "string" then
+                core.table.insert(messages, {role = "user", content = item})
+            elseif type(item) == "table" then
+                local role = item.role or "user"
+                local content = item.content or item.text
+                if type(content) == "table" then
+                    -- Flatten content parts into a single string
+                    local texts = {}
+                    for _, part in ipairs(content) do
+                        if type(part) == "table"
+                           and type(part.text) == "string" then
+                            core.table.insert(texts, part.text)
+                        end
+                    end
+                    if #texts > 0 then
+                        content = table.concat(texts, "\n")
+                    else
+                        content = nil
+                    end
+                end
+                if type(content) == "string" then
+                    core.table.insert(messages, {role = role, content = content})
+                end
+            end
+        end
+    end
+    return messages
+end
+
+
+--- Prepend messages to the request body.
+-- The `content` of every message, whatever its role, is joined with "\n"
+-- and placed in front of `body.instructions`. If `instructions` is not
+-- already a string it is replaced by the joined text. `body.input` is left
+-- untouched.
+-- @param body table The parsed request body (modified in place)
+-- @param msgs table Array of {role, content}; nil or empty is a no-op
+function _M.prepend_messages(body, msgs)
+    if not msgs or #msgs == 0 then
+        return
+    end
+
+    local texts = {}
+    for _, message in ipairs(msgs) do
+        core.table.insert(texts, message.content)
+    end
+    local prefix = table.concat(texts, "\n")
+
+    local existing = body.instructions
+    if type(existing) ~= "string" then
+        body.instructions = prefix
+        return
+    end
+    body.instructions = prefix .. "\n" .. existing
+end
+
+
+--- Append messages to the request body.
+-- The `content` of every message is joined with "\n" and added after the
+-- existing input: concatenated to a string `input`, pushed as one new
+-- "user" message item onto an array `input`, or assigned as `input` when
+-- it is neither.
+-- @param body table The parsed request body (modified in place)
+-- @param msgs table Array of {role, content}; nil or empty is a no-op
+function _M.append_messages(body, msgs)
+    if not msgs or #msgs == 0 then
+        return
+    end
+
+    local texts = {}
+    for _, message in ipairs(msgs) do
+        core.table.insert(texts, message.content)
+    end
+    local suffix = table.concat(texts, "\n")
+
+    local input = body.input
+    local input_type = type(input)
+    if input_type == "table" then
+        core.table.insert(input, {
+            type = "message",
+            role = "user",
+            content = suffix,
+        })
+        return
+    end
+    if input_type == "string" then
+        body.input = input .. "\n" .. suffix
+        return
+    end
+    body.input = suffix
+end
+
+
+--- Return the raw request content used for logging.
+-- @param body table The parsed request body
+-- @return The `input` field exactly as the client sent it
+function _M.get_request_content(body)
+    local raw_input = body.input
+    return raw_input
+end
+--- Build the payload sent back to the client when a request is denied.
+-- The payload is a "completed" response object with a fresh v4 UUID as id
+-- and a single assistant message holding the deny text.
+-- @param opts table {text, model, usage, stream}
+-- @return string The JSON-encoded response object, or, when `opts.stream`
+--         is truthy, two SSE events: a response.output_text.delta carrying
+--         the text followed by a response.completed carrying the object
+function _M.build_deny_response(opts)
+    local deny_text = opts.text
+    local response = {
+        id = uuid.generate_v4(),
+        object = "response",
+        status = "completed",
+        model = opts.model,
+        output = {{
+            type = "message",
+            role = "assistant",
+            content = {{
+                type = "output_text",
+                text = deny_text,
+            }},
+        }},
+        usage = opts.usage,
+    }
+
+    if not opts.stream then
+        return core.json.encode(response)
+    end
+
+    local delta_payload = core.json.encode({
+        type = "response.output_text.delta",
+        delta = deny_text,
+    })
+    local completed_payload = core.json.encode({
+        type = "response.completed",
+        response = response,
+    })
+    return "event: response.output_text.delta\n"
+           .. "data: " .. delta_payload .. "\n\n"
+           .. "event: response.completed\n"
+           .. "data: " .. completed_payload .. "\n\n"
+end
+
+
+--- Create a zeroed usage object using the Responses API field names.
+-- @return table {input_tokens = 0, output_tokens = 0, total_tokens = 0}
+function _M.empty_usage()
+    local usage = { input_tokens = 0, output_tokens = 0, total_tokens = 0 }
+    return usage
+end
+
+
+--- Tell whether an SSE event counts as a data event for this adapter.
+-- Only events whose type is "response.completed" qualify.
+-- @param event table SSE event with a `type` field
+-- @return boolean
+function _M.is_data_event(event)
+    local event_type = event.type
+    return event_type == "response.completed"
+end
+
+
+--- Tell whether an SSE event is the terminal event of the stream.
+-- Only events whose type is "response.completed" qualify.
+-- @param event table SSE event with a `type` field
+-- @return boolean
+function _M.is_done_event(event)
+    local event_type = event.type
+    return event_type == "response.completed"
+end
+
+
+--- Produce the SSE text for a terminal response.completed event.
+-- The event data is the JSON encoding of a response.completed payload
+-- whose response has status "completed" and an empty `output`.
+-- @return The value returned by sse.encode for that event
+function _M.build_done_event()
+    local payload = core.json.encode({
+        type = "response.completed",
+        response = { status = "completed", output = {} }
+    })
+    return sse.encode({
+        type = "response.completed",
+        data = payload,
+    })
+end
+
+
+return _M
diff --git a/apisix/plugins/ai-providers/openai-compatible.lua
b/apisix/plugins/ai-providers/openai-compatible.lua
index cfc29a3ea..3f8a927bd 100644
--- a/apisix/plugins/ai-providers/openai-compatible.lua
+++ b/apisix/plugins/ai-providers/openai-compatible.lua
@@ -17,6 +17,8 @@
return require("apisix.plugins.ai-providers.base").new({
capabilities = {
- ["openai-chat"] = {},
+ ["openai-chat"] = { path = "/v1/chat/completions" },
+ ["openai-responses"] = { path = "/v1/responses" },
+ ["openai-embeddings"] = { path = "/v1/embeddings" },
},
})
diff --git a/apisix/plugins/ai-providers/openai.lua
b/apisix/plugins/ai-providers/openai.lua
index 5a2644313..a2c1af67f 100644
--- a/apisix/plugins/ai-providers/openai.lua
+++ b/apisix/plugins/ai-providers/openai.lua
@@ -21,6 +21,7 @@ return require("apisix.plugins.ai-providers.base").new(
port = 443,
capabilities = {
["openai-chat"] = { path = "/v1/chat/completions" },
+ ["openai-responses"] = { path = "/v1/responses" },
["openai-embeddings"] = { path = "/v1/embeddings" },
},
}
diff --git a/t/lib/server.lua b/t/lib/server.lua
index 309873636..4019b6ec5 100644
--- a/t/lib/server.lua
+++ b/t/lib/server.lua
@@ -432,6 +432,8 @@ function _M.echo()
ngx.print(ngx.req.get_body_data() or "")
end
+_M.v1_responses = _M.echo
+
function _M.log()
ngx.req.read_body()
diff --git a/t/plugin/ai-aliyun-content-moderation.t
b/t/plugin/ai-aliyun-content-moderation.t
index 44891d23d..90bfb05bf 100644
--- a/t/plugin/ai-aliyun-content-moderation.t
+++ b/t/plugin/ai-aliyun-content-moderation.t
@@ -15,6 +15,10 @@
# limitations under the License.
#
+BEGIN {
+ $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
use t::APISIX 'no_plan';
log_level("debug");
@@ -1473,3 +1477,185 @@ passed
}
--- response_body
passed
+
+
+
+=== TEST 37: set route for Responses API content moderation
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/chat", "/v1/responses"],
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer wrongtoken"
+ }
+ },
+ "override": {
+ "endpoint": "http://localhost:6724"
+ }
+ },
+ "ai-aliyun-content-moderation": {
+ "endpoint": "http://localhost:6724",
+ "region_id": "cn-shanghai",
+ "access_key_id": "fake-key-id",
+ "access_key_secret": "fake-key-secret",
+ "risk_level_bar": "high",
+ "check_request": true
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 38: Responses API violent input should be blocked by content
moderation
+--- request
+POST /v1/responses
+{ "input": "I want to kill you", "model": "gpt-4o" }
+--- error_code: 200
+--- response_body_like eval
+qr/As an AI language model, I cannot write unethical or controversial content
for you./
+
+
+
+=== TEST 39: Responses API deny response should use Responses API format
(non-streaming)
+--- request
+POST /v1/responses
+{ "input": "I want to kill you", "model": "gpt-4o" }
+--- error_code: 200
+--- response_body_like eval
+qr/(?=.*"object"\s*:\s*"response")(?=.*"output_text")(?=.*"input_tokens")/s
+
+
+
+=== TEST 40: set route for Responses API streaming content moderation
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/chat", "/v1/responses"],
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer wrongtoken"
+ }
+ },
+ "override": {
+ "endpoint": "http://localhost:6724"
+ }
+ },
+ "ai-aliyun-content-moderation": {
+ "endpoint": "http://localhost:6724",
+ "region_id": "cn-shanghai",
+ "access_key_id": "fake-key-id",
+ "access_key_secret": "fake-key-secret",
+ "risk_level_bar": "high",
+ "check_request": true
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 41: Responses API streaming deny response should use SSE Responses
API format
+--- request
+POST /v1/responses
+{ "input": "I want to kill you", "model": "gpt-4o", "stream": true }
+--- error_code: 200
+--- response_body_like eval
+qr/event: response\.output_text\.delta\ndata:.*"delta".*\n\nevent:
response\.completed\ndata:.*"object"\s*:\s*"response"/s
+
+
+
+=== TEST 42: Responses API deny response should contain input_tokens (not
prompt_tokens) in usage
+--- request
+POST /v1/responses
+{ "input": "I want to kill you", "model": "gpt-4o" }
+--- error_code: 200
+--- response_body_like eval
+qr/(?=.*"input_tokens"\s*:\s*0)(?=.*"output_tokens"\s*:\s*0)/s
+--- response_body_unlike eval
+qr/"prompt_tokens"/
+
+
+
+=== TEST 43: set route with deepseek provider for Responses API nil-schema fix
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/chat", "/v1/responses"],
+ "plugins": {
+ "ai-proxy": {
+ "provider": "deepseek",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer wrongtoken"
+ }
+ },
+ "override": {
+ "endpoint": "http://localhost:6724"
+ }
+ },
+ "ai-aliyun-content-moderation": {
+ "endpoint": "http://localhost:6724",
+ "region_id": "cn-shanghai",
+ "access_key_id": "fake-key-id",
+ "access_key_secret": "fake-key-secret",
+ "risk_level_bar": "high",
+ "check_request": true
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 44: Responses API request with non-openai provider (deepseek) should
not panic from nil schema check
+--- request
+POST /v1/responses
+{ "input": "safe prompt", "model": "deepseek-chat" }
+--- error_code: 400
diff --git a/t/plugin/ai-prompt-decorator.t b/t/plugin/ai-prompt-decorator.t
index 7bc3b90b7..0f077fafa 100644
--- a/t/plugin/ai-prompt-decorator.t
+++ b/t/plugin/ai-prompt-decorator.t
@@ -15,6 +15,10 @@
# limitations under the License.
#
+BEGIN {
+ $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
use t::APISIX 'no_plan';
repeat_each(1);
@@ -383,7 +387,284 @@ qr/.*failed to check the configuration of plugin
ai-prompt-decorator err.*/
-=== TEST 9: Chat Completions still works after Responses API support
(regression)
+=== TEST 9: Responses API - configure prepend for Responses API test
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/echo", "/v1/responses"],
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ },
+ "plugins": {
+ "ai-prompt-decorator": {
+ "prepend":[
+ {
+ "role": "system",
+ "content": "Be helpful"
+ }
+ ]
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+}
+--- response_body
+passed
+
+
+
+=== TEST 10: Responses API - prepend sets instructions field
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body, actual_resp = t('/v1/responses',
+ ngx.HTTP_POST,
+ [[{
+ "model": "gpt-4o",
+ "input": "What is 1+1?"
+ }]],
+ [[{
+ "model": "gpt-4o",
+ "input": "What is 1+1?",
+ "instructions": "Be helpful"
+ }]]
+ )
+ if code >= 300 then
+ ngx.status = code
+ ngx.say("failed")
+ return
+ end
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 11: Responses API - prepend prepends to existing instructions
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body, actual_resp = t('/v1/responses',
+ ngx.HTTP_POST,
+ [[{
+ "model": "gpt-4o",
+ "input": "What is 1+1?",
+ "instructions": "You are a math tutor"
+ }]],
+ [[{
+ "model": "gpt-4o",
+ "input": "What is 1+1?",
+ "instructions": "Be helpful\nYou are a math tutor"
+ }]]
+ )
+ if code >= 300 then
+ ngx.status = code
+ ngx.say("failed")
+ return
+ end
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 12: Responses API - configure append for Responses API test
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/echo", "/v1/responses"],
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ },
+ "plugins": {
+ "ai-prompt-decorator": {
+ "append":[
+ {
+ "role": "user",
+ "content": "Please be concise"
+ }
+ ]
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+}
+--- response_body
+passed
+
+
+
+=== TEST 13: Responses API - append to string input
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body, actual_resp = t('/v1/responses',
+ ngx.HTTP_POST,
+ [[{
+ "model": "gpt-4o",
+ "input": "What is 1+1?"
+ }]],
+ [[{
+ "model": "gpt-4o",
+ "input": "What is 1+1?\nPlease be concise"
+ }]]
+ )
+ if code >= 300 then
+ ngx.status = code
+ ngx.say("failed")
+ return
+ end
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 14: Responses API - append to array input
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body, actual_resp = t('/v1/responses',
+ ngx.HTTP_POST,
+ [[{
+ "model": "gpt-4o",
+ "input": [
+ { "type": "message", "role": "user", "content":
"What is 1+1?" }
+ ]
+ }]],
+ [[{
+ "model": "gpt-4o",
+ "input": [
+ { "type": "message", "role": "user", "content":
"What is 1+1?" },
+ { "type": "message", "role": "user", "content":
"Please be concise" }
+ ]
+ }]]
+ )
+ if code >= 300 then
+ ngx.status = code
+ ngx.say("failed")
+ return
+ end
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 15: Responses API - configure both prepend and append
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/echo", "/v1/responses"],
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ },
+ "plugins": {
+ "ai-prompt-decorator": {
+ "prepend":[
+ {
+ "role": "system",
+ "content": "Be helpful"
+ }
+ ],
+ "append":[
+ {
+ "role": "user",
+ "content": "Please be concise"
+ }
+ ]
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+}
+--- response_body
+passed
+
+
+
+=== TEST 16: Responses API - prepend and append together
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body, actual_resp = t('/v1/responses',
+ ngx.HTTP_POST,
+ [[{
+ "model": "gpt-4o",
+ "input": "What is 1+1?"
+ }]],
+ [[{
+ "model": "gpt-4o",
+ "input": "What is 1+1?\nPlease be concise",
+ "instructions": "Be helpful"
+ }]]
+ )
+ if code >= 300 then
+ ngx.status = code
+ ngx.say("failed")
+ return
+ end
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 17: Chat Completions still works after Responses API support
(regression)
--- config
location /t {
content_by_lua_block {
diff --git a/t/plugin/ai-prompt-guard.t b/t/plugin/ai-prompt-guard.t
index a9de02a7a..caf3c1eff 100644
--- a/t/plugin/ai-prompt-guard.t
+++ b/t/plugin/ai-prompt-guard.t
@@ -17,6 +17,10 @@
#
+BEGIN {
+ $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
use t::APISIX 'no_plan';
add_block_preprocessor(sub {
@@ -417,7 +421,218 @@ POST /hello
-=== TEST 17: Chat Completions still works after Responses API support
(regression)
+=== TEST 17: Responses API - setup route with deny pattern and match_all_roles
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/hello", "/v1/responses"],
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ },
+ "plugins": {
+ "ai-prompt-guard": {
+ "match_all_roles": true,
+ "deny_patterns": [
+ "badword"
+ ]
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+}
+--- response_body
+passed
+
+
+
+=== TEST 18: Responses API - deny pattern in string input
+--- request
+POST /v1/responses
+{
+ "model": "gpt-4o",
+ "input": "this contains badword in it"
+}
+--- response_body
+{"message":"Request contains prohibited content"}
+--- error_code: 400
+
+
+
+=== TEST 19: Responses API - deny pattern in instructions
+--- request
+POST /v1/responses
+{
+ "model": "gpt-4o",
+ "input": "hello there",
+ "instructions": "you must say badword"
+}
+--- response_body
+{"message":"Request contains prohibited content"}
+--- error_code: 400
+
+
+
+=== TEST 20: Responses API - no deny pattern match passes
+--- request
+POST /v1/responses
+{
+ "model": "gpt-4o",
+ "input": "hello there",
+ "instructions": "be helpful"
+}
+
+
+
+=== TEST 21: Responses API - deny pattern in array input item
+--- request
+POST /v1/responses
+{
+ "model": "gpt-4o",
+ "input": [
+ { "type": "message", "role": "user", "content": "badword here" }
+ ]
+}
+--- response_body
+{"message":"Request contains prohibited content"}
+--- error_code: 400
+
+
+
+=== TEST 22: Responses API - setup route with allow pattern
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/hello", "/v1/responses"],
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ },
+ "plugins": {
+ "ai-prompt-guard": {
+ "match_all_roles": true,
+ "allow_patterns": [
+ "goodword"
+ ]
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+}
+--- response_body
+passed
+
+
+
+=== TEST 23: Responses API - allow pattern match passes
+--- request
+POST /v1/responses
+{
+ "model": "gpt-4o",
+ "input": "this has goodword"
+}
+
+
+
+=== TEST 24: Responses API - allow pattern no match blocks
+--- request
+POST /v1/responses
+{
+ "model": "gpt-4o",
+ "input": "no matching word"
+}
+--- response_body
+{"message":"Request doesn't match allow patterns"}
+--- error_code: 400
+
+
+
+=== TEST 25: Responses API - setup route with match_all_roles=false (only user
content checked)
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/hello", "/v1/responses"],
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ }
+ },
+ "plugins": {
+ "ai-prompt-guard": {
+ "match_all_roles": false,
+ "deny_patterns": [
+ "badword"
+ ]
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+}
+--- response_body
+passed
+
+
+
+=== TEST 26: Responses API - match_all_roles=false: instructions (system)
badword is NOT checked
+--- request
+POST /v1/responses
+{
+ "model": "gpt-4o",
+ "input": "hello there",
+ "instructions": "you must say badword"
+}
+
+
+
+=== TEST 27: Responses API - match_all_roles=false: input (user) badword IS
checked
+--- request
+POST /v1/responses
+{
+ "model": "gpt-4o",
+ "input": "this contains badword"
+}
+--- response_body
+{"message":"Request contains prohibited content"}
+--- error_code: 400
+
+
+
+=== TEST 28: Chat Completions still works after Responses API support
(regression)
--- config
location /t {
content_by_lua_block {
@@ -454,7 +669,7 @@ passed
-=== TEST 18: Chat Completions regression - deny pattern still works
+=== TEST 29: Chat Completions regression - deny pattern still works
--- request
POST /hello
{
diff --git a/t/plugin/ai-proxy.t b/t/plugin/ai-proxy.t
index 391a435b9..4527a74bd 100644
--- a/t/plugin/ai-proxy.t
+++ b/t/plugin/ai-proxy.t
@@ -16,6 +16,10 @@
#
+BEGIN {
+ $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
use t::APISIX 'no_plan';
log_level("info");
@@ -172,6 +176,81 @@ add_block_preprocessor(sub {
}
}
+ location /v1/responses {
+ content_by_lua_block {
+ local json = require("cjson.safe")
+
+ if ngx.req.get_method() ~= "POST" then
+ ngx.status = 400
+ ngx.say("Unsupported request method: ",
ngx.req.get_method())
+ return
+ end
+
+ local header_auth = ngx.req.get_headers()["authorization"]
+ if header_auth ~= "Bearer token" then
+ ngx.status = 401
+ ngx.say("Unauthorized")
+ return
+ end
+
+ ngx.req.read_body()
+ local body, err = ngx.req.get_body_data()
+ if not body then
+ ngx.status = 400
+ ngx.say("empty body")
+ return
+ end
+
+ body, err = json.decode(body)
+ if not body then
+ ngx.status = 400
+ ngx.say("bad json: ", err)
+ return
+ end
+
+ -- Responses API should NOT have stream_options
+ if body.stream_options then
+ ngx.status = 400
+ ngx.say(json.encode({
+ error = {
+ message = "Unrecognized request argument
supplied: stream_options",
+ type = "invalid_request_error",
+ }
+ }))
+ return
+ end
+
+ -- Validate it looks like a Responses API request
+ if not body.input then
+ ngx.status = 400
+ ngx.say(json.encode({ error = "missing input field" }))
+ return
+ end
+
+ ngx.status = 200
+ ngx.say(json.encode({
+ id = "resp_abc123",
+ object = "response",
+ created_at = 1723780938,
+ model = body.model or "gpt-4o",
+ output = {
+ {
+ type = "message",
+ role = "assistant",
+ content = {
+ { type = "output_text", text = "1 + 1 =
2." }
+ },
+ }
+ },
+ usage = {
+ input_tokens = 10,
+ output_tokens = 5,
+ total_tokens = 15,
+ }
+ }))
+ }
+ }
+
location /random {
content_by_lua_block {
ngx.print("path override works")
@@ -860,7 +939,85 @@ qr/accept-encoding/
-=== TEST 25: Chat Completions still works after Responses API support
(regression)
+=== TEST 25: Responses API - set route
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/anything", "/v1/responses"],
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gpt-4o",
+ "max_tokens": 512,
+ "temperature": 1.0
+ },
+ "override": {
+ "endpoint": "http://localhost:6724"
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 26: Responses API - should NOT inject stream_options
+--- request
+POST /v1/responses
+{ "model": "gpt-4o", "input": "What is 1+1?", "stream": false }
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body_like eval
+qr/resp_abc123/
+
+
+
+=== TEST 27: Responses API with stream=true should NOT inject stream_options
+--- request
+POST /v1/responses
+{ "model": "gpt-4o", "input": "What is 1+1?", "stream": true }
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- no_error_log
+[error]
+
+
+
+=== TEST 28: Responses API with instructions field
+--- request
+POST /v1/responses
+{ "model": "gpt-4o", "input": "What is 1+1?", "instructions": "You are a math
tutor", "stream": false }
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body_like eval
+qr/resp_abc123/
+
+
+
+=== TEST 29: Chat Completions still works after Responses API support
(regression)
--- request
POST /anything
{ "messages": [ { "role": "system", "content": "You are a mathematician" }, {
"role": "user", "content": "What is 1+1?"} ] }
@@ -872,7 +1029,7 @@ qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/
-=== TEST 26: set route for fragmented SSE test
+=== TEST 30: set route for fragmented SSE test
--- config
location /t {
content_by_lua_block {
@@ -915,7 +1072,7 @@ passed
-=== TEST 27: fragmented SSE - one event split across two TCP chunks
+=== TEST 31: fragmented SSE - one event split across two TCP chunks
--- http_config
server {
server_name openai_sse_fragmented;
@@ -988,7 +1145,7 @@ got token usage from ai service:
-=== TEST 28: multiple SSE events in a single chunk
+=== TEST 32: multiple SSE events in a single chunk
--- http_config
server {
server_name openai_sse_multi;
@@ -1057,7 +1214,178 @@ got token usage from ai service:
-=== TEST 29: auth.query should not be mutated across requests when endpoint has query params
+=== TEST 33: set route for Responses API non-streaming test
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/anything", "/v1/responses"],
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gpt-4o"
+ },
+ "override": {
+ "endpoint": "http://localhost:6724"
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 34: Responses API non-streaming passthrough - token usage extracted
+--- request
+POST /v1/responses
+{ "model": "gpt-4o", "input": "What is 1+1?" }
+--- more_headers
+Authorization: Bearer token
+--- error_code: 200
+--- response_body_like eval
+qr/resp_abc123/
+--- error_log
+got token usage from ai service:
+--- no_error_log
+[error]
+
+
+
+=== TEST 35: set route for Responses API streaming test
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/anything", "/v1/responses"],
+ "plugins": {
+ "ai-proxy": {
+ "provider": "openai",
+ "auth": {
+ "header": {
+ "Authorization": "Bearer token"
+ }
+ },
+ "options": {
+ "model": "gpt-4o",
+ "stream": true
+ },
+ "override": {
+ "endpoint": "http://localhost:7739"
+ },
+ "ssl_verify": false
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 36: Responses API streaming passthrough - token usage extracted from response.completed
+--- http_config
+ server {
+ server_name openai_responses_sse;
+ listen 7739;
+
+ default_type 'text/event-stream';
+
+ location /v1/responses {
+ content_by_lua_block {
+ local json = require("cjson.safe")
+ ngx.header["Content-Type"] = "text/event-stream"
+
+ ngx.print("event: response.output_text.delta\ndata: " .. json.encode({type="response.output_text.delta", delta="Hello"}) .. "\n\n")
+ ngx.flush(true)
+ ngx.sleep(0.05)
+
+ ngx.print("event: response.output_text.delta\ndata: " .. json.encode({type="response.output_text.delta", delta=" world"}) .. "\n\n")
+ ngx.flush(true)
+ ngx.sleep(0.05)
+
+ ngx.print("event: response.completed\ndata: " .. json.encode({type="response.completed", response={usage={input_tokens=10, output_tokens=5, total_tokens=15}}}) .. "\n\n")
+ ngx.flush(true)
+ }
+ }
+ }
+--- config
+ location /t {
+ content_by_lua_block {
+ local http = require("resty.http")
+ local httpc = http.new()
+
+ local ok, err = httpc:connect({
+ scheme = "http",
+ host = "localhost",
+ port = ngx.var.server_port,
+ })
+
+ if not ok then
+ ngx.status = 500
+ ngx.say(err)
+ return
+ end
+
+ local res, err = httpc:request({
+ method = "POST",
+ headers = { ["Content-Type"] = "application/json" },
+ path = "/v1/responses",
+ body = [[{"input": "hello", "model": "gpt-4o", "stream": true}]],
+ })
+ if not res then
+ ngx.status = 500
+ ngx.say(err)
+ return
+ end
+
+ -- Drain the response
+ while true do
+ local chunk, err = res.body_reader()
+ if err or not chunk then break end
+ end
+
+ ngx.say("done")
+ }
+ }
+--- response_body
+done
+--- error_log
+got token usage from ai service:
+--- no_error_log
+[error]
+
+
+
+=== TEST 37: auth.query should not be mutated across requests when endpoint has query params
--- config
location /t {
content_by_lua_block {
diff --git a/t/plugin/ai-rag.t b/t/plugin/ai-rag.t
index 7b912c2e9..c0e1ccfc2 100644
--- a/t/plugin/ai-rag.t
+++ b/t/plugin/ai-rag.t
@@ -16,6 +16,10 @@
#
+BEGIN {
+ $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
use t::APISIX 'no_plan';
log_level("info");
@@ -391,3 +395,60 @@ POST /echo
--- error_code: 200
--- response_body eval
qr/\{"messages":\[\{"content":"passed","role":"user"\}\]\}|\{"messages":\[\{"role":"user","content":"passed"\}\]\}/
+
+
+
+=== TEST 13: configure route for Responses API RAG injection test
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uris": ["/echo", "/v1/responses"],
+ "plugins": {
+ "ai-rag": {
+ "embeddings_provider": {
+ "azure_openai": {
+ "endpoint": "http://localhost:3623/embeddings",
+ "api_key": "key"
+ }
+ },
+ "vector_search_provider": {
+ "azure_ai_search": {
+ "endpoint": "http://localhost:3623/search",
+ "api_key": "key"
+ }
+ }
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "127.0.0.1:1980": 1
+ },
+ "scheme": "http",
+ "pass_host": "node"
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 14: Responses API RAG injection - RAG result appended to input
+--- request
+POST /v1/responses
+{"input":"which service is good for devops","ai_rag":{"vector_search":{"fields":"something"},"embeddings":{"input":"which service is good for devops"}}}
+--- error_code: 200
+--- response_body eval
+qr/"input":"which service is good for devops\\npassed"/