This is an automated email from the ASF dual-hosted git repository.
shreemaanabhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 53e5b0227 refactor(ai): ai-proxy and ai-proxy-multi (#12030)
53e5b0227 is described below
commit 53e5b02270e307942f93fc2f94e1dc1a2fed9cd6
Author: Shreemaan Abhishek <[email protected]>
AuthorDate: Tue Mar 11 16:11:50 2025 +0545
refactor(ai): ai-proxy and ai-proxy-multi (#12030)
---
apisix/cli/config.lua | 4 +-
apisix/plugins/ai-aws-content-moderation.lua | 2 +-
apisix/plugins/ai-drivers/openai-base.lua | 128 +++++++++++++++++++++++++--
apisix/plugins/ai-drivers/schema.lua | 44 +++++++++
apisix/plugins/ai-proxy-multi.lua | 123 ++++++++++---------------
apisix/plugins/ai-proxy.lua | 36 +++-----
apisix/plugins/ai-proxy/base.lua | 95 ++++++--------------
apisix/plugins/ai-proxy/schema.lua | 73 +++++++--------
t/admin/plugins.t | 4 +-
t/plugin/ai-proxy-multi.balancer.t | 120 +++----------------------
t/plugin/ai-proxy-multi.openai-compatible.t | 34 ++-----
t/plugin/ai-proxy-multi.t | 57 +++++++-----
t/plugin/ai-proxy-multi2.t | 93 ++++---------------
t/plugin/ai-proxy.openai-compatible.t | 57 ++++--------
t/plugin/ai-proxy.t | 78 +++++++---------
t/plugin/ai-proxy2.t | 47 ++++------
16 files changed, 432 insertions(+), 563 deletions(-)
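For readers skimming the diff below: this refactor renames the ai-proxy-multi "providers" array to "instances" (each entry now declares its "provider" type and moves the model name into "options.model"), flattens the ai-proxy "model" block into top-level "provider" plus "options", and moves request validation and response/token-usage handling into the shared openai-base driver. A minimal sketch of the refactored ai-proxy-multi plugin config, assembled from the updated tests further down (the instance name and token are placeholders):

    "ai-proxy-multi": {
        "instances": [
            {
                "name": "openai-official",
                "provider": "openai",
                "weight": 1,
                "auth": { "header": { "Authorization": "Bearer <token>" } },
                "options": { "model": "gpt-4", "max_tokens": 512, "temperature": 1.0 }
            }
        ]
    }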
diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua
index be7694130..56af978c2 100644
--- a/apisix/cli/config.lua
+++ b/apisix/cli/config.lua
@@ -219,13 +219,13 @@ local _M = {
"ai-prompt-decorator",
"ai-prompt-guard",
"ai-rag",
+ "ai-proxy-multi",
+ "ai-proxy",
"ai-aws-content-moderation",
"proxy-mirror",
"proxy-rewrite",
"workflow",
"api-breaker",
- "ai-proxy",
- "ai-proxy-multi",
"limit-conn",
"limit-count",
"limit-req",
diff --git a/apisix/plugins/ai-aws-content-moderation.lua b/apisix/plugins/ai-aws-content-moderation.lua
index c7b54ed4e..e5a870bd3 100644
--- a/apisix/plugins/ai-aws-content-moderation.lua
+++ b/apisix/plugins/ai-aws-content-moderation.lua
@@ -72,7 +72,7 @@ local schema = {
local _M = {
version = 0.1,
- priority = 1040, -- TODO: might change
+ priority = 1050,
name = "ai-aws-content-moderation",
schema = schema,
}
diff --git a/apisix/plugins/ai-drivers/openai-base.lua b/apisix/plugins/ai-drivers/openai-base.lua
index a9eb31059..4f0b38afe 100644
--- a/apisix/plugins/ai-drivers/openai-base.lua
+++ b/apisix/plugins/ai-drivers/openai-base.lua
@@ -20,12 +20,20 @@ local mt = {
__index = _M
}
+local CONTENT_TYPE_JSON = "application/json"
+
local core = require("apisix.core")
local http = require("resty.http")
local url = require("socket.url")
+local schema = require("apisix.plugins.ai-drivers.schema")
+local ngx_re = require("ngx.re")
+
+local ngx_print = ngx.print
+local ngx_flush = ngx.flush
local pairs = pairs
local type = type
+local ipairs = ipairs
local setmetatable = setmetatable
@@ -40,6 +48,26 @@ function _M.new(opts)
end
+function _M.validate_request(ctx)
+    local ct = core.request.header(ctx, "Content-Type") or CONTENT_TYPE_JSON
+ if not core.string.has_prefix(ct, CONTENT_TYPE_JSON) then
+        return nil, "unsupported content-type: " .. ct .. ", only application/json is supported"
+ end
+
+ local request_table, err = core.request.get_json_request_body_table()
+ if not request_table then
+ return nil, err
+ end
+
+    local ok, err = core.schema.check(schema.chat_request_schema, request_table)
+ if not ok then
+ return nil, "request format doesn't match schema: " .. err
+ end
+
+ return request_table, nil
+end
+
+
function _M.request(self, conf, request_table, extra_opts)
local httpc, err = http.new()
if not httpc then
@@ -54,11 +82,11 @@ function _M.request(self, conf, request_table, extra_opts)
end
local ok, err = httpc:connect({
- scheme = endpoint and parsed_url.scheme or "https",
- host = endpoint and parsed_url.host or self.host,
- port = endpoint and parsed_url.port or self.port,
+ scheme = parsed_url and parsed_url.scheme or "https",
+ host = parsed_url and parsed_url.host or self.host,
+ port = parsed_url and parsed_url.port or self.port,
ssl_verify = conf.ssl_verify,
- ssl_server_name = endpoint and parsed_url.host or self.host,
+ ssl_server_name = parsed_url and parsed_url.host or self.host,
pool_size = conf.keepalive and conf.keepalive_pool,
})
@@ -75,7 +103,7 @@ function _M.request(self, conf, request_table, extra_opts)
end
end
- local path = (endpoint and parsed_url.path or self.path)
+ local path = (parsed_url and parsed_url.path or self.path)
local headers = extra_opts.headers
headers["Content-Type"] = "application/json"
@@ -106,7 +134,95 @@ function _M.request(self, conf, request_table, extra_opts)
return nil, err
end
- return res, nil, httpc
+ return res, nil
end
+
+function _M.read_response(ctx, res)
+ local body_reader = res.body_reader
+ if not body_reader then
+ core.log.error("AI service sent no response body")
+ return 500
+ end
+
+ local content_type = res.headers["Content-Type"]
+ core.response.set_header("Content-Type", content_type)
+
+ if core.string.find(content_type, "text/event-stream") then
+ while true do
+ local chunk, err = body_reader() -- will read chunk by chunk
+ if err then
+ core.log.error("failed to read response chunk: ", err)
+ break
+ end
+ if not chunk then
+ break
+ end
+
+ ngx_print(chunk)
+ ngx_flush(true)
+
+ local events, err = ngx_re.split(chunk, "\n")
+ if err then
+                core.log.warn("failed to split response chunk [", chunk, "] to events: ", err)
+ goto CONTINUE
+ end
+
+ for _, event in ipairs(events) do
+                if not core.string.find(event, "data:") or core.string.find(event, "[DONE]") then
+ goto CONTINUE
+ end
+
+ local parts, err = ngx_re.split(event, ":", nil, nil, 2)
+ if err then
+                    core.log.warn("failed to split data event [", event, "] to parts: ", err)
+ goto CONTINUE
+ end
+
+ if #parts ~= 2 then
+ core.log.warn("malformed data event: ", event)
+ goto CONTINUE
+ end
+
+ local data, err = core.json.decode(parts[2])
+ if err then
+                    core.log.warn("failed to decode data event [", parts[2], "] to json: ", err)
+ goto CONTINUE
+ end
+
+                -- usage field is null for non-last events, null is parsed as userdata type
+ if data and data.usage and type(data.usage) ~= "userdata" then
+ ctx.ai_token_usage = {
+ prompt_tokens = data.usage.prompt_tokens or 0,
+ completion_tokens = data.usage.completion_tokens or 0,
+ total_tokens = data.usage.total_tokens or 0,
+ }
+ end
+ end
+
+ ::CONTINUE::
+ end
+ return
+ end
+
+ local raw_res_body, err = res:read_body()
+ if not raw_res_body then
+ core.log.error("failed to read response body: ", err)
+ return 500
+ end
+ local res_body, err = core.json.decode(raw_res_body)
+ if err then
+        core.log.warn("invalid response body from ai service: ", raw_res_body, " err: ", err,
+ ", it will cause token usage not available")
+ else
+ ctx.ai_token_usage = {
+            prompt_tokens = res_body.usage and res_body.usage.prompt_tokens or 0,
+            completion_tokens = res_body.usage and res_body.usage.completion_tokens or 0,
+ total_tokens = res_body.usage and res_body.usage.total_tokens or 0,
+ }
+ end
+ return res.status, raw_res_body
+end
+
+
return _M
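For context on the streaming branch added in read_response above: each chunk is forwarded to the client verbatim, then split into "data:" events so token usage can be captured from the final event that carries a "usage" object (requested via stream_options.include_usage in ai-proxy/base.lua below). An illustrative event line, not taken from a real provider response, that would populate ctx.ai_token_usage:

    data: {"choices":[],"usage":{"prompt_tokens":15,"completion_tokens":20,"total_tokens":35}}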
diff --git a/apisix/plugins/ai-drivers/schema.lua b/apisix/plugins/ai-drivers/schema.lua
new file mode 100644
index 000000000..7a469bd01
--- /dev/null
+++ b/apisix/plugins/ai-drivers/schema.lua
@@ -0,0 +1,44 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local _M = {}
+
+_M.chat_request_schema = {
+ type = "object",
+ properties = {
+ messages = {
+ type = "array",
+ minItems = 1,
+ items = {
+ properties = {
+ role = {
+ type = "string",
+ enum = {"system", "user", "assistant"}
+ },
+ content = {
+ type = "string",
+                    minLength = 1,
+ },
+ },
+ additionalProperties = false,
+ required = {"role", "content"},
+ },
+ }
+ },
+ required = {"messages"}
+}
+
+return _M
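The chat_request_schema above validates OpenAI-style chat payloads; a request body that passes it, matching what the tests below send, looks like:

    { "messages": [ { "role": "system", "content": "You are a mathematician" },
                    { "role": "user", "content": "What is 1+1?" } ] }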
diff --git a/apisix/plugins/ai-proxy-multi.lua b/apisix/plugins/ai-proxy-multi.lua
index 4993270b9..3b4fc7e84 100644
--- a/apisix/plugins/ai-proxy-multi.lua
+++ b/apisix/plugins/ai-proxy-multi.lua
@@ -17,8 +17,8 @@
local core = require("apisix.core")
local schema = require("apisix.plugins.ai-proxy.schema")
+local base = require("apisix.plugins.ai-proxy.base")
local plugin = require("apisix.plugin")
-local base = require("apisix.plugins.ai-proxy.base")
local require = require
local pcall = pcall
@@ -36,7 +36,7 @@ local lrucache_server_picker = core.lrucache.new({
local plugin_name = "ai-proxy-multi"
local _M = {
version = 0.5,
- priority = 998,
+ priority = 1041,
name = plugin_name,
schema = schema.ai_proxy_multi_schema,
}
@@ -64,10 +64,16 @@ end
function _M.check_schema(conf)
- for _, provider in ipairs(conf.providers) do
-        local ai_driver = pcall(require, "apisix.plugins.ai-drivers." .. provider.name)
+ local ok, err = core.schema.check(schema.ai_proxy_multi_schema, conf)
+ if not ok then
+ return false, err
+ end
+
+ for _, instance in ipairs(conf.instances) do
+        local ai_driver, err = pcall(require, "apisix.plugins.ai-drivers." .. instance.provider)
if not ai_driver then
- return false, "provider: " .. provider.name .. " is not supported."
+            core.log.warn("fail to require ai provider: ", instance.provider, ", err", err)
+            return false, "ai provider: " .. instance.provider .. " is not supported."
end
end
local algo = core.table.try_read_attr(conf, "balancer", "algorithm")
@@ -96,21 +102,21 @@ function _M.check_schema(conf)
end
end
- return core.schema.check(schema.ai_proxy_multi_schema, conf)
+ return ok
end
-local function transform_providers(new_providers, provider)
- if not new_providers._priority_index then
- new_providers._priority_index = {}
+local function transform_instances(new_instances, instance)
+ if not new_instances._priority_index then
+ new_instances._priority_index = {}
end
- if not new_providers[provider.priority] then
- new_providers[provider.priority] = {}
- core.table.insert(new_providers._priority_index, provider.priority)
+ if not new_instances[instance.priority] then
+ new_instances[instance.priority] = {}
+ core.table.insert(new_instances._priority_index, instance.priority)
end
- new_providers[provider.priority][provider.name] = provider.weight
+ new_instances[instance.priority][instance.name] = instance.weight
end
@@ -120,37 +126,31 @@ local function create_server_picker(conf, ups_tab)
        pickers[conf.balancer.algorithm] = require("apisix.balancer." .. conf.balancer.algorithm)
picker = pickers[conf.balancer.algorithm]
end
- local new_providers = {}
- for i, provider in ipairs(conf.providers) do
- transform_providers(new_providers, provider)
+ local new_instances = {}
+ for _, ins in ipairs(conf.instances) do
+ transform_instances(new_instances, ins)
end
- if #new_providers._priority_index > 1 then
- core.log.info("new providers: ", core.json.delay_encode(new_providers))
- return priority_balancer.new(new_providers, ups_tab, picker)
+ if #new_instances._priority_index > 1 then
+ core.log.info("new instances: ", core.json.delay_encode(new_instances))
+ return priority_balancer.new(new_instances, ups_tab, picker)
end
core.log.info("upstream nodes: ",
-                  core.json.delay_encode(new_providers[new_providers._priority_index[1]]))
-    return picker.new(new_providers[new_providers._priority_index[1]], ups_tab)
+                  core.json.delay_encode(new_instances[new_instances._priority_index[1]]))
+ return picker.new(new_instances[new_instances._priority_index[1]], ups_tab)
end
-local function get_provider_conf(providers, name)
- for i, provider in ipairs(providers) do
- if provider.name == name then
- return provider
+local function get_instance_conf(instances, name)
+ for _, ins in ipairs(instances) do
+ if ins.name == name then
+ return ins
end
end
end
local function pick_target(ctx, conf, ups_tab)
- if ctx.ai_balancer_try_count > 1 then
- if ctx.server_picker and ctx.server_picker.after_balance then
- ctx.server_picker.after_balance(ctx, true)
- end
- end
-
local server_picker = ctx.server_picker
if not server_picker then
        server_picker = lrucache_server_picker(ctx.matched_route.key, plugin.conf_version(conf),
@@ -160,40 +160,31 @@ local function pick_target(ctx, conf, ups_tab)
return internal_server_error, "failed to fetch server picker"
end
- local provider_name = server_picker.get(ctx)
- local provider_conf = get_provider_conf(conf.providers, provider_name)
+    local instance_name = server_picker.get(ctx)
+ local instance_conf = get_instance_conf(conf.instances, instance_name)
- ctx.balancer_server = provider_name
+ ctx.balancer_server = instance_name
ctx.server_picker = server_picker
- return provider_name, provider_conf
+ return instance_name, instance_conf
end
-local function get_load_balanced_provider(ctx, conf, ups_tab, request_table)
- ctx.ai_balancer_try_count = (ctx.ai_balancer_try_count or 0) + 1
- local provider_name, provider_conf
- if #conf.providers == 1 then
- provider_name = conf.providers[1].name
- provider_conf = conf.providers[1]
+local function pick_ai_instance(ctx, conf, ups_tab)
+ local instance_name, instance_conf
+ if #conf.instances == 1 then
+ instance_name = conf.instances[1].name
+ instance_conf = conf.instances[1]
else
- provider_name, provider_conf = pick_target(ctx, conf, ups_tab)
- end
-
- core.log.info("picked provider: ", provider_name)
- if provider_conf.model then
- request_table.model = provider_conf.model
+ instance_name, instance_conf = pick_target(ctx, conf, ups_tab)
end
- provider_conf.__name = provider_name
- return provider_name, provider_conf
-end
-
-local function get_model_name(...)
+ core.log.info("picked instance: ", instance_name)
+ return instance_name, instance_conf
end
-local function proxy_request_to_llm(conf, request_table, ctx)
+function _M.access(conf, ctx)
local ups_tab = {}
local algo = core.table.try_read_attr(conf, "balancer", "algorithm")
if algo == "chash" then
@@ -203,31 +194,13 @@ local function proxy_request_to_llm(conf, request_table, ctx)
ups_tab["hash_on"] = hash_on
end
- ::retry::
-    local provider, provider_conf = get_load_balanced_provider(ctx, conf, ups_tab, request_table)
- local extra_opts = {
-        endpoint = core.table.try_read_attr(provider_conf, "override", "endpoint"),
- query_params = provider_conf.auth.query or {},
- headers = (provider_conf.auth.header or {}),
- model_options = provider_conf.options,
- }
-
- local ai_driver = require("apisix.plugins.ai-drivers." .. provider)
- local res, err, httpc = ai_driver:request(conf, request_table, extra_opts)
- if not res then
- if (ctx.ai_balancer_try_count or 0) < 2 then
-            core.log.warn("failed to send request to LLM: ", err, ". Retrying...")
- goto retry
- end
- return nil, err, nil
- end
-
- request_table.model = provider_conf.model
- return res, nil, httpc
+ local name, ai_instance = pick_ai_instance(ctx, conf, ups_tab)
+ ctx.picked_ai_instance_name = name
+ ctx.picked_ai_instance = ai_instance
end
-_M.access = base.new(proxy_request_to_llm, get_model_name)
+_M.before_proxy = base.before_proxy
return _M
diff --git a/apisix/plugins/ai-proxy.lua b/apisix/plugins/ai-proxy.lua
index ffc82f856..2301a65e6 100644
--- a/apisix/plugins/ai-proxy.lua
+++ b/apisix/plugins/ai-proxy.lua
@@ -24,41 +24,33 @@ local pcall = pcall
local plugin_name = "ai-proxy"
local _M = {
version = 0.5,
- priority = 999,
+ priority = 1040,
name = plugin_name,
- schema = schema,
+ schema = schema.ai_proxy_schema,
}
function _M.check_schema(conf)
-    local ai_driver = pcall(require, "apisix.plugins.ai-drivers." .. conf.model.provider)
+ local ok, err = core.schema.check(schema.ai_proxy_schema, conf)
+ if not ok then
+ return false, err
+ end
+    local ai_driver, err = pcall(require, "apisix.plugins.ai-drivers." .. conf.provider)
if not ai_driver then
-        return false, "provider: " .. conf.model.provider .. " is not supported."
+        core.log.warn("fail to require ai provider: ", conf.provider, ", err", err)
+ return false, "ai provider: " .. conf.provider .. " is not supported."
end
- return core.schema.check(schema.ai_proxy_schema, conf)
+ return ok
end
-local function get_model_name(conf)
- return conf.model.name
+function _M.access(conf, ctx)
+ ctx.picked_ai_instance_name = "ai-proxy"
+ ctx.picked_ai_instance = conf
end
-local function proxy_request_to_llm(conf, request_table, ctx)
-    local ai_driver = require("apisix.plugins.ai-drivers." .. conf.model.provider)
- local extra_opts = {
- endpoint = core.table.try_read_attr(conf, "override", "endpoint"),
- query_params = conf.auth.query or {},
- headers = (conf.auth.header or {}),
- model_options = conf.model.options
- }
- local res, err, httpc = ai_driver:request(conf, request_table, extra_opts)
- if not res then
- return nil, err, nil
- end
- return res, nil, httpc
-end
+_M.before_proxy = base.before_proxy
-_M.access = base.new(proxy_request_to_llm, get_model_name)
return _M
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 6de6ceb8c..d8f1a8944 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -15,84 +15,43 @@
-- limitations under the License.
--
-local CONTENT_TYPE_JSON = "application/json"
local core = require("apisix.core")
+local require = require
local bad_request = ngx.HTTP_BAD_REQUEST
local internal_server_error = ngx.HTTP_INTERNAL_SERVER_ERROR
-local schema = require("apisix.plugins.ai-proxy.schema")
-local ngx_print = ngx.print
-local ngx_flush = ngx.flush
-
-local function keepalive_or_close(conf, httpc)
- if conf.set_keepalive then
- httpc:set_keepalive(10000, 100)
- return
- end
- httpc:close()
-end
local _M = {}
-function _M.new(proxy_request_to_llm_func, get_model_name_func)
- return function(conf, ctx)
-        local ct = core.request.header(ctx, "Content-Type") or CONTENT_TYPE_JSON
- if not core.string.has_prefix(ct, CONTENT_TYPE_JSON) then
- return bad_request, "unsupported content-type: " .. ct
- end
-
- local request_table, err = core.request.get_json_request_body_table()
- if not request_table then
- return bad_request, err
- end
-
-        local ok, err = core.schema.check(schema.chat_request_schema, request_table)
- if not ok then
- return bad_request, "request format doesn't match schema: " .. err
- end
-
- request_table.model = get_model_name_func(conf)
+function _M.before_proxy(conf, ctx)
+ local ai_instance = ctx.picked_ai_instance
+    local ai_driver = require("apisix.plugins.ai-drivers." .. ai_instance.provider)
- if core.table.try_read_attr(conf, "model", "options", "stream") then
- request_table.stream = true
- end
-
-        local res, err, httpc = proxy_request_to_llm_func(conf, request_table, ctx)
- if not res then
- core.log.error("failed to send request to LLM service: ", err)
- return internal_server_error
- end
+ local request_body, err = ai_driver.validate_request(ctx)
+ if not request_body then
+ return bad_request, err
+ end
- local body_reader = res.body_reader
- if not body_reader then
- core.log.error("LLM sent no response body")
- return internal_server_error
- end
+ local extra_opts = {
+        endpoint = core.table.try_read_attr(ai_instance, "override", "endpoint"),
+ query_params = ai_instance.auth.query or {},
+ headers = (ai_instance.auth.header or {}),
+ model_options = ai_instance.options,
+ }
+
+ if request_body.stream then
+ request_body.stream_options = {
+ include_usage = true
+ }
+ end
- if request_table.stream then
- while true do
- local chunk, err = body_reader() -- will read chunk by chunk
- if err then
- core.log.error("failed to read response chunk: ", err)
- break
- end
- if not chunk then
- break
- end
- ngx_print(chunk)
- ngx_flush(true)
- end
- keepalive_or_close(conf, httpc)
- return
- else
- local res_body, err = res:read_body()
- if not res_body then
- core.log.error("failed to read response body: ", err)
- return internal_server_error
- end
- keepalive_or_close(conf, httpc)
- return res.status, res_body
- end
+ local res, err = ai_driver:request(conf, request_body, extra_opts)
+ if not res then
+ core.log.error("failed to send request to AI service: ", err)
+ return internal_server_error
end
+
+ return ai_driver.read_response(ctx, res)
end
+
return _M
diff --git a/apisix/plugins/ai-proxy/schema.lua b/apisix/plugins/ai-proxy/schema.lua
index a2c25e924..7170b5bfc 100644
--- a/apisix/plugins/ai-proxy/schema.lua
+++ b/apisix/plugins/ai-proxy/schema.lua
@@ -38,6 +38,10 @@ local model_options_schema = {
description = "Key/value settings for the model",
type = "object",
properties = {
+ model = {
+ type = "string",
+ description = "Model to execute.",
+ },
max_tokens = {
type = "integer",
            description = "Defines the max_tokens, if using chat or completion models.",
@@ -74,36 +78,10 @@ local model_options_schema = {
description = "Stream response by SSE",
type = "boolean",
}
- }
-}
-
-local model_schema = {
- type = "object",
- properties = {
- provider = {
- type = "string",
- description = "Name of the AI service provider.",
-            enum = { "openai", "openai-compatible", "deepseek" }, -- add more providers later
- },
- name = {
- type = "string",
- description = "Model name to execute.",
- },
- options = model_options_schema,
- override = {
- type = "object",
- properties = {
- endpoint = {
- type = "string",
-                    description = "To be specified to override the host of the AI provider",
- },
- }
- }
},
- required = {"provider", "name"}
}
-local provider_schema = {
+local ai_instance_schema = {
type = "array",
minItems = 1,
items = {
@@ -111,13 +89,15 @@ local provider_schema = {
properties = {
name = {
type = "string",
- description = "Name of the AI service provider.",
-                enum = { "openai", "deepseek", "openai-compatible" }, -- add more providers later
-
+ minLength = 1,
+ maxLength = 100,
+ description = "Name of the AI service instance.",
},
- model = {
+ provider = {
type = "string",
- description = "Model to execute.",
+ description = "Type of the AI service instance.",
+                enum = { "openai", "deepseek", "openai-compatible" }, -- add more providers later
+
},
priority = {
type = "integer",
@@ -126,6 +106,7 @@ local provider_schema = {
},
weight = {
type = "integer",
+ minimum = 0,
},
auth = auth_schema,
options = model_options_schema,
@@ -134,12 +115,12 @@ local provider_schema = {
properties = {
endpoint = {
type = "string",
-                        description = "To be specified to override the host of the AI provider",
+                        description = "To be specified to override the endpoint of the AI Instance",
},
},
},
},
- required = {"name", "model", "auth"}
+ required = {"name", "provider", "auth"}
},
}
@@ -147,8 +128,14 @@ local provider_schema = {
_M.ai_proxy_schema = {
type = "object",
properties = {
+ provider = {
+ type = "string",
+ description = "Type of the AI service instance.",
+            enum = { "openai", "deepseek", "openai-compatible" }, -- add more providers later
+
+ },
auth = auth_schema,
- model = model_schema,
+ options = model_options_schema,
timeout = {
type = "integer",
minimum = 1,
@@ -159,8 +146,17 @@ _M.ai_proxy_schema = {
keepalive = {type = "boolean", default = true},
keepalive_pool = {type = "integer", minimum = 1, default = 30},
ssl_verify = {type = "boolean", default = true },
+ override = {
+ type = "object",
+ properties = {
+ endpoint = {
+ type = "string",
+                    description = "To be specified to override the endpoint of the AI Instance",
+ },
+ },
+ },
},
- required = {"model", "auth"}
+ required = {"provider", "auth"}
}
_M.ai_proxy_multi_schema = {
@@ -191,7 +187,7 @@ _M.ai_proxy_multi_schema = {
},
default = { algorithm = "roundrobin" }
},
- providers = provider_schema,
+ instances = ai_instance_schema,
timeout = {
type = "integer",
minimum = 1,
@@ -200,11 +196,10 @@ _M.ai_proxy_multi_schema = {
description = "timeout in milliseconds",
},
keepalive = {type = "boolean", default = true},
-        keepalive_timeout = {type = "integer", minimum = 1000, default = 60000},
keepalive_pool = {type = "integer", minimum = 1, default = 30},
ssl_verify = {type = "boolean", default = true },
},
- required = {"providers", }
+ required = {"instances"}
}
_M.chat_request_schema = {
diff --git a/t/admin/plugins.t b/t/admin/plugins.t
index 20cf4a8fc..c43d5ffeb 100644
--- a/t/admin/plugins.t
+++ b/t/admin/plugins.t
@@ -99,6 +99,8 @@ ai-prompt-template
ai-prompt-decorator
ai-rag
ai-aws-content-moderation
+ai-proxy-multi
+ai-proxy
proxy-mirror
proxy-rewrite
workflow
@@ -106,8 +108,6 @@ api-breaker
limit-conn
limit-count
limit-req
-ai-proxy
-ai-proxy-multi
gzip
server-info
traffic-split
diff --git a/t/plugin/ai-proxy-multi.balancer.t b/t/plugin/ai-proxy-multi.balancer.t
index da26957fb..09076e4a8 100644
--- a/t/plugin/ai-proxy-multi.balancer.t
+++ b/t/plugin/ai-proxy-multi.balancer.t
@@ -158,10 +158,10 @@ __DATA__
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
"name": "openai",
- "model": "gpt-4",
+ "provider": "openai",
"weight": 4,
"auth": {
"header": {
@@ -169,6 +169,7 @@ __DATA__
}
},
"options": {
+ "model": "gpt-4",
"max_tokens": 512,
"temperature": 1.0
},
@@ -178,7 +179,7 @@ __DATA__
},
{
"name": "deepseek",
- "model": "gpt-4",
+ "provider": "deepseek",
"weight": 1,
"auth": {
"header": {
@@ -186,6 +187,7 @@ __DATA__
}
},
"options": {
+ "model": "deepseek-chat",
"max_tokens": 512,
"temperature": 1.0
},
@@ -239,7 +241,7 @@ passed
end
table.sort(restab)
-            ngx.log(ngx.WARN, "test picked providers: ", table.concat(restab, "."))
+            ngx.log(ngx.WARN, "test picked instances: ", table.concat(restab, "."))
}
}
@@ -266,10 +268,10 @@ deepseek.deepseek.openai.openai.openai.openai.openai.openai.openai.openai
"hash_on": "vars",
"key": "query_string"
},
- "providers": [
+ "instances": [
{
"name": "openai",
- "model": "gpt-4",
+ "provider": "openai",
"weight": 4,
"auth": {
"header": {
@@ -277,6 +279,7 @@ deepseek.deepseek.openai.openai.openai.openai.openai.openai.openai.openai
}
},
"options": {
+ "model": "gpt-4",
"max_tokens": 512,
"temperature": 1.0
},
@@ -286,7 +289,7 @@ deepseek.deepseek.openai.openai.openai.openai.openai.openai.openai.openai
},
{
"name": "deepseek",
- "model": "gpt-4",
+ "provider": "deepseek",
"weight": 1,
"auth": {
"header": {
@@ -294,6 +297,7 @@ deepseek.deepseek.openai.openai.openai.openai.openai.openai.openai.openai
}
},
"options": {
+ "model": "deepseek-chat",
"max_tokens": 512,
"temperature": 1.0
},
@@ -366,105 +370,3 @@ GET /t
--- error_log
distribution: deepseek: 2
distribution: openai: 8
-
-
-
-=== TEST 5: retry logic with different priorities
---- config
- location /t {
- content_by_lua_block {
- local t = require("lib.test_admin").test
- local code, body = t('/apisix/admin/routes/1',
- ngx.HTTP_PUT,
- [[{
- "uri": "/anything",
- "plugins": {
- "ai-proxy-multi": {
- "providers": [
- {
- "name": "openai",
- "model": "gpt-4",
- "weight": 1,
- "priority": 1,
- "auth": {
- "header": {
- "Authorization": "Bearer token"
- }
- },
- "options": {
- "max_tokens": 512,
- "temperature": 1.0
- },
- "override": {
- "endpoint": "http://localhost:9999"
- }
- },
- {
- "name": "deepseek",
- "model": "gpt-4",
- "priority": 0,
- "weight": 1,
- "auth": {
- "header": {
- "Authorization": "Bearer token"
- }
- },
- "options": {
- "max_tokens": 512,
- "temperature": 1.0
- },
- "override": {
-                                    "endpoint": "http://localhost:6724/chat/completions"
- }
- }
- ],
- "ssl_verify": false
- }
- },
- "upstream": {
- "type": "roundrobin",
- "nodes": {
- "canbeanything.com": 1
- }
- }
- }]]
- )
-
- if code >= 300 then
- ngx.status = code
- end
- ngx.say(body)
- }
- }
---- response_body
-passed
-
-
-
-=== TEST 6: test
---- config
- location /t {
- content_by_lua_block {
- local http = require "resty.http"
- local uri = "http://127.0.0.1:" .. ngx.var.server_port
- .. "/anything"
-
- local restab = {}
-
-            local body = [[{ "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] }]]
- local httpc = http.new()
-            local res, err = httpc:request_uri(uri, {method = "POST", body = body})
- if not res then
- ngx.say(err)
- return
- end
- ngx.say(res.body)
-
- }
- }
---- request
-GET /t
---- response_body
-deepseek
---- error_log
-failed to send request to LLM: failed to connect to LLM server: connection refused. Retrying...
diff --git a/t/plugin/ai-proxy-multi.openai-compatible.t b/t/plugin/ai-proxy-multi.openai-compatible.t
index f80be6dc4..923c12d37 100644
--- a/t/plugin/ai-proxy-multi.openai-compatible.t
+++ b/t/plugin/ai-proxy-multi.openai-compatible.t
@@ -52,26 +52,6 @@ _EOC_
default_type 'application/json';
- location /anything {
- content_by_lua_block {
- local json = require("cjson.safe")
-
- if ngx.req.get_method() ~= "POST" then
- ngx.status = 400
-                ngx.say("Unsupported request method: ", ngx.req.get_method())
- end
- ngx.req.read_body()
- local body = ngx.req.get_body_data()
-
- if body ~= "SELECT * FROM STUDENTS" then
- ngx.status = 503
- ngx.say("passthrough doesn't work")
- return
- end
- ngx.say('{"foo", "bar"}')
- }
- }
-
location /v1/chat/completions {
content_by_lua_block {
local json = require("cjson.safe")
@@ -158,10 +138,10 @@ __DATA__
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai-compatible",
- "model": "custom",
+ "name": "self-hosted",
+ "provider": "openai-compatible",
"weight": 1,
"auth": {
"header": {
@@ -169,6 +149,7 @@ __DATA__
}
},
"options": {
+ "model": "custom",
"max_tokens": 512,
"temperature": 1.0
},
@@ -223,10 +204,10 @@ qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai-compatible",
- "model": "custom-instruct",
+ "name": "self-hosted",
+ "provider": "openai-compatible",
"weight": 1,
"auth": {
"header": {
@@ -234,6 +215,7 @@ qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/
}
},
"options": {
+ "model": "custom-instruct",
"max_tokens": 512,
"temperature": 1.0,
"stream": true
diff --git a/t/plugin/ai-proxy-multi.t b/t/plugin/ai-proxy-multi.t
index 7969dbd81..0da04c9de 100644
--- a/t/plugin/ai-proxy-multi.t
+++ b/t/plugin/ai-proxy-multi.t
@@ -133,10 +133,13 @@ __DATA__
content_by_lua_block {
local plugin = require("apisix.plugins.ai-proxy-multi")
local ok, err = plugin.check_schema({
- providers = {
+ instances = {
{
- name = "openai",
- model = "gpt-4",
+ name = "openai-official",
+ provider = "openai",
+ options = {
+ model = "gpt-4",
+ },
weight = 1,
auth = {
header = {
@@ -165,10 +168,13 @@ passed
content_by_lua_block {
local plugin = require("apisix.plugins.ai-proxy-multi")
local ok, err = plugin.check_schema({
- providers = {
+ instances = {
{
- name = "some-unique",
- model = "gpt-4",
+ name = "self-hosted",
+ provider = "some-unique",
+ options = {
+ model = "gpt-4",
+ },
weight = 1,
auth = {
header = {
@@ -187,7 +193,7 @@ passed
}
}
--- response_body eval
-qr/.*provider: some-unique is not supported.*/
+qr/.*property "provider" validation failed: matches none of the enum values*/
@@ -202,10 +208,10 @@ qr/.*provider: some-unique is not supported.*/
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai",
- "model": "gpt-4",
+ "name": "openai-official",
+ "provider": "openai",
"weight": 1,
"auth": {
"header": {
@@ -213,6 +219,7 @@ qr/.*provider: some-unique is not supported.*/
}
},
"options": {
+ "model": "gpt-4",
"max_tokens": 512,
"temperature": 1.0
},
@@ -265,10 +272,10 @@ Unauthorized
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai",
- "model": "gpt-4",
+ "name": "openai-official",
+ "provider": "openai",
"weight": 1,
"auth": {
"header": {
@@ -276,6 +283,7 @@ Unauthorized
}
},
"options": {
+ "model": "gpt-4",
"max_tokens": 512,
"temperature": 1.0
},
@@ -360,7 +368,7 @@ prompt%3Dwhat%2520is%25201%2520%252B%25201
Content-Type: application/x-www-form-urlencoded
--- error_code: 400
--- response_body chomp
-unsupported content-type: application/x-www-form-urlencoded
+unsupported content-type: application/x-www-form-urlencoded, only application/json is supported
@@ -387,10 +395,10 @@ request format doesn't match schema: property "messages" is required
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai",
- "model": "some-model",
+ "name": "openai-official",
+ "provider": "openai",
"weight": 1,
"auth": {
"header": {
@@ -398,6 +406,7 @@ request format doesn't match schema: property "messages" is required
}
},
"options": {
+ "model": "some-model",
"foo": "bar",
"temperature": 1.0
},
@@ -461,10 +470,10 @@ options_works
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai",
- "model": "some-model",
+ "name": "openai-official",
+ "provider": "openai",
"weight": 1,
"auth": {
"header": {
@@ -472,6 +481,7 @@ options_works
}
},
"options": {
+ "model": "some-model",
"foo": "bar",
"temperature": 1.0
},
@@ -534,10 +544,10 @@ path override works
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai",
- "model": "gpt-35-turbo-instruct",
+ "name": "openai-official",
+ "provider": "openai",
"weight": 1,
"auth": {
"header": {
@@ -545,6 +555,7 @@ path override works
}
},
"options": {
+ "model": "gpt-35-turbo-instruct",
"max_tokens": 512,
"temperature": 1.0,
"stream": true
diff --git a/t/plugin/ai-proxy-multi2.t b/t/plugin/ai-proxy-multi2.t
index 00c1714a3..c54e7a67e 100644
--- a/t/plugin/ai-proxy-multi2.t
+++ b/t/plugin/ai-proxy-multi2.t
@@ -123,10 +123,10 @@ __DATA__
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai",
- "model": "gpt-35-turbo-instruct",
+ "name": "openai-official",
+ "provider": "openai",
"weight": 1,
"auth": {
"query": {
@@ -134,6 +134,7 @@ __DATA__
}
},
"options": {
+ "model": "gpt-35-turbo-instruct",
"max_tokens": 512,
"temperature": 1.0
},
@@ -186,10 +187,10 @@ Unauthorized
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai",
- "model": "gpt-35-turbo-instruct",
+ "name": "openai-official",
+ "provider": "openai",
"weight": 1,
"auth": {
"query": {
@@ -197,6 +198,7 @@ Unauthorized
}
},
"options": {
+ "model": "gpt-35-turbo-instruct",
"max_tokens": 512,
"temperature": 1.0
},
@@ -249,10 +251,10 @@ passed
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai",
- "model": "gpt-35-turbo-instruct",
+ "name": "openai-official",
+ "provider": "openai",
"weight": 1,
"auth": {
"header": {
@@ -260,6 +262,7 @@ passed
}
},
"options": {
+ "model": "gpt-35-turbo-instruct",
"max_tokens": 512,
"temperature": 1.0
}
@@ -308,10 +311,10 @@ POST /anything
"uri": "/anything",
"plugins": {
"ai-proxy-multi": {
- "providers": [
+ "instances": [
{
- "name": "openai",
- "model": "gpt-35-turbo-instruct",
+ "name": "openai-official",
+ "provider": "openai",
"weight": 1,
"auth": {
"query": {
@@ -319,6 +322,7 @@ POST /anything
}
},
"options": {
+ "model": "gpt-35-turbo-instruct",
"max_tokens": 512,
"temperature": 1.0
},
@@ -359,68 +363,3 @@ POST /anything
found query params: {"api_key":"apikey","some_query":"yes"}
--- response_body
passed
-
-
-
-=== TEST 9: set route with unavailable endpoint
---- config
- location /t {
- content_by_lua_block {
- local t = require("lib.test_admin").test
- local code, body = t('/apisix/admin/routes/1',
- ngx.HTTP_PUT,
- [[{
- "uri": "/anything",
- "plugins": {
- "ai-proxy-multi": {
- "providers": [
- {
- "name": "openai",
- "model": "gpt-4",
- "weight": 1,
- "auth": {
- "header": {
- "Authorization": "Bearer token"
- }
- },
- "options": {
- "max_tokens": 512,
- "temperature": 1.0
- },
- "override": {
-                                    "endpoint": "http://unavailable.endpoint.ehfwuehr:404"
- }
- }
- ],
- "ssl_verify": false
- }
- },
- "upstream": {
- "type": "roundrobin",
- "nodes": {
- "canbeanything.com": 1
- }
- }
- }]]
- )
-
- if code >= 300 then
- ngx.status = code
- end
- ngx.say(body)
- }
- }
---- response_body
-passed
-
-
-
-=== TEST 10: ai-proxy-multi should retry once and fail
-# i.e it should not attempt to proxy request endlessly
---- request
-POST /anything
-{ "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] }
---- error_code: 500
---- error_log
-parse_domain(): failed to parse domain: unavailable.endpoint.ehfwuehr, error: failed to query the DNS server: dns
-phase_func(): failed to send request to LLM service: failed to connect to LLM server: failed to parse domain
diff --git a/t/plugin/ai-proxy.openai-compatible.t b/t/plugin/ai-proxy.openai-compatible.t
index a98161a48..84ae175da 100644
--- a/t/plugin/ai-proxy.openai-compatible.t
+++ b/t/plugin/ai-proxy.openai-compatible.t
@@ -132,18 +132,16 @@ __DATA__
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai-compatible",
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
- "model": {
- "provider": "openai-compatible",
- "name": "custom",
- "options": {
- "max_tokens": 512,
- "temperature": 1.0
- }
+ "options": {
+ "model": "custom",
+ "max_tokens": 512,
+ "temperature": 1.0
                    },
                    "override": {
                        "endpoint": "http://localhost:6724/v1/chat/completions"
@@ -194,18 +192,16 @@ qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai-compatible",
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
- "model": {
- "provider": "openai-compatible",
- "name": "some-model",
- "options": {
- "foo": "bar",
- "temperature": 1.0
- }
+ "options": {
+ "model": "some-model",
+ "foo": "bar",
+ "temperature": 1.0
},
"override": {
"endpoint": "http://localhost:6724/random"
@@ -264,19 +260,17 @@ path override works
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai-compatible",
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
- "model": {
- "provider": "openai-compatible",
- "name": "custom",
- "options": {
- "max_tokens": 512,
- "temperature": 1.0,
- "stream": true
- }
+ "options": {
+ "model": "custom",
+ "max_tokens": 512,
+ "temperature": 1.0,
+ "stream": true
                    },
                    "override": {
                        "endpoint": "http://localhost:7737/v1/chat/completions"
@@ -343,22 +337,3 @@ passed
ngx.say(err)
return
end
-
- local final_res = {}
- while true do
-                local chunk, err = res.body_reader() -- will read chunk by chunk
- if err then
- core.log.error("failed to read response chunk: ", err)
- break
- end
- if not chunk then
- break
- end
- core.table.insert_tail(final_res, chunk)
- end
-
- ngx.print(#final_res .. final_res[6])
- }
- }
---- response_body_like eval
-qr/6data: \[DONE\]\n\n/
diff --git a/t/plugin/ai-proxy.t b/t/plugin/ai-proxy.t
index 8cfd88018..08220fc3c 100644
--- a/t/plugin/ai-proxy.t
+++ b/t/plugin/ai-proxy.t
@@ -127,9 +127,9 @@ __DATA__
content_by_lua_block {
local plugin = require("apisix.plugins.ai-proxy")
local ok, err = plugin.check_schema({
- model = {
- provider = "openai",
- name = "gpt-4",
+ provider = "openai",
+ options = {
+ model = "gpt-4",
},
auth = {
header = {
@@ -156,9 +156,9 @@ passed
content_by_lua_block {
local plugin = require("apisix.plugins.ai-proxy")
local ok, err = plugin.check_schema({
- model = {
- provider = "some-unique",
- name = "gpt-4",
+ provider = "some-unique",
+ options = {
+ model = "gpt-4",
},
auth = {
header = {
@@ -175,7 +175,7 @@ passed
}
}
--- response_body eval
-qr/.*provider: some-unique is not supported.*/
+qr/.*property "provider" validation failed: matches none of the enum values.*/
@@ -190,18 +190,16 @@ qr/.*provider: some-unique is not supported.*/
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai",
"auth": {
"header": {
"Authorization": "Bearer wrongtoken"
}
},
- "model": {
- "provider": "openai",
- "name": "gpt-35-turbo-instruct",
- "options": {
- "max_tokens": 512,
- "temperature": 1.0
- }
+ "options": {
+ "model": "gpt-35-turbo-instruct",
+ "max_tokens": 512,
+ "temperature": 1.0
},
"override": {
"endpoint": "http://localhost:6724"
@@ -250,18 +248,16 @@ Unauthorized
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai",
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
- "model": {
- "provider": "openai",
- "name": "gpt-35-turbo-instruct",
- "options": {
- "max_tokens": 512,
- "temperature": 1.0
- }
+ "options": {
+ "model": "gpt-35-turbo-instruct",
+ "max_tokens": 512,
+ "temperature": 1.0
},
"override": {
"endpoint": "http://localhost:6724"
@@ -342,7 +338,7 @@ prompt%3Dwhat%2520is%25201%2520%252B%25201
Content-Type: application/x-www-form-urlencoded
--- error_code: 400
--- response_body chomp
-unsupported content-type: application/x-www-form-urlencoded
+unsupported content-type: application/x-www-form-urlencoded, only application/json is supported
@@ -369,18 +365,16 @@ request format doesn't match schema: property "messages" is required
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai",
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
- "model": {
- "provider": "openai",
- "name": "some-model",
- "options": {
- "foo": "bar",
- "temperature": 1.0
- }
+ "options": {
+ "model": "some-model",
+ "foo": "bar",
+ "temperature": 1.0
},
"override": {
"endpoint": "http://localhost:6724"
@@ -440,18 +434,16 @@ options_works
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai",
+ "model": "some-model",
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
- "model": {
- "provider": "openai",
- "name": "some-model",
- "options": {
- "foo": "bar",
- "temperature": 1.0
- }
+ "options": {
+ "foo": "bar",
+ "temperature": 1.0
},
"override": {
"endpoint": "http://localhost:6724/random"
@@ -510,19 +502,17 @@ path override works
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai",
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
- "model": {
- "provider": "openai",
- "name": "gpt-35-turbo-instruct",
- "options": {
- "max_tokens": 512,
- "temperature": 1.0,
- "stream": true
- }
+ "options": {
+ "model": "gpt-35-turbo-instruct",
+ "max_tokens": 512,
+ "temperature": 1.0,
+ "stream": true
},
"override": {
"endpoint": "http://localhost:7737"
diff --git a/t/plugin/ai-proxy2.t b/t/plugin/ai-proxy2.t
index f372e4fbd..942f449cd 100644
--- a/t/plugin/ai-proxy2.t
+++ b/t/plugin/ai-proxy2.t
@@ -117,18 +117,16 @@ __DATA__
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai",
"auth": {
"query": {
"api_key": "wrong_key"
}
},
- "model": {
- "provider": "openai",
- "name": "gpt-35-turbo-instruct",
- "options": {
- "max_tokens": 512,
- "temperature": 1.0
- }
+ "options": {
+ "model": "gpt-35-turbo-instruct",
+ "max_tokens": 512,
+ "temperature": 1.0
},
"override": {
"endpoint": "http://localhost:6724"
@@ -177,18 +175,16 @@ Unauthorized
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai",
"auth": {
"query": {
"api_key": "apikey"
}
},
- "model": {
- "provider": "openai",
- "name": "gpt-35-turbo-instruct",
- "options": {
- "max_tokens": 512,
- "temperature": 1.0
- }
+ "options": {
+ "model": "gpt-35-turbo-instruct",
+ "max_tokens": 512,
+ "temperature": 1.0
},
"override": {
"endpoint": "http://localhost:6724"
@@ -237,18 +233,16 @@ passed
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai",
"auth": {
"header": {
"Authorization": "some-key"
}
},
- "model": {
- "provider": "openai",
- "name": "gpt-4",
- "options": {
- "max_tokens": 512,
- "temperature": 1.0
- }
+ "options": {
+ "model": "gpt-4",
+ "max_tokens": 512,
+ "temperature": 1.0
}
}
},
@@ -292,18 +286,15 @@ POST /anything
"uri": "/anything",
"plugins": {
"ai-proxy": {
+ "provider": "openai",
"auth": {
"query": {
"api_key": "apikey"
}
},
- "model": {
- "provider": "openai",
- "name": "gpt-35-turbo-instruct",
- "options": {
- "max_tokens": 512,
- "temperature": 1.0
- }
+ "options": {
+ "max_tokens": 512,
+ "temperature": 1.0
                    },
                    "override": {
                        "endpoint": "http://localhost:6724/test/params/in/overridden/endpoint?some_query=yes"