(apisix) branch master updated: feat(ai-rate-limiting): add expression-based limit strategy (#13191)

nic443 Fri, 10 Apr 2026 02:39:37 -0700

This is an automated email from the ASF dual-hosted git repository.

nic443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git



The following commit(s) were added to refs/heads/master by this push:
     new ac99cd874 feat(ai-rate-limiting): add expression-based limit strategy (#13191)
ac99cd874 is described below

commit ac99cd8740a87ae92937b54ce3f3dc5861935e47
Author: Nic <[email protected]>
AuthorDate: Fri Apr 10 17:39:13 2026 +0800

    feat(ai-rate-limiting): add expression-based limit strategy (#13191)
---
 apisix/plugins/ai-rate-limiting.lua    | 106 +++++-
 t/plugin/ai-rate-limiting-expression.t | 620 +++++++++++++++++++++++++++++++++
 2 files changed, 724 insertions(+), 2 deletions(-)

diff --git a/apisix/plugins/ai-rate-limiting.lua b/apisix/plugins/ai-rate-limiting.lua
index 8c7eea51a..cdf2d9fb1 100644
--- a/apisix/plugins/ai-rate-limiting.lua
+++ b/apisix/plugins/ai-rate-limiting.lua
@@ -18,6 +18,11 @@ local require = require
 local setmetatable = setmetatable
 local ipairs = ipairs
 local type = type
+local pairs = pairs
+local pcall = pcall
+local load = load
+local math_floor = math.floor
+local math_huge = math.huge
 local core = require("apisix.core")
 local limit_count = require("apisix.plugins.limit-count.init")
 
@@ -61,10 +66,19 @@ local schema = {
         show_limit_quota_header = {type = "boolean", default = true},
         limit_strategy = {
             type = "string",
-            enum = {"total_tokens", "prompt_tokens", "completion_tokens"},
+            enum = {"total_tokens", "prompt_tokens", "completion_tokens", "expression"},
             default = "total_tokens",
             description = "The strategy to limit the tokens"
         },
+        cost_expr = {
+            type = "string",
+            minLength = 1,
+            description = "Lua arithmetic expression for dynamic token cost calculation. "
+                .. "Variables are injected from the LLM API raw usage response fields. "
+                .. "Missing variables default to 0. "
+                .. "Only valid when limit_strategy is 'expression'. "
+                .. "Example: input_tokens + cache_creation_input_tokens + output_tokens",
+        },
         instances = {
             type = "array",
             items = instance_limit_schema,
@@ -136,8 +150,42 @@ local limit_conf_cache = core.lrucache.new({
 })
 
 
+-- safe math functions allowed in cost expressions
+local expr_safe_env = {
+    math = math,
+    abs = math.abs,
+    ceil = math.ceil,
+    floor = math.floor,
+    max = math.max,
+    min = math.min,
+}
+
+local function compile_cost_expr(expr_str)
+    local fn_code = "return " .. expr_str
+    -- validate syntax by loading first
+    local fn, err = load(fn_code, "cost_expr", "t", expr_safe_env)
+    if not fn then
+        return nil, err
+    end
+    return fn_code
+end
+
+
 function _M.check_schema(conf)
-    return core.schema.check(schema, conf)
+    local ok, err = core.schema.check(schema, conf)
+    if not ok then
+        return false, err
+    end
+    if conf.limit_strategy == "expression" then
+        if not conf.cost_expr or conf.cost_expr == "" then
+            return false, "cost_expr is required when limit_strategy is 'expression'"
+        end
+        local _, compile_err = compile_cost_expr(conf.cost_expr)
+        if compile_err then
+            return false, "invalid cost_expr: " .. compile_err
+        end
+    end
+    return true
 end
 
 
@@ -264,7 +312,57 @@ function _M.check_instance_status(conf, ctx, instance_name)
 end
 
 
+local function eval_cost_expr(conf_cost_expr, raw)
+    local fn_code = "return " .. conf_cost_expr
+    -- build environment: safe math + usage variables (missing vars default to 0)
+    local env = setmetatable({}, {
+        __index = function(_, k)
+            local v = expr_safe_env[k]
+            if v ~= nil then
+                return v
+            end
+            return 0
+        end
+    })
+    for k, v in pairs(raw) do
+        if type(v) == "number" and not expr_safe_env[k] then
+            env[k] = v
+        end
+    end
+    local fn, err = load(fn_code, "cost_expr", "t", env)
+    if not fn then
+        return nil, "failed to compile cost_expr: " .. err
+    end
+    local ok, result = pcall(fn)
+    if not ok then
+        return nil, "failed to evaluate cost_expr: " .. result
+    end
+    if type(result) ~= "number" then
+        return nil, "cost_expr must return a number, got: " .. type(result)
+    end
+    if result ~= result or result == math_huge or result == -math_huge then
+        return nil, "cost_expr returned non-finite value"
+    end
+    if result < 0 then
+        result = 0
+    end
+    return math_floor(result + 0.5)
+end
+
 local function get_token_usage(conf, ctx)
+    if conf.limit_strategy == "expression" then
+        local raw = ctx.llm_raw_usage
+        if not raw then
+            return
+        end
+        local result, err = eval_cost_expr(conf.cost_expr, raw)
+        if not result then
+            core.log.error(err)
+            return
+        end
+        return result
+    end
+
     local usage = ctx.ai_token_usage
     if not usage then
         return
@@ -288,6 +386,10 @@ function _M.log(conf, ctx)
         core.log.error("failed to get token usage for llm service")
         return
     end
+    if used_tokens == 0 then
+        core.log.info("token usage is 0, skip rate limiting")
+        return
+    end
 
     core.log.info("instance name: ", instance_name, " used tokens: ", used_tokens)
 
diff --git a/t/plugin/ai-rate-limiting-expression.t b/t/plugin/ai-rate-limiting-expression.t
new file mode 100644
index 000000000..a0f818406
--- /dev/null
+++ b/t/plugin/ai-rate-limiting-expression.t
@@ -0,0 +1,620 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+BEGIN {
+    $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
+use t::APISIX 'no_plan';
+
+log_level("info");
+repeat_each(1);
+no_long_string();
+no_shuffle();
+no_root_location();
+
+add_block_preprocessor(sub {
+    my ($block) = @_;
+
+    if (!defined $block->request) {
+        $block->set_value("request", "GET /t");
+    }
+
+    my $http_config = $block->http_config // <<_EOC_;
+        server {
+            server_name anthropic;
+            listen 16725;
+
+            default_type 'application/json';
+
+            location /v1/messages {
+                content_by_lua_block {
+                    local json = require("cjson.safe")
+                    local ngx = ngx
+
+                    ngx.req.read_body()
+                    local body = ngx.req.get_body_data()
+                    body = json.decode(body)
+
+                    if not body or not body.messages then
+                        ngx.status = 400
+                        ngx.say('{"type":"error","error":{"type":"invalid_request_error","message":"missing messages"}}')
+                        return
+                    end
+
+                    local api_key = ngx.req.get_headers()["x-api-key"]
+                    if api_key ~= "test-key" then
+                        ngx.status = 401
+                        ngx.say('{"type":"error","error":{"type":"authentication_error","message":"invalid x-api-key"}}')
+                        return
+                    end
+
+                    if body.stream then
+                        ngx.header["Content-Type"] = "text/event-stream"
+
+                        -- message_start with input_tokens and cache tokens
+                        local message_start = json.encode({
+                            type = "message_start",
+                            message = {
+                                id = "msg_test123",
+                                type = "message",
+                                role = "assistant",
+                                model = body.model or "claude-sonnet-4-20250514",
+                                content = {},
+                                usage = {
+                                    input_tokens = 50,
+                                    output_tokens = 0,
+                                    cache_creation_input_tokens = 100,
+                                    cache_read_input_tokens = 200,
+                                },
+                            },
+                        })
+                        ngx.say("event: message_start")
+                        ngx.say("data: " .. message_start)
+                        ngx.say("")
+
+                        -- content_block_start
+                        ngx.say("event: content_block_start")
+                        ngx.say('data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}')
+                        ngx.say("")
+
+                        -- content_block_delta
+                        ngx.say("event: content_block_delta")
+                        ngx.say('data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello from Claude!"}}')
+                        ngx.say("")
+
+                        -- content_block_stop
+                        ngx.say("event: content_block_stop")
+                        ngx.say('data: {"type":"content_block_stop","index":0}')
+                        ngx.say("")
+
+                        -- message_delta with output_tokens
+                        local message_delta = json.encode({
+                            type = "message_delta",
+                            delta = { stop_reason = "end_turn" },
+                            usage = {
+                                output_tokens = 30,
+                            },
+                        })
+                        ngx.say("event: message_delta")
+                        ngx.say("data: " .. message_delta)
+                        ngx.say("")
+
+                        -- message_stop
+                        ngx.say("event: message_stop")
+                        ngx.say("data: {}")
+                        ngx.say("")
+                    else
+                        ngx.status = 200
+                        ngx.say(json.encode({
+                            id = "msg_test456",
+                            type = "message",
+                            role = "assistant",
+                            model = body.model or "claude-sonnet-4-20250514",
+                            content = {{
+                                type = "text",
+                                text = "Hello from Claude!",
+                            }},
+                            stop_reason = "end_turn",
+                            usage = {
+                                input_tokens = 50,
+                                output_tokens = 30,
+                                cache_creation_input_tokens = 100,
+                                cache_read_input_tokens = 200,
+                            },
+                        }))
+                    end
+                }
+            }
+        }
+_EOC_
+
+    $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: schema validation - expression strategy requires cost_expr
+--- config
+    location /t {
+        content_by_lua_block {
+            local plugin = require("apisix.plugins.ai-rate-limiting")
+            local configs = {
+                -- expression without cost_expr
+                {
+                    limit = 100,
+                    time_window = 60,
+                    limit_strategy = "expression",
+                },
+                -- expression with empty cost_expr
+                {
+                    limit = 100,
+                    time_window = 60,
+                    limit_strategy = "expression",
+                    cost_expr = "",
+                },
+                -- expression with invalid cost_expr syntax
+                {
+                    limit = 100,
+                    time_window = 60,
+                    limit_strategy = "expression",
+                    cost_expr = "invalid $$$ syntax %%%",
+                },
+                -- valid expression
+                {
+                    limit = 100,
+                    time_window = 60,
+                    limit_strategy = "expression",
+                    cost_expr = "input_tokens + output_tokens",
+                },
+                -- valid complex expression
+                {
+                    limit = 100,
+                    time_window = 60,
+                    limit_strategy = "expression",
+                    cost_expr = "(input_tokens - cache_read_input_tokens) + cache_creation_input_tokens * 1.25 + output_tokens",
+                },
+            }
+            for i, conf in ipairs(configs) do
+                local ok, err = plugin.check_schema(conf)
+                if ok then
+                    ngx.say("config " .. i .. ": valid")
+                else
+                    ngx.say("config " .. i .. ": invalid")
+                end
+            end
+        }
+    }
+--- response_body
+config 1: invalid
+config 2: invalid
+config 3: invalid
+config 4: valid
+config 5: valid
+
+
+
+=== TEST 2: set route with expression rate limiting (non-streaming, native Anthropic)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 500,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens + cache_creation_input_tokens + output_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 3: non-streaming request - expression counts input_tokens + cache_creation + output_tokens
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 499",
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 319",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 4: set route with expression rate limiting (streaming, native Anthropic)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 500,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens + cache_creation_input_tokens + output_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 5: streaming request - verify token usage accumulation and rate limiting
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"stream":true,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"stream":true,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 499",
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 319",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 6: set route with cache-aware ITPM expression (excludes cache_read_input_tokens)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 100,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens + cache_creation_input_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 7: cache-aware ITPM - cost=150 exceeds limit=100 after first request, second rejected
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- error_code eval
+[200, 503]
+--- no_error_log
+[error]
+
+
+
+=== TEST 8: set route with weighted expression (cache_read costs 10%, cache_creation costs 125%)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 1000,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens + cache_read_input_tokens * 0.1 + cache_creation_input_tokens * 1.25 + output_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 9: weighted expression - two requests (cost = 50 + 200*0.1 + 100*1.25 + 30 = 225 each)
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 999",
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 774",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 10: expression with missing variables defaults to 0
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 500,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens + nonexistent_field + output_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 11: missing variable defaults to 0 - cost = 50 + 0 + 30 = 80 per request
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 499",
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 419",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 12: set route with expression that can yield negative cost
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 100,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens - cache_read_input_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 13: negative expression result clamped to 0 - cost = 50 - 200 = -150, clamped to 0
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . 
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . 
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 99",
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 99",
+]
+--- no_error_log
+[error]

(apisix) branch master updated: feat(ai-rate-limiting): add expression-based limit strategy (#13191)

Reply via email to