This is an automated email from the ASF dual-hosted git repository.
nic443 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new ac99cd874 feat(ai-rate-limiting): add expression-based limit strategy (#13191)
ac99cd874 is described below
commit ac99cd8740a87ae92937b54ce3f3dc5861935e47
Author: Nic <[email protected]>
AuthorDate: Fri Apr 10 17:39:13 2026 +0800
feat(ai-rate-limiting): add expression-based limit strategy (#13191)
---
apisix/plugins/ai-rate-limiting.lua | 106 +++++-
t/plugin/ai-rate-limiting-expression.t | 620 +++++++++++++++++++++++++++++++++
2 files changed, 724 insertions(+), 2 deletions(-)
diff --git a/apisix/plugins/ai-rate-limiting.lua b/apisix/plugins/ai-rate-limiting.lua
index 8c7eea51a..cdf2d9fb1 100644
--- a/apisix/plugins/ai-rate-limiting.lua
+++ b/apisix/plugins/ai-rate-limiting.lua
@@ -18,6 +18,11 @@ local require = require
local setmetatable = setmetatable
local ipairs = ipairs
local type = type
+local pairs = pairs
+local pcall = pcall
+local load = load
+local math_floor = math.floor
+local math_huge = math.huge
local core = require("apisix.core")
local limit_count = require("apisix.plugins.limit-count.init")
@@ -61,10 +66,19 @@ local schema = {
show_limit_quota_header = {type = "boolean", default = true},
limit_strategy = {
type = "string",
- enum = {"total_tokens", "prompt_tokens", "completion_tokens"},
+ enum = {"total_tokens", "prompt_tokens", "completion_tokens",
"expression"},
default = "total_tokens",
description = "The strategy to limit the tokens"
},
+ cost_expr = {
+ type = "string",
+ minLength = 1,
+ description = "Lua arithmetic expression for dynamic token cost
calculation. "
+ .. "Variables are injected from the LLM API raw usage response
fields. "
+ .. "Missing variables default to 0. "
+ .. "Only valid when limit_strategy is 'expression'. "
+ .. "Example: input_tokens + cache_creation_input_tokens +
output_tokens",
+ },
instances = {
type = "array",
items = instance_limit_schema,
@@ -136,8 +150,42 @@ local limit_conf_cache = core.lrucache.new({
})
+-- Environment exposed to cost expressions: a small whitelist of math helpers.
+-- `math` is handed out through a read-only proxy instead of the real library
+-- table. An expression may embed a function literal containing statements,
+-- e.g. `(function() math.floor = nil return 0 end)()`, and a write through
+-- the real table would modify `math` for every other user in the same Lua VM.
+local expr_safe_env = {
+    math = setmetatable({}, {
+        -- reads (math.floor, math.pi, ...) fall through to the real library
+        __index = math,
+        -- writes are dropped so the shared library table is never modified
+        __newindex = function() end,
+    }),
+    abs = math.abs,
+    ceil = math.ceil,
+    floor = math.floor,
+    max = math.max,
+    min = math.min,
+}
+
+--- Check that a cost expression is syntactically valid Lua.
+-- The expression is wrapped as `return <expr>` and compiled against the
+-- restricted math environment; the compiled chunk is never executed here.
+-- @param expr_str string: the user-supplied expression
+-- @return string|nil the wrapped chunk source on success
+-- @return string|nil the loader's error message on failure
+local function compile_cost_expr(expr_str)
+    local chunk_src = "return " .. expr_str
+    local compiled, load_err = load(chunk_src, "cost_expr", "t", expr_safe_env)
+    if compiled then
+        return chunk_src
+    end
+    return nil, load_err
+end
+
+
function _M.check_schema(conf)
-    return core.schema.check(schema, conf)
+    -- Validate the plugin configuration: first against the JSON schema, then,
+    -- for the "expression" strategy only, require a non-empty cost_expr that
+    -- compiles as a Lua expression.
+    -- Returns true on success, or false plus an error message.
+    local valid, schema_err = core.schema.check(schema, conf)
+    if not valid then
+        return false, schema_err
+    end
+
+    -- the remaining checks concern the expression strategy only
+    if conf.limit_strategy ~= "expression" then
+        return true
+    end
+
+    local expr = conf.cost_expr
+    if not expr or expr == "" then
+        return false, "cost_expr is required when limit_strategy is 'expression'"
+    end
+
+    local _, compile_err = compile_cost_expr(expr)
+    if compile_err then
+        return false, "invalid cost_expr: " .. compile_err
+    end
+    return true
end
@@ -264,7 +312,57 @@ function _M.check_instance_status(conf, ctx, instance_name)
end
+--- Evaluate a cost expression against the raw usage reported by the LLM.
+-- Numeric fields of `raw` become variables of the expression. Names from the
+-- safe math environment take precedence over usage fields, and any other
+-- name reads as 0.
+-- @param conf_cost_expr string: expression source (without the `return`)
+-- @param raw table: raw usage fields
+-- @return number|nil non-negative cost rounded to the nearest integer
+-- @return string|nil error message when compilation or evaluation fails
+local function eval_cost_expr(conf_cost_expr, raw)
+    local function lookup(_, name)
+        local builtin = expr_safe_env[name]
+        if builtin == nil then
+            -- unknown identifiers (e.g. missing usage fields) count as zero
+            return 0
+        end
+        return builtin
+    end
+
+    local env = setmetatable({}, { __index = lookup })
+    for field, value in pairs(raw) do
+        -- only plain numbers are exposed, and never under a whitelisted name
+        if not expr_safe_env[field] and type(value) == "number" then
+            env[field] = value
+        end
+    end
+
+    local chunk, load_err = load("return " .. conf_cost_expr, "cost_expr", "t", env)
+    if not chunk then
+        return nil, "failed to compile cost_expr: " .. load_err
+    end
+
+    local ok, cost = pcall(chunk)
+    if not ok then
+        -- on failure pcall's second value is the error message
+        return nil, "failed to evaluate cost_expr: " .. cost
+    end
+
+    local cost_type = type(cost)
+    if cost_type ~= "number" then
+        return nil, "cost_expr must return a number, got: " .. cost_type
+    end
+
+    -- NaN is the only value that is not equal to itself
+    local is_nan = cost ~= cost
+    if is_nan or cost == math_huge or cost == -math_huge then
+        return nil, "cost_expr returned non-finite value"
+    end
+
+    if cost < 0 then
+        -- negative costs are clamped to zero
+        return 0
+    end
+    -- round half up to the nearest integer
+    return math_floor(cost + 0.5)
+end
+
local function get_token_usage(conf, ctx)
+ if conf.limit_strategy == "expression" then
+ local raw = ctx.llm_raw_usage
+ if not raw then
+ return
+ end
+ local result, err = eval_cost_expr(conf.cost_expr, raw)
+ if not result then
+ core.log.error(err)
+ return
+ end
+ return result
+ end
+
local usage = ctx.ai_token_usage
if not usage then
return
@@ -288,6 +386,10 @@ function _M.log(conf, ctx)
core.log.error("failed to get token usage for llm service")
return
end
+ if used_tokens == 0 then
+ core.log.info("token usage is 0, skip rate limiting")
+ return
+ end
core.log.info("instance name: ", instance_name, " used tokens: ",
used_tokens)
diff --git a/t/plugin/ai-rate-limiting-expression.t
b/t/plugin/ai-rate-limiting-expression.t
new file mode 100644
index 000000000..a0f818406
--- /dev/null
+++ b/t/plugin/ai-rate-limiting-expression.t
@@ -0,0 +1,620 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# BEGIN runs at compile time, so the variable is set before `use t::APISIX`
+# is processed. NOTE(review): presumably this turns off the control API v1
+# for these tests -- confirm against t::APISIX.
+BEGIN {
+ $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
+use t::APISIX 'no_plan';
+
+# Harness settings. The test blocks below depend on running in file order:
+# each "set route" test configures the route that the following test calls,
+# hence no_shuffle() and a single pass per block.
+log_level("info");
+repeat_each(1);
+no_long_string();
+no_shuffle();
+no_root_location();
+
+# Per-block preprocessor: fills in defaults for every test block.
+#  * request defaults to "GET /t" when the block does not define one.
+#  * http_config defaults to a mock Anthropic-style server on port 16725
+#    serving /v1/messages. The mock answers 400 when the body has no
+#    `messages`, 401 when the x-api-key header is not "test-key", an SSE
+#    stream when `stream` is set, and otherwise a JSON message. Reported usage
+#    is input_tokens=50, output_tokens=30, cache_creation_input_tokens=100 and
+#    cache_read_input_tokens=200 (in streaming mode output_tokens arrives in
+#    the message_delta event).
+add_block_preprocessor(sub {
+ my ($block) = @_;
+
+ # blocks without an explicit request hit the /t helper location
+ if (!defined $block->request) {
+ $block->set_value("request", "GET /t");
+ }
+
+ # use the block's own http_config when present, else the mock upstream
+ my $http_config = $block->http_config // <<_EOC_;
+ server {
+ server_name anthropic;
+ listen 16725;
+
+ default_type 'application/json';
+
+ location /v1/messages {
+ content_by_lua_block {
+ local json = require("cjson.safe")
+ local ngx = ngx
+
+ ngx.req.read_body()
+ local body = ngx.req.get_body_data()
+ body = json.decode(body)
+
+ if not body or not body.messages then
+ ngx.status = 400
+
ngx.say('{"type":"error","error":{"type":"invalid_request_error","message":"missing
messages"}}')
+ return
+ end
+
+ local api_key = ngx.req.get_headers()["x-api-key"]
+ if api_key ~= "test-key" then
+ ngx.status = 401
+
ngx.say('{"type":"error","error":{"type":"authentication_error","message":"invalid
x-api-key"}}')
+ return
+ end
+
+ if body.stream then
+ ngx.header["Content-Type"] = "text/event-stream"
+
+ -- message_start with input_tokens and cache tokens
+ local message_start = json.encode({
+ type = "message_start",
+ message = {
+ id = "msg_test123",
+ type = "message",
+ role = "assistant",
+ model = body.model or
"claude-sonnet-4-20250514",
+ content = {},
+ usage = {
+ input_tokens = 50,
+ output_tokens = 0,
+ cache_creation_input_tokens = 100,
+ cache_read_input_tokens = 200,
+ },
+ },
+ })
+ ngx.say("event: message_start")
+ ngx.say("data: " .. message_start)
+ ngx.say("")
+
+ -- content_block_start
+ ngx.say("event: content_block_start")
+ ngx.say('data:
{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}')
+ ngx.say("")
+
+ -- content_block_delta
+ ngx.say("event: content_block_delta")
+ ngx.say('data:
{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello
from Claude!"}}')
+ ngx.say("")
+
+ -- content_block_stop
+ ngx.say("event: content_block_stop")
+ ngx.say('data:
{"type":"content_block_stop","index":0}')
+ ngx.say("")
+
+ -- message_delta with output_tokens
+ local message_delta = json.encode({
+ type = "message_delta",
+ delta = { stop_reason = "end_turn" },
+ usage = {
+ output_tokens = 30,
+ },
+ })
+ ngx.say("event: message_delta")
+ ngx.say("data: " .. message_delta)
+ ngx.say("")
+
+ -- message_stop
+ ngx.say("event: message_stop")
+ ngx.say("data: {}")
+ ngx.say("")
+ else
+ ngx.status = 200
+ ngx.say(json.encode({
+ id = "msg_test456",
+ type = "message",
+ role = "assistant",
+ model = body.model or "claude-sonnet-4-20250514",
+ content = {{
+ type = "text",
+ text = "Hello from Claude!",
+ }},
+ stop_reason = "end_turn",
+ usage = {
+ input_tokens = 50,
+ output_tokens = 30,
+ cache_creation_input_tokens = 100,
+ cache_read_input_tokens = 200,
+ },
+ }))
+ end
+ }
+ }
+ }
+_EOC_
+
+ # always store the value back so the harness sees the effective config
+ $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: schema validation - expression strategy requires cost_expr
+--- config
+ location /t {
+ content_by_lua_block {
+ local plugin = require("apisix.plugins.ai-rate-limiting")
+ local configs = {
+ -- expression without cost_expr
+ {
+ limit = 100,
+ time_window = 60,
+ limit_strategy = "expression",
+ },
+ -- expression with empty cost_expr
+ {
+ limit = 100,
+ time_window = 60,
+ limit_strategy = "expression",
+ cost_expr = "",
+ },
+ -- expression with invalid cost_expr syntax
+ {
+ limit = 100,
+ time_window = 60,
+ limit_strategy = "expression",
+ cost_expr = "invalid $$$ syntax %%%",
+ },
+ -- valid expression
+ {
+ limit = 100,
+ time_window = 60,
+ limit_strategy = "expression",
+ cost_expr = "input_tokens + output_tokens",
+ },
+ -- valid complex expression
+ {
+ limit = 100,
+ time_window = 60,
+ limit_strategy = "expression",
+ cost_expr = "(input_tokens - cache_read_input_tokens) +
cache_creation_input_tokens * 1.25 + output_tokens",
+ },
+ }
+ for i, conf in ipairs(configs) do
+ local ok, err = plugin.check_schema(conf)
+ if ok then
+ ngx.say("config " .. i .. ": valid")
+ else
+ ngx.say("config " .. i .. ": invalid")
+ end
+ end
+ }
+ }
+--- response_body
+config 1: invalid
+config 2: invalid
+config 3: invalid
+config 4: valid
+config 5: valid
+
+
+
+=== TEST 2: set route with expression rate limiting (non-streaming, native
Anthropic)
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/v1/messages",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "anthropic",
+ "auth": {
+ "header": {
+ "x-api-key": "test-key",
+ "anthropic-version": "2023-06-01"
+ }
+ },
+ "options": {
+ "model": "claude-sonnet-4-20250514"
+ },
+ "override": {
+ "endpoint": "http://localhost:16725"
+ },
+ "ssl_verify": false
+ },
+ "ai-rate-limiting": {
+ "limit": 500,
+ "time_window": 60,
+ "limit_strategy": "expression",
+ "cost_expr": "input_tokens +
cache_creation_input_tokens + output_tokens"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "canbeanything.com": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 3: non-streaming request - expression counts input_tokens +
cache_creation + output_tokens
+--- pipelined_requests eval
+[
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+ "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 499",
+ "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 319",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 4: set route with expression rate limiting (streaming, native
Anthropic)
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/v1/messages",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "anthropic",
+ "auth": {
+ "header": {
+ "x-api-key": "test-key",
+ "anthropic-version": "2023-06-01"
+ }
+ },
+ "options": {
+ "model": "claude-sonnet-4-20250514"
+ },
+ "override": {
+ "endpoint": "http://localhost:16725"
+ },
+ "ssl_verify": false
+ },
+ "ai-rate-limiting": {
+ "limit": 500,
+ "time_window": 60,
+ "limit_strategy": "expression",
+ "cost_expr": "input_tokens +
cache_creation_input_tokens + output_tokens"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "canbeanything.com": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 5: streaming request - verify token usage accumulation and rate
limiting
+--- pipelined_requests eval
+[
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"stream":true,"messages":[{"role":"user","content":"Hello"}]}',
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"stream":true,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+ "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 499",
+ "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 319",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 6: set route with cache-aware ITPM expression (excludes
cache_read_input_tokens)
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/v1/messages",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "anthropic",
+ "auth": {
+ "header": {
+ "x-api-key": "test-key",
+ "anthropic-version": "2023-06-01"
+ }
+ },
+ "options": {
+ "model": "claude-sonnet-4-20250514"
+ },
+ "override": {
+ "endpoint": "http://localhost:16725"
+ },
+ "ssl_verify": false
+ },
+ "ai-rate-limiting": {
+ "limit": 100,
+ "time_window": 60,
+ "limit_strategy": "expression",
+ "cost_expr": "input_tokens +
cache_creation_input_tokens"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "canbeanything.com": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 7: cache-aware ITPM - cost=150 exceeds limit=100 after first request,
second rejected
+--- pipelined_requests eval
+[
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- error_code eval
+[200, 503]
+--- no_error_log
+[error]
+
+
+
+=== TEST 8: set route with weighted expression (cache_read costs 10%,
cache_creation costs 125%)
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/v1/messages",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "anthropic",
+ "auth": {
+ "header": {
+ "x-api-key": "test-key",
+ "anthropic-version": "2023-06-01"
+ }
+ },
+ "options": {
+ "model": "claude-sonnet-4-20250514"
+ },
+ "override": {
+ "endpoint": "http://localhost:16725"
+ },
+ "ssl_verify": false
+ },
+ "ai-rate-limiting": {
+ "limit": 1000,
+ "time_window": 60,
+ "limit_strategy": "expression",
+ "cost_expr": "input_tokens +
cache_read_input_tokens * 0.1 + cache_creation_input_tokens * 1.25 +
output_tokens"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "canbeanything.com": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 9: weighted expression - two requests (cost = 50 + 200*0.1 + 100*1.25
+ 30 = 225 each)
+--- pipelined_requests eval
+[
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+ "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 999",
+ "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 774",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 10: expression with missing variables defaults to 0
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/v1/messages",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "anthropic",
+ "auth": {
+ "header": {
+ "x-api-key": "test-key",
+ "anthropic-version": "2023-06-01"
+ }
+ },
+ "options": {
+ "model": "claude-sonnet-4-20250514"
+ },
+ "override": {
+ "endpoint": "http://localhost:16725"
+ },
+ "ssl_verify": false
+ },
+ "ai-rate-limiting": {
+ "limit": 500,
+ "time_window": 60,
+ "limit_strategy": "expression",
+ "cost_expr": "input_tokens + nonexistent_field +
output_tokens"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "canbeanything.com": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 11: missing variable defaults to 0 - cost = 50 + 0 + 30 = 80 per
request
+--- pipelined_requests eval
+[
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+ "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 499",
+ "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 419",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 12: set route with expression that can yield negative cost
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/v1/messages",
+ "plugins": {
+ "ai-proxy": {
+ "provider": "anthropic",
+ "auth": {
+ "header": {
+ "x-api-key": "test-key",
+ "anthropic-version": "2023-06-01"
+ }
+ },
+ "options": {
+ "model": "claude-sonnet-4-20250514"
+ },
+ "override": {
+ "endpoint": "http://localhost:16725"
+ },
+ "ssl_verify": false
+ },
+ "ai-rate-limiting": {
+ "limit": 100,
+ "time_window": 60,
+ "limit_strategy": "expression",
+ "cost_expr": "input_tokens -
cache_read_input_tokens"
+ }
+ },
+ "upstream": {
+ "type": "roundrobin",
+ "nodes": {
+ "canbeanything.com": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 13: negative expression result clamped to 0 - cost = 50 - 200 = -150,
clamped to 0
+--- pipelined_requests eval
+[
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+ "POST /v1/messages\n" .
'{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+ "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 99",
+ "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 99",
+]
+--- no_error_log
+[error]