Re: [PR] feat: add ai-cache plugin [apisix]

via GitHub Mon, 22 Jun 2026 18:23:58 -0700


Copilot commented on code in PR #13578:
URL: https://github.com/apache/apisix/pull/13578#discussion_r3456420395



##########
apisix/plugins/ai-cache.lua:
##########
@@ -0,0 +1,199 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core       = require("apisix.core")
+local schema     = require("apisix.plugins.ai-cache.schema")
+local key_mod    = require("apisix.plugins.ai-cache.key")
+local redis_util = require("apisix.utils.redis")
+
+local ngx        = ngx
+local ngx_null   = ngx.null
+local ipairs     = ipairs
+local str_sub    = string.sub
+
+local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
+local CACHE_AGE_HEADER    = "X-AI-Cache-Age"
+local DEFAULT_TTL         = 3600
+
+local _M = {
+    version  = 0.1,
+    priority = 1035,
+    name     = "ai-cache",
+    schema   = schema,
+}
+
+
+function _M.check_schema(conf)
+    return core.schema.check(schema, conf)
+end
+
+
+local function release(conf, red)
+    local ok, err = red:set_keepalive(conf.redis_keepalive_timeout or 10000,
+                                      conf.redis_keepalive_pool or 100)
+    if not ok then
+        core.log.warn("ai-cache: failed to set redis keepalive: ", err)
+    end
+end
+
+
+local function serve_hit(conf, ctx, cached)
+    ctx.ai_cache_status = "HIT"
+    if conf.cache_headers ~= false then
+        core.response.set_header(CACHE_STATUS_HEADER, "HIT")
+        local age = ngx.time() - (cached.created_at or ngx.time())
+        core.response.set_header(CACHE_AGE_HEADER, age < 0 and 0 or age)
+    end
+    core.response.set_header("Content-Type", "application/json")
+    return core.response.exit(200, cached.body)
+end
+
+
+function _M.access(conf, ctx)
+    -- Streaming responses are not cached in PR-1 (SSE replay is a later
+    -- increment). ai-proxy (higher priority) has already classified the
+    -- request, so bypass before doing any work.
+    if ctx.var.request_type == "ai_stream" then
+        ctx.ai_cache_status = "BYPASS"
+        return
+    end
+
+    -- explicit opt-out: any cache_bypass reference resolving to a value
+    -- that is non-empty and not "0" skips the cache (proxy-cache
+    -- `cache_bypass` / nginx `proxy_cache_bypass` semantics). A leading
+    -- "$" marks a variable to resolve; anything else is a literal.
+    if conf.cache_bypass then
+        for _, ref in ipairs(conf.cache_bypass) do
+            local val = ref
+            if str_sub(ref, 1, 1) == "$" then
+                val = ctx.var[str_sub(ref, 2)]
+            end
+            if val ~= nil and val ~= "" and val ~= "0" then
+                ctx.ai_cache_status = "BYPASS"
+                return
+            end
+        end
+    end
+
+    local body, err = core.request.get_json_request_body_table()
+    if not body then
+        core.log.warn("ai-cache: cannot read request body, bypassing: ", err)
+        ctx.ai_cache_status = "BYPASS"
+        return
+    end
+
+    ctx.ai_cache_key = "ai-cache:l1:" .. key_mod.scope(conf, ctx)
+                       .. ":" .. key_mod.fingerprint(ctx, body)
+
+    local red
+    red, err = redis_util.new(conf)
+    if not red then
+        -- fail-open: never let a cache-backend outage break the request.
+        core.log.warn("ai-cache: redis unavailable, fail-open as MISS: ", err)
+        ctx.ai_cache_status = "MISS"
+        return
+    end
+
+    local res
+    res, err = red:get(ctx.ai_cache_key)
+    release(conf, red)
+    if err then
+        core.log.warn("ai-cache: redis get failed, fail-open as MISS: ", err)
+        ctx.ai_cache_status = "MISS"
+        return
+    end
+
+    if res ~= nil and res ~= ngx_null then
+        local cached = core.json.decode(res)
+        if cached and cached.body then
+            return serve_hit(conf, ctx, cached)
+        end
+        core.log.warn("ai-cache: discarding malformed cache entry for ", 
ctx.ai_cache_key)
+    end
+
+    ctx.ai_cache_status = "MISS"
+end
+
+
+function _M.header_filter(conf, ctx)
+    if ctx.ai_cache_status and conf.cache_headers ~= false then
+        core.response.set_header(CACHE_STATUS_HEADER, ctx.ai_cache_status)
+    end
+end
+
+
+function _M.body_filter(conf, ctx)
+    -- only a MISS gets written back; HIT exited in access, BYPASS opts out.
+    if ctx.ai_cache_status ~= "MISS" then
+        return
+    end
+    local chunk = ngx.arg[1]
+    if chunk and #chunk > 0 then
+        ctx.ai_cache_buf = (ctx.ai_cache_buf or "") .. chunk
+    end
+end

Review Comment:
   `body_filter` keeps appending every upstream chunk to `ctx.ai_cache_buf`
even after the buffer has already exceeded `max_cache_body_size`. This can
waste memory and CPU on large LLM outputs that will never be cached, because
the size check only happens later, in `log`.



##########
apisix/plugins/ai-cache.lua:
##########
@@ -0,0 +1,199 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core       = require("apisix.core")
+local schema     = require("apisix.plugins.ai-cache.schema")
+local key_mod    = require("apisix.plugins.ai-cache.key")
+local redis_util = require("apisix.utils.redis")
+
+local ngx        = ngx
+local ngx_null   = ngx.null
+local ipairs     = ipairs
+local str_sub    = string.sub
+
+local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
+local CACHE_AGE_HEADER    = "X-AI-Cache-Age"
+local DEFAULT_TTL         = 3600
+
+local _M = {
+    version  = 0.1,
+    priority = 1035,
+    name     = "ai-cache",
+    schema   = schema,
+}
+
+
+function _M.check_schema(conf)
+    return core.schema.check(schema, conf)
+end
+
+
+local function release(conf, red)
+    local ok, err = red:set_keepalive(conf.redis_keepalive_timeout or 10000,
+                                      conf.redis_keepalive_pool or 100)
+    if not ok then
+        core.log.warn("ai-cache: failed to set redis keepalive: ", err)
+    end
+end
+
+
+local function serve_hit(conf, ctx, cached)
+    ctx.ai_cache_status = "HIT"
+    if conf.cache_headers ~= false then
+        core.response.set_header(CACHE_STATUS_HEADER, "HIT")
+        local age = ngx.time() - (cached.created_at or ngx.time())
+        core.response.set_header(CACHE_AGE_HEADER, age < 0 and 0 or age)
+    end
+    core.response.set_header("Content-Type", "application/json")
+    return core.response.exit(200, cached.body)
+end

Review Comment:
   The plugin writes any 2xx response to the cache (`ngx.status >= 200 and
ngx.status < 300`), but a cache hit is always replayed as HTTP 200
(`core.response.exit(200, ...)`). If the upstream ever returns 201, 204, or
another non-200 2xx status, the cached HIT changes the status code the client
sees, which can break clients that rely on the original semantics.



##########
apisix/plugins/ai-cache/key.lua:
##########
@@ -0,0 +1,72 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core      = require("apisix.core")
+local protocols = require("apisix.plugins.ai-protocols")
+local sha256    = require("resty.sha256")
+local to_hex    = require("resty.string").to_hex
+
+local ipairs = ipairs
+local concat = table.concat
+
+local _M = {}
+
+
+local function hex_digest(s)
+    local hash = sha256:new()
+    hash:update(s)
+    return to_hex(hash:final())
+end
+
+
+function _M.fingerprint(ctx, body)
+    local params = core.table.deepcopy(body)
+    params.messages = nil
+    params.model    = nil
+    params.stream   = nil
+
+    local repr = core.json.stably_encode({
+        protocol = ctx.ai_client_protocol or "",
+        model    = ctx.var.request_llm_model or body.model or "",
+        messages = protocols.get_messages(body, ctx) or {},
+        params   = params,
+    })
+    return hex_digest(repr)
+end

Review Comment:
   The cache fingerprint takes its model from `ctx.var.request_llm_model or
body.model or ""`, which can end up as the empty string when clients omit
`model` and rely on the route-level `ai-proxy.options.model`. In that case
requests served by different configured models can share the same cache key,
so a HIT may replay the wrong model's response. Separately,
`core.table.deepcopy(body)` deep-copies `messages` only for that copy to be
set to nil on the next line, which can be expensive for large prompts.



##########
t/plugin/ai-cache.t:
##########
@@ -0,0 +1,652 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+BEGIN {
+    $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
+use t::APISIX 'no_plan';
+
+log_level("info");
+repeat_each(1);
+no_long_string();
+no_root_location();
+
+add_block_preprocessor(sub {
+    my ($block) = @_;
+
+    if (!defined $block->request) {
+        $block->set_value("request", "GET /t");
+    }
+
+    my $user_yaml_config = <<_EOC_;
+plugins:
+  - ai-proxy
+  - ai-cache
+_EOC_
+    if (!defined $block->extra_yaml_config) {
+        $block->set_value("extra_yaml_config", $user_yaml_config);
+    }
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: minimal valid exact-cache configuration
+--- config
+    location /t {
+        content_by_lua_block {
+            local plugin = require("apisix.plugins.ai-cache")
+            local ok, err = plugin.check_schema({
+                redis_host = "127.0.0.1",
+                redis_port = 6379,
+            })
+
+            if not ok then
+                ngx.say(err)
+            else
+                ngx.say("passed")
+            end
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 2: reject config missing required redis (policy=redis then-clause)
+--- config
+    location /t {
+        content_by_lua_block {
+            local plugin = require("apisix.plugins.ai-cache")
+            local ok, err = plugin.check_schema({})
+
+            if not ok then
+                ngx.say(err)
+            else
+                ngx.say("passed")
+            end
+        }
+    }
+--- response_body eval
+qr/then clause did not match/
+
+
+
+=== TEST 3: reject unknown layer value
+--- config
+    location /t {
+        content_by_lua_block {
+            local plugin = require("apisix.plugins.ai-cache")
+            local ok, err = plugin.check_schema({
+                redis_host = "127.0.0.1",
+                layers = { "nonsense" },
+            })
+
+            if not ok then
+                ngx.say(err)
+            else
+                ngx.say("passed")
+            end
+        }
+    }
+--- response_body eval
+qr/layers/
+
+
+
+=== TEST 4: flush redis, then set route with ai-proxy + ai-cache (mock 
upstream)
+--- config
+    location /t {
+        content_by_lua_block {
+            local redis = require("resty.redis")
+            local red = redis:new()
+            red:set_timeout(1000)
+            local ok, rerr = red:connect("127.0.0.1", 6379)
+            if not ok then
+                ngx.say("redis connect failed: ", rerr)
+                return
+            end
+            red:flushall()
+
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": {
+                                "header": {
+                                    "Authorization": "Bearer test-key"
+                                }
+                            },
+                            "options": {
+                                "model": "gpt-4o"
+                            },
+                            "override": {
+                                "endpoint": "http://127.0.0.1:1980";
+                            }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 5: cold request is a cache MISS and is proxied upstream
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"ai-cache miss 
unique-prompt-5"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+--- response_body_like eval
+qr/1 \+ 1 = 2/
+--- wait: 0.3
+
+
+
+=== TEST 6: identical re-request is a HIT served from cache (upstream not 
called)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"ai-cache miss 
unique-prompt-5"}]}
+--- error_code: 200
+--- response_headers_like
+X-AI-Cache-Status: HIT
+X-AI-Cache-Age: \d+
+--- response_body_like eval
+qr/1 \+ 1 = 2/
+
+
+
+=== TEST 7: fingerprint sensitivity (key.lua unit)
+--- config
+    location /t {
+        content_by_lua_block {
+            local key = require("apisix.plugins.ai-cache.key")
+            local function ctx(model)
+                return { ai_client_protocol = "openai-chat", var = { 
request_llm_model = model } }
+            end
+            local function fp(body)
+                return key.fingerprint(ctx(body.model), body)
+            end
+
+            local base   = { model="gpt-4o",      messages={{role="user", 
content="hi"}}, temperature=0.2 }
+            local same   = { model="gpt-4o",      messages={{role="user", 
content="hi"}}, temperature=0.2 }
+            local msg2   = { model="gpt-4o",      messages={{role="user", 
content="yo"}}, temperature=0.2 }
+            local model2 = { model="gpt-4o-mini", messages={{role="user", 
content="hi"}}, temperature=0.2 }
+            local temp2  = { model="gpt-4o",      messages={{role="user", 
content="hi"}}, temperature=0.7 }
+            local tools2 = { model="gpt-4o",      messages={{role="user", 
content="hi"}}, temperature=0.2,
+                             tools={{ type="function", ["function"]={ name="f" 
} }} }
+
+            local b = fp(base)
+            assert(fp(same)   == b, "identical bodies must share a 
fingerprint")
+            assert(fp(msg2)   ~= b, "changed message must change the 
fingerprint")
+            assert(fp(model2) ~= b, "changed model must change the 
fingerprint")
+            assert(fp(temp2)  ~= b, "changed temperature must change the 
fingerprint")
+            assert(fp(tools2) ~= b, "changed tools must change the 
fingerprint")
+            ngx.say("passed")
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 8: non-2xx upstream (no fixture -> 401) is a MISS
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"non-2xx-test-prompt"}]}
+--- error_code: 401
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 9: same prompt with a valid fixture is still a MISS (the 401 was not 
cached)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"non-2xx-test-prompt"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: MISS
+--- response_body_like eval
+qr/1 \+ 1 = 2/
+
+
+
+=== TEST 10: set route with a cache_bypass variable rule
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer 
test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980"; }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "cache_bypass": ["$http_x_ai_cache_bypass"]
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 11: a non-empty, non-"0" cache_bypass value is a BYPASS
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"bypass rule test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+X-AI-Cache-Bypass: 1
+--- response_headers
+X-AI-Cache-Status: BYPASS
+
+
+
+=== TEST 12: a cache_bypass value of "0" does not bypass (normal MISS)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"bypass-zero-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+X-AI-Cache-Bypass: 0
+--- response_headers
+X-AI-Cache-Status: MISS
+
+
+
+=== TEST 13: set route with a tiny max_cache_body_size
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer 
test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980"; }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "max_cache_body_size": 10
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 14: cold request (response exceeds max_cache_body_size) is a MISS
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"body-size-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 15: same prompt is still a MISS (oversized response was not cached)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"body-size-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+
+
+
+=== TEST 16: set route isolating the cache by a request variable
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer 
test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980"; }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "cache_key": { "include_vars": ["http_x_tenant"] }
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 17: tenant alpha cold request is a MISS (warms scope=alpha)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation 
test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+X-Tenant: alpha
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 18: same prompt, tenant beta is a MISS (not shared with alpha)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation 
test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+X-Tenant: beta
+--- response_headers
+X-AI-Cache-Status: MISS
+
+
+
+=== TEST 19: same prompt, tenant alpha is a HIT (its own scope persisted)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"scope isolation 
test"}]}
+--- more_headers
+X-Tenant: alpha
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: HIT
+
+
+
+=== TEST 20: set route with a 1-second exact ttl
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer 
test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980"; }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "exact": { "ttl": 1 }
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 21: cold request is a MISS (cached with ttl=1), then wait past the ttl
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"ttl-expiry-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 2
+
+
+
+=== TEST 22: same prompt is a MISS again (entry expired)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"ttl-expiry-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+
+
+
+=== TEST 23: set an anthropic-messages route (cross-protocol)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/2',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": { "header": { "x-api-key": "test-key" } },
+                            "options": { "model": "claude-3-5-sonnet-20241022" 
},
+                            "override": { "endpoint": "http://127.0.0.1:1980"; }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 24: anthropic cold request is a MISS
+--- request
+POST /v1/messages
+{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"cross-protocol
 test"}],"max_tokens":100}
+--- more_headers
+X-AI-Fixture: anthropic/messages-basic.json
+--- response_headers
+X-AI-Cache-Status: MISS
+--- wait: 0.3
+
+
+
+=== TEST 25: identical anthropic re-request is a HIT (upstream not called)
+--- request
+POST /v1/messages
+{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":"cross-protocol
 test"}],"max_tokens":100}
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: HIT
+
+
+
+=== TEST 26: set route whose redis is unreachable
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer 
test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980"; }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6390,
+                            "redis_timeout": 200
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 27: redis unreachable fails open (request still proxied as MISS, no 
5xx)
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"redis-down failopen"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: MISS
+--- response_body_like eval
+qr/1 \+ 1 = 2/
+--- error_log
+ai-cache: redis unavailable, fail-open as MISS
+
+
+
+=== TEST 28: set route with cache_headers disabled
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/anything",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "openai",
+                            "auth": { "header": { "Authorization": "Bearer 
test-key" } },
+                            "options": { "model": "gpt-4o" },
+                            "override": { "endpoint": "http://127.0.0.1:1980"; }
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "cache_headers": false
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 29: cache_headers=false suppresses the X-AI-Cache-* headers
+--- request
+POST /anything
+{"model":"gpt-4o","messages":[{"role":"user","content":"cache-headers-off-test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status:
+X-AI-Cache-Age:
+--- response_body_like eval

Review Comment:
   `cache_headers=false` should suppress the `X-AI-Cache-*` headers entirely.
A `--- response_headers` entry with an empty value typically asserts that the
header exists with an empty value, which is not the same as the header being
absent, so this test can be flaky or pass for the wrong reason. Other tests
in the suite use `raw_response_headers_unlike` to assert that a header is
absent.



##########
apisix/plugins/ai-cache.lua:
##########
@@ -0,0 +1,199 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+local core       = require("apisix.core")
+local schema     = require("apisix.plugins.ai-cache.schema")
+local key_mod    = require("apisix.plugins.ai-cache.key")
+local redis_util = require("apisix.utils.redis")
+
+local ngx        = ngx
+local ngx_null   = ngx.null
+local ipairs     = ipairs
+local str_sub    = string.sub
+
+local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
+local CACHE_AGE_HEADER    = "X-AI-Cache-Age"
+local DEFAULT_TTL         = 3600
+
+local _M = {
+    version  = 0.1,
+    priority = 1035,
+    name     = "ai-cache",
+    schema   = schema,
+}
+
+
+function _M.check_schema(conf)
+    return core.schema.check(schema, conf)
+end
+
+
+local function release(conf, red)
+    local ok, err = red:set_keepalive(conf.redis_keepalive_timeout or 10000,
+                                      conf.redis_keepalive_pool or 100)
+    if not ok then
+        core.log.warn("ai-cache: failed to set redis keepalive: ", err)
+    end
+end
+
+
+local function serve_hit(conf, ctx, cached)
+    ctx.ai_cache_status = "HIT"
+    if conf.cache_headers ~= false then
+        core.response.set_header(CACHE_STATUS_HEADER, "HIT")
+        local age = ngx.time() - (cached.created_at or ngx.time())
+        core.response.set_header(CACHE_AGE_HEADER, age < 0 and 0 or age)
+    end
+    core.response.set_header("Content-Type", "application/json")
+    return core.response.exit(200, cached.body)
+end
+
+
+function _M.access(conf, ctx)
+    -- Streaming responses are not cached in PR-1 (SSE replay is a later
+    -- increment). ai-proxy (higher priority) has already classified the
+    -- request, so bypass before doing any work.
+    if ctx.var.request_type == "ai_stream" then
+        ctx.ai_cache_status = "BYPASS"
+        return
+    end
+
+    -- explicit opt-out: any cache_bypass reference resolving to a value
+    -- that is non-empty and not "0" skips the cache (proxy-cache
+    -- `cache_bypass` / nginx `proxy_cache_bypass` semantics). A leading
+    -- "$" marks a variable to resolve; anything else is a literal.
+    if conf.cache_bypass then
+        for _, ref in ipairs(conf.cache_bypass) do
+            local val = ref
+            if str_sub(ref, 1, 1) == "$" then
+                val = ctx.var[str_sub(ref, 2)]
+            end
+            if val ~= nil and val ~= "" and val ~= "0" then
+                ctx.ai_cache_status = "BYPASS"
+                return
+            end
+        end
+    end
+
+    local body, err = core.request.get_json_request_body_table()
+    if not body then
+        core.log.warn("ai-cache: cannot read request body, bypassing: ", err)
+        ctx.ai_cache_status = "BYPASS"
+        return
+    end
+
+    ctx.ai_cache_key = "ai-cache:l1:" .. key_mod.scope(conf, ctx)
+                       .. ":" .. key_mod.fingerprint(ctx, body)
+
+    local red
+    red, err = redis_util.new(conf)
+    if not red then
+        -- fail-open: never let a cache-backend outage break the request.
+        core.log.warn("ai-cache: redis unavailable, fail-open as MISS: ", err)
+        ctx.ai_cache_status = "MISS"
+        return
+    end
+
+    local res
+    res, err = red:get(ctx.ai_cache_key)
+    release(conf, red)
+    if err then
+        core.log.warn("ai-cache: redis get failed, fail-open as MISS: ", err)
+        ctx.ai_cache_status = "MISS"
+        return
+    end
+
+    if res ~= nil and res ~= ngx_null then
+        local cached = core.json.decode(res)
+        if cached and cached.body then
+            return serve_hit(conf, ctx, cached)
+        end
+        core.log.warn("ai-cache: discarding malformed cache entry for ", ctx.ai_cache_key)
+    end
+
+    ctx.ai_cache_status = "MISS"
+end
+
+
+function _M.header_filter(conf, ctx)
+    if ctx.ai_cache_status and conf.cache_headers ~= false then
+        core.response.set_header(CACHE_STATUS_HEADER, ctx.ai_cache_status)
+    end
+end
+
+
+function _M.body_filter(conf, ctx)
+    -- only a MISS gets written back; HIT exited in access, BYPASS opts out.
+    if ctx.ai_cache_status ~= "MISS" then
+        return
+    end
+    local chunk = ngx.arg[1]
+    if chunk and #chunk > 0 then
+        ctx.ai_cache_buf = (ctx.ai_cache_buf or "") .. chunk
+    end
+end
+
+
+-- The response-capturing phases (body_filter / log) run in contexts where
+-- cosockets are disabled, so the Redis write is deferred to a 0-delay timer
+-- (timers run in a light thread where cosockets are allowed).
+local function write_to_cache(premature, conf, cache_key, response_body)
+    if premature then
+        return
+    end
+    local red, err = redis_util.new(conf)
+    if not red then
+        core.log.warn("ai-cache: redis unavailable on write: ", err)
+        return
+    end
+    local envelope = core.json.encode({ body = response_body, created_at = ngx.time() })
+    local ttl = (conf.exact and conf.exact.ttl) or DEFAULT_TTL
+    local ok
+    ok, err = red:set(cache_key, envelope, "EX", ttl)
+    if not ok then
+        core.log.warn("ai-cache: redis set failed: ", err)
+        return
+    end
+    release(conf, red)

Review Comment:
   In `write_to_cache`, when `red:set(...)` fails the function returns without 
calling `set_keepalive`, leaving the Redis connection out of the keepalive 
pool. Under repeated write failures this can cause unnecessary connection churn 
/ resource usage.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat: add ai-cache plugin [apisix]

Reply via email to