Copilot commented on code in PR #13570: URL: https://github.com/apache/apisix/pull/13570#discussion_r3449647940
########## apisix/plugins/ai-lakera-guard.lua: ########## @@ -0,0 +1,163 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +local core = require("apisix.core") +local schema_mod = require("apisix.plugins.ai-lakera-guard.schema") +local client = require("apisix.plugins.ai-lakera-guard.client") +local protocols = require("apisix.plugins.ai-protocols") +local binding = require("apisix.plugins.ai-protocols.binding") + +local ipairs = ipairs +local type = type +local concat = table.concat + + +local _M = { + version = 0.1, + priority = 1028, + name = "ai-lakera-guard", + schema = schema_mod.schema, +} + + +function _M.check_schema(conf) + return schema_mod.check_schema(conf) +end + + +-- Format only the detectors that actually fired (detected = true) for the +-- client-facing reveal; the raw breakdown may also carry non-detected entries, +-- which belong in the log but not in the deny message. +local function format_breakdown(breakdown) + local parts = {} + for _, entry in ipairs(breakdown or {}) do + if type(entry) == "table" and entry.detected and entry.detector_type then + local part = entry.detector_type + if entry.result and entry.result ~= "" then + part = part .. " (" .. entry.result .. ")" + end + core.table.insert(parts, part) + end + end + return parts +end + + +local function deny_message(ctx, conf, message, breakdown) + local proto = protocols.get(ctx.ai_client_protocol) + if not proto then + core.log.error("ai-lakera-guard: unsupported protocol: ", + ctx.ai_client_protocol or "unknown") + return message + end + local text = message + if conf.reveal_failure_categories then + local parts = format_breakdown(breakdown) + if #parts > 0 then + text = text .. ". Flagged categories: " .. concat(parts, ", ") + end + end + local usage = ctx.llm_raw_usage + or (proto.empty_usage and proto.empty_usage()) + or { prompt_tokens = 0, completion_tokens = 0, total_tokens = 0 } + return proto.build_deny_response({ + text = text, + model = ctx.var.request_llm_model, + usage = usage, + stream = ctx.var.request_type == "ai_stream", + }) +end + + +local function request_content_moderation(ctx, conf, content) + if not content or #content == 0 then + return + end + + local result, err = client.scan(conf, content) + if err then + if conf.fail_open then + core.log.warn("ai-lakera-guard: ", err, "; fail_open=true, allowing request") + return + end + core.log.error("ai-lakera-guard: ", err, "; fail_open=false, blocking request") + return conf.deny_code, deny_message(ctx, conf, conf.request_failure_message) + end + + if not result.flagged then + return + end + + -- Log Lakera's full per-detector verdict (every entry, detected or not) so + -- both alert mode and blocked requests are auditable. + core.log.warn("ai-lakera-guard: request flagged by Lakera Guard", + ", breakdown: ", core.json.encode(result.breakdown), + ", request_uuid: ", result.request_uuid or "") + + if conf.action == "alert" then + return + end + + return conf.deny_code, deny_message(ctx, conf, conf.request_failure_message, result.breakdown) +end + + +function _M.access(conf, ctx) + if not ctx.picked_ai_instance then + local handled, code, body = binding.on_unsupported( + conf.fail_mode, _M.name, ctx, + "no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)", + 500, "no ai instance picked, ai-lakera-guard plugin must be used with " + .. "ai-proxy or ai-proxy-multi plugin") + if handled then + return code, body + end + return + end + + -- ai-proxy / ai-proxy-multi runs first (higher priority) and already + -- validated the Content-Type and parsed the JSON body -- it rejects non-JSON + -- before picking an instance, so reaching here guarantees a valid JSON table. + local request_tab = core.request.get_json_request_body_table() + Review Comment: For consistency with `ai-aliyun-content-moderation` (and to harden against future changes), consider handling the `nil, err` return from `core.request.get_json_request_body_table()`. While `ai-proxy`/`ai-proxy-multi` currently parse and cache the JSON body before this plugin runs, an explicit check here prevents a potential runtime error if this assumption ever changes. ########## apisix/plugins/ai-lakera-guard/schema.lua: ########## @@ -0,0 +1,106 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +local core = require("apisix.core") +local binding = require("apisix.plugins.ai-protocols.binding") + + +local schema = { + type = "object", + properties = { + api_key = { + type = "string", + minLength = 1, + description = "Lakera Guard API key, sent as 'Authorization: Bearer'.", + }, + lakera_endpoint = { + type = "string", + pattern = [[^https?://]], + default = "https://api.lakera.ai/v2/guard", + description = "Lakera Guard v2 endpoint.", + }, + project_id = { + type = "string", + description = "Lakera project whose policy (detectors + thresholds) to apply.", + }, + direction = { + type = "string", + -- input only in this phase; output/both are added in later phases. + enum = { "input" }, + default = "input", + description = "Which traffic to scan.", + }, + action = { + type = "string", + enum = { "block", "alert" }, + default = "block", + description = "block = enforce; alert = log-only shadow mode (pass traffic).", + }, Review Comment: The schema description for `action` implies `alert` is a shadow mode that simply passes traffic, but the plugin can still block on Lakera API failures when `fail_open=false` (default). Adjusting the description to clarify that `action` only affects handling of *flagged* verdicts helps avoid confusion in schema-driven docs/UI. ########## docs/zh/latest/plugins/ai-lakera-guard.md: ########## @@ -0,0 +1,395 @@ +--- +title: ai-lakera-guard +keywords: + - Apache APISIX + - API 网关 + - 插件 + - ai-lakera-guard + - AI + - AI 安全 + - Lakera +description: ai-lakera-guard 插件将 Apache APISIX 与 Lakera Guard API(v2)集成,用于扫描 LLM 请求中的提示词注入、越狱、PII、内容策略违规以及恶意链接,并根据 Lakera 的判定结果拦截或告警。 +--- + +<!-- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +--> + +<head> + <link rel="canonical" href="https://docs.api7.ai/hub/ai-lakera-guard" /> +</head> + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## 描述 + +`ai-lakera-guard` 插件集成了 [Lakera Guard API(v2)](https://docs.lakera.ai/docs/api),在网关层对 LLM 流量进行基于机器学习的安全扫描。它会检查请求提示词中的提示词注入、越狱、PII 泄露、内容策略违规以及恶意或未知链接,然后根据 Lakera 的判定结果进行**拦截**或**告警**,从而使各个后端 LLM 服务无需各自实现安全防护。 + +运行哪些检测器以及使用何种阈值,完全由通过 `project_id` 选择的 **Lakera 项目策略**控制。网关侧没有检测器列表;Lakera 每次调用返回单一的判定结果。 + +`ai-lakera-guard` 插件应与 [`ai-proxy`](./ai-proxy.md) 或 [`ai-proxy-multi`](./ai-proxy-multi.md) 插件配合使用以代理 LLM 请求。它依赖 `ai-proxy` 填充的上下文,以协议感知的方式提取对话内容。 + +未经过 `ai-proxy`/`ai-proxy-multi` 的请求(例如插件绑定在 Consumer 或 Service 级别时的普通 HTTP 流量)无法被检查。默认情况下,此类请求会被直接放行而不做检查;该行为可通过 `fail_mode` 配置。 + +:::note + +当前版本仅扫描**请求**(`direction: input`)。响应和流式扫描将在后续版本中加入。 + +::: + +## 属性 + +| 名称 | 类型 | 必选项 | 默认值 | 有效值 | 描述 | +|------|------|--------|--------|--------|------| +| api_key | string | 是 | | | Lakera Guard API 密钥,以 `Authorization: Bearer` 形式发送。该值在存储到 etcd 之前会使用 AES 加密,并支持[密钥引用](../terminology/secret.md)(`$secret://`)和环境变量(`$env://`)。 | +| lakera_endpoint | string | 否 | `https://api.lakera.ai/v2/guard` | | Lakera Guard v2 端点。可针对区域或自托管实例进行覆盖。 | +| project_id | string | 否 | | | 要应用其策略(检测器和阈值)的 Lakera 项目。如果未设置,则使用账号的默认策略。 | +| direction | string | 否 | `input` | `input` | 要扫描的流量。当前版本仅支持 `input`(请求)。 | +| action | string | 否 | `block` | `block`、`alert` | `block` 强制执行判定结果;`alert` 是仅记录日志的影子模式,始终放行流量。 | +| fail_open | boolean | 否 | `false` | | 当无法连接 Lakera(超时、连接错误、非 2xx、解码失败)时的处理行为。`false`(失败时拒绝,fail-closed)拦截请求;`true`(失败时放行,fail-open)放行请求。成功返回 `flagged: false` 时始终放行。 | +| fail_mode | string | 否 | `"skip"` | `skip`、`warn`、`error` | 当请求不是该插件可识别和检查的 AI 请求时的处理行为(例如 Consumer 级别绑定时的普通 HTTP 流量,或未经过 `ai-proxy` 的请求)。`skip`:放行请求且不做检查;`warn`:放行并记录 warning 日志;`error`:拒绝请求。与 `fail_open` 不同,后者用于处理 Lakera API 调用失败的情况。 | Review Comment: The `action` row says `alert` "always" passes traffic, but the plugin still blocks on Lakera errors/timeouts when `fail_open=false` (default), even in alert mode. Please clarify that `alert` only changes handling of *flagged* verdicts; API failures remain controlled by `fail_open`. ########## docs/en/latest/plugins/ai-lakera-guard.md: ########## @@ -0,0 +1,395 @@ +--- +title: ai-lakera-guard +keywords: + - Apache APISIX + - API Gateway + - Plugin + - ai-lakera-guard + - AI + - AI Security + - Lakera +description: The ai-lakera-guard Plugin integrates Apache APISIX with the Lakera Guard API (v2) to scan LLM requests for prompt injection, jailbreak, PII, content-policy violations, and malicious links, then blocks or alerts on Lakera's verdict. +--- + +<!-- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +--> + +<head> + <link rel="canonical" href="https://docs.api7.ai/hub/ai-lakera-guard" /> +</head> + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +## Description + +The `ai-lakera-guard` Plugin integrates with the [Lakera Guard API (v2)](https://docs.lakera.ai/docs/api) to perform ML-based security scanning of LLM traffic at the gateway. It inspects request prompts for prompt injection, jailbreak, PII leakage, content-policy violations, and malicious or unknown links, then **blocks** or **alerts** based on Lakera's verdict so individual backend LLM services do not each have to implement their own guardrails. + +Which detectors run and at what thresholds are controlled entirely by the **Lakera project policy**, selected with `project_id`. There is no gateway-side detector list; Lakera returns a single verdict per call. + +The `ai-lakera-guard` Plugin should be used with either the [`ai-proxy`](./ai-proxy.md) or [`ai-proxy-multi`](./ai-proxy-multi.md) Plugin for proxying LLM requests. It relies on the context that `ai-proxy` populates to extract chat content in a protocol-aware way. + +Requests that did not pass through `ai-proxy`/`ai-proxy-multi` (for example plain HTTP traffic when the Plugin is bound at the Consumer or Service level) cannot be inspected. By default such requests are passed through unchecked; this is configurable via `fail_mode`. + +:::note + +This release scans **requests** only (`direction: input`). Response and streaming scanning are added in later releases. + +::: + +## Attributes + +| Name | Type | Required | Default | Valid values | Description | +|------|------|----------|---------|--------------|-------------| +| api_key | string | True | | | Lakera Guard API key, sent as `Authorization: Bearer`. The value is encrypted with AES before being stored in etcd, and supports [secret references](../terminology/secret.md) (`$secret://`) and environment variables (`$env://`). | +| lakera_endpoint | string | False | `https://api.lakera.ai/v2/guard` | | Lakera Guard v2 endpoint. Override for regional or self-hosted instances. | +| project_id | string | False | | | Lakera project whose policy (detectors and thresholds) to apply. If unset, the account default policy is used. | +| direction | string | False | `input` | `input` | Which traffic to scan. Only `input` (request) is supported in this release. | +| action | string | False | `block` | `block`, `alert` | `block` enforces the verdict; `alert` is a log-only shadow mode that always passes traffic through. | +| fail_open | boolean | False | `false` | | Behavior when Lakera cannot be reached (timeout, connection error, non-2xx, decode failure). `false` (fail-closed) blocks the request; `true` (fail-open) allows it. A successful `flagged: false` always passes. | +| fail_mode | string | False | `"skip"` | `skip`, `warn`, `error` | Behavior when the request is not a recognized AI request that this Plugin can inspect (for example, plain HTTP traffic on a Consumer-bound Plugin, or a request that did not pass through `ai-proxy`). `skip`: let the request pass through unchecked; `warn`: pass through and log a warning; `error`: reject the request. Distinct from `fail_open`, which governs Lakera API failures. | Review Comment: The `action` attribute description says `alert` "always passes traffic through", but the implementation still blocks requests on Lakera errors/timeouts when `fail_open=false` (default), even in `alert` mode. Please clarify the docs to avoid suggesting that `alert` overrides `fail_open` (it only affects flagged verdict enforcement). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
