This is an automated email from the ASF dual-hosted git repository.
shreemaan-abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new 6f62be4f9 fix(loki-logger): resolve log labels per request to avoid
label leakage (#13562)
6f62be4f9 is described below
commit 6f62be4f9b99a30fdc5b23c1622eb0dcdfffc5dd
Author: Shreemaan Abhishek <[email protected]>
AuthorDate: Thu Jun 18 10:12:31 2026 +0800
fix(loki-logger): resolve log labels per request to avoid label leakage
(#13562)
* fix(loki-logger): resolve log labels per request to avoid label leakage
A global rule (or any single shared conf) running loki-logger with a
dynamic label value such as `"service_name": "$service_name"` leaked one
service's label onto other services' logs, and froze the value after the
first request.
Two defects combined:
1. The shared plugin `conf.log_labels` table was mutated in place while
resolving variables, so after the first request the template
`"$service_name"` was overwritten with a literal and never
re-resolved (resolve_var returns n_resolved == 0 for a literal).
2. Label resolution sat after the batch-processor `add_entry`
early-return, so it ran only for the request that created the
processor, and the flush closure built a single Loki stream shared by
every batched entry.
Fix: clone `conf.log_labels` (never mutate the shared conf), resolve
variables per request before the early-return, and stamp the resolved
set onto the entry (mirroring `entry.loki_log_time`). At flush time,
group entries into multiple Loki streams keyed by their resolved label
set so each request is logged under its own labels. Internal fields are
stripped from the entry before encoding the log line.
No schema change; pure runtime behavior.
* fix(loki-logger): make batch flush retry-safe and log label resolution
errors
Address review feedback:
- The batch processor reuses the same entry tables when retrying a
failed push (max_retry_count > 0). The flush nil-ed entry.loki_labels
and entry.loki_log_time before encoding, so a retry would call
gen_label_key(nil) and crash, dropping the whole batch. Restore both
internal fields after encoding the log line so retries are safe.
- Log a warning when core.utils.resolve_var fails on a label value, so a
misconfigured variable is visible to operators instead of silently
falling back to the literal.
* test(loki-logger): make per-request label test reproduce the bug
The previous TEST 17-20 issued the two requests in separate Test::Nginx
blocks, so each ran against a fresh nginx process and the shared-conf
mutation / single-stream batching never carried across requests; the
test passed even on the unfixed plugin.
Drive both requests from one block (worker_processes is 1) with
batch_max_size 2 so they share a worker and a batch, then assert each
service has its own Loki stream carrying only its own request. This
fails on the unfixed plugin and passes on the fix.
* test(loki-logger): tighten stream-count assert and add missing-header case
Assert exactly one Loki stream per service label so duplicate-stream
regressions are caught, and drive a request without x-service-name to
prove a missing label is not inherited from a prior request.
---
apisix/plugins/loki-logger.lua | 85 +++++++++++++++++++++++++++---------
t/plugin/loki-logger.t | 99 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 163 insertions(+), 21 deletions(-)
diff --git a/apisix/plugins/loki-logger.lua b/apisix/plugins/loki-logger.lua
index 0f157e5cb..b107ec10a 100644
--- a/apisix/plugins/loki-logger.lua
+++ b/apisix/plugins/loki-logger.lua
@@ -26,6 +26,8 @@ local ipairs = ipairs
local tostring = tostring
local math_random = math.random
local table_insert = table.insert
+local table_sort = table.sort
+local table_concat = table.concat
local ngx = ngx
local str_format = core.string.format
@@ -153,6 +155,24 @@ function _M.check_schema(conf, schema_type)
end
+-- build a stable, collision-resistant key for a resolved label set so entries
+-- sharing the exact same labels are grouped into a single Loki stream.
+local function gen_label_key(labels)
+ local keys = {}
+ for k in pairs(labels) do
+ keys[#keys + 1] = k
+ end
+ table_sort(keys)
+
+ local parts = new_tab(#keys, 0)
+ for i, k in ipairs(keys) do
+ parts[i] = k .. "=" .. labels[k]
+ end
+ -- NUL separator avoids collisions between distinct key/value boundaries
+ return table_concat(parts, "\0")
+end
+
+
local function send_http_data(conf, log)
local headers = conf.headers or {}
headers = core.table.clone(headers)
@@ -218,42 +238,65 @@ function _M.log(conf, ctx)
-- and then add 6 zeros by string concatenation
entry.loki_log_time = tostring(ngx.req.start_time() * 1000) .. "000000"
- if batch_processor_manager:add_entry(conf, entry, max_pending_entries) then
- return
- end
-
- local labels = conf.log_labels
-
- -- parsing possible variables in label value
+ -- resolve possible variables in label values per request and attach the
+ -- result to the entry. Clone first so the shared plugin conf is never
+ -- mutated, and resolve before the batch-processor early-return so every
+ -- entry carries its own labels (e.g. a per-request $service_name).
+ local labels = core.table.clone(conf.log_labels)
for key, value in pairs(labels) do
local new_val, err, n_resolved = core.utils.resolve_var(value, ctx.var)
- if not err and n_resolved > 0 then
+ if err then
+ core.log.warn("failed to resolve label '", key, "' value '", value, "': ", err)
+ elseif n_resolved > 0 then
labels[key] = new_val
end
end
+ entry.loki_labels = labels
+
+ if batch_processor_manager:add_entry(conf, entry, max_pending_entries) then
+ return
+ end
-- generate a function to be executed by the batch processor
local func = function(entries)
- -- build loki request data
- local data = {
- streams = {
- {
- stream = labels,
- values = new_tab(1, 0),
- }
- }
- }
+ -- group entries into Loki streams by their resolved label set so each
+ -- request is logged under its own labels instead of a single shared set
+ local streams = new_tab(1, 0)
+ local stream_by_key = {}
- -- add all entries to the batch
for _, entry in ipairs(entries) do
+ local entry_labels = entry.loki_labels
local log_time = entry.loki_log_time
- entry.loki_log_time = nil -- clean logger internal field
+ -- remove logger internal fields so they don't leak into the encoded
+ -- log line, then restore them: the batch processor reuses the same
+ -- entry tables on retry, so they must survive a failed flush
+ entry.loki_log_time = nil
+ entry.loki_labels = nil
+ local line = core.json.encode(entry)
+ entry.loki_log_time = log_time
+ entry.loki_labels = entry_labels
+
+ local key = gen_label_key(entry_labels)
+ local stream = stream_by_key[key]
+ if not stream then
+ stream = {
+ stream = entry_labels,
+ values = new_tab(1, 0),
+ }
+ stream_by_key[key] = stream
+ table_insert(streams, stream)
+ end
- table_insert(data.streams[1].values, {
- log_time, core.json.encode(entry)
+ table_insert(stream.values, {
+ log_time, line
})
end
+ -- build loki request data
+ local data = {
+ streams = streams
+ }
+
return send_http_data(conf, data)
end
diff --git a/t/plugin/loki-logger.t b/t/plugin/loki-logger.t
index ffeed2784..a72c6f589 100644
--- a/t/plugin/loki-logger.t
+++ b/t/plugin/loki-logger.t
@@ -423,3 +423,102 @@ GET /hello
hello world
--- error_log
go(): authorization: test1234
+
+
+
+=== TEST 17: setup route with a per-request variable label (same conf for all requests)
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "plugins": {
+ "loki-logger": {
+ "endpoint_addrs": ["http://127.0.0.1:3100"],
+ "tenant_id": "tenant_1",
+ "log_labels": {
+ "service": "$http_x_service_name"
+ },
+ "batch_max_size": 2
+ }
+ },
+ "upstream": {
+ "nodes": {
+ "127.0.0.1:1980": 1
+ },
+ "type": "roundrobin"
+ },
+ "uri": "/hello"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- response_body
+passed
+
+
+
+=== TEST 18: two requests with different label values share one worker and must not leak
+--- config
+ location /t {
+ content_by_lua_block {
+ local http = require("resty.http")
+ local cjson = require("cjson")
+
+ -- all requests hit the same worker (worker_processes 1), so a buggy
+ -- shared-conf / single-stream batch would freeze the first label and
+ -- stamp every line with it. the last request omits x-service-name
+ -- (boundary case): it must not inherit a prior request's label
+ local req_headers = {
+ { ["x-service-name"] = "svc-alpha" },
+ { ["x-service-name"] = "svc-beta" },
+ {},
+ }
+ for _, headers in ipairs(req_headers) do
+ local httpc = http.new()
+ local res, err = httpc:request_uri(
+ "http://127.0.0.1:" .. ngx.var.server_port .. "/hello",
+ { headers = headers })
+ assert(res, "request failed: " .. (err or ""))
+ assert(res.status == 200, "unexpected status: " .. res.status)
+ end
+
+ -- wait for the batch flush timer and Loki ingestion
+ ngx.sleep(2)
+
+ local loki = require("lib.grafana_loki")
+ local now = ngx.now() * 1000
+ local from = tostring(now - 10000) .. "000000"
+ local to = tostring(now) .. "000000"
+
+ for _, svc in ipairs({"svc-alpha", "svc-beta"}) do
+ local data, err = loki.fetch_logs_from_loki(from, to,
+ { query = [[{service="]] .. svc .. [["} | json]] })
+
+ assert(err == nil, "fetch logs error: " .. (err or ""))
+ assert(data.status == "success",
+ "loki response error: " .. cjson.encode(data))
+ assert(#data.data.result == 1,
+ "expected exactly one stream for service=" .. svc .. ": "
+ .. cjson.encode(data))
+
+ local entry = data.data.result[1]
+ assert(entry.stream.service == svc,
+ "expected stream service=" .. svc .. ": " .. cjson.encode(entry))
+ assert(entry.stream.request_headers_x_service_name == svc,
+ "log line under service=" .. svc ..
+ " belongs to another request: " .. cjson.encode(entry))
+ end
+
+ ngx.say("passed")
+ }
+ }
+--- response_body
+passed