Copilot commented on code in PR #12686:
URL: https://github.com/apache/apisix/pull/12686#discussion_r2762500654


##########
apisix/secret.lua:
##########
@@ -148,16 +150,19 @@ local function fetch_by_uri_secret(secret_uri)
         return nil, "no secret conf, secret_uri: " .. secret_uri
     end
 
+    local span = tracer.start(ngx.ctx, "fetch_secret", tracer.kind.client)
     local ok, sm = pcall(require, "apisix.secret." .. opts.manager)
     if not ok then
         return nil, "no secret manager: " .. opts.manager
     end
 
     local value, err = sm.get(conf, opts.key)
     if err then
+        tracer.finish(ngx.ctx, span, tracer.status.ERROR, err)
         return nil, err

Review Comment:
   `tracer.finish` is called with the wrong argument order on the error path. 
It should be `tracer.finish(ngx.ctx, span, tracer.status.ERROR, err)` (or 
equivalent) so it doesn't attempt to treat the status enum as a span object.



##########
apisix/core/response.lua:
##########
@@ -86,6 +87,9 @@ function resp_exit(code, ...)
     end
 
     if code then
+        if code >= 400 then
+            tracer.finish_all(ngx.ctx, tracer.status.ERROR, "response code " 
.. code)

Review Comment:
   This call uses `tracer.finish(ngx.ctx, tracer.status.ERROR, ...)` but 
`finish` expects `(ctx, sp, code, message)`. As written it will crash for any 
`code >= 400` response when tracing is enabled. If the intent is “finish 
current span with error”, pass an explicit `nil` span (`finish(ctx, nil, code, 
msg)`) or introduce a dedicated `finish_current(ctx, code, msg)` helper.
   ```suggestion
               tracer.finish_all(ngx.ctx, nil, code, "response code " .. code)
   ```



##########
apisix/tracer.lua:
##########
@@ -0,0 +1,131 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local table = require("apisix.core.table")
+local tablepool = require("tablepool")
+local span = require("apisix.utils.span")
+local span_kind = require("opentelemetry.trace.span_kind")
+local span_status = require("opentelemetry.trace.span_status")
+local local_conf = require("apisix.core.config_local").local_conf()
+local ipairs = ipairs
+local ngx = ngx
+
+local enable_tracing = false
+if ngx.config.subsystem == "http" and type(local_conf.apisix.tracing) == 
"boolean" then
+    enable_tracing = local_conf.apisix.tracing
+end
+
+local _M = {
+    kind = span_kind,
+    status = span_status,
+    span_state = {},
+}
+
+function _M.start(ctx, name, kind)
+    if not enable_tracing then
+        return
+    end
+
+    local tracing = ctx and ctx.tracing
+    if not tracing then
+        local root_span = span.new()
+        tracing = tablepool.fetch("tracing", 0, 8)
+        tracing.spans = tablepool.fetch("tracing_spans", 20, 0)
+        tracing.root_span = root_span
+        tracing.current_span = root_span
+        table.insert(tracing.spans, root_span)
+        root_span.id = 1
+        ctx.tracing = tracing
+    end
+    if tracing.skip then
+        return
+    end
+
+    local spans = tracing.spans
+    local sp = span.new(name, kind)
+
+    table.insert(spans, sp)
+    local id = #spans
+    sp.id = id
+    local parent = tracing.current_span
+    if parent then
+        sp:set_parent(parent.id)
+        parent:append_child(id)
+    end
+    tracing.current_span = sp
+    return sp
+end
+
+
+local function finish_span(spans, sp, code, message)
+    if not sp or sp.end_time then
+        return
+    end
+    for _, id in ipairs(sp.child_ids or {}) do
+        finish_span(spans, spans[id])
+    end
+    if code then
+        sp:set_status(code, message)
+    end
+    sp:finish()
+end
+
+
+function _M.finish(ctx, sp, code, message)
+    local tracing = ctx and ctx.tracing
+    if not tracing then
+        return
+    end
+
+    sp = sp or tracing.current_span
+    if not sp then
+        return
+    end
+
+    finish_span(tracing.spans, sp, code, message)
+    tracing.current_span = tracing.spans[sp.parent_id]
+end

Review Comment:
   `finish()` defaults to `tracing.current_span` when `sp` is nil. Combined 
with `start()` returning nil when `tracing.skip` is true, common call sites 
(`local sp = start(...); finish(ctx, sp)`) will incorrectly finish the 
*current* span even though no new span was started. Consider making 
`finish(ctx, sp, ...)` a no-op when `sp` is nil, and add a separate 
`finish_current(ctx, ...)` API for the cases where finishing the current span 
is desired.



##########
apisix/plugins/opentelemetry.lua:
##########
@@ -327,10 +328,17 @@ function _M.rewrite(conf, api_ctx)
 
     local attributes = {
         attr.string("net.host.name", vars.host),
+        -- deprecated attributes
         attr.string("http.method", vars.method),
         attr.string("http.scheme", vars.scheme),
         attr.string("http.target", vars.request_uri),
         attr.string("http.user_agent", vars.http_user_agent),
+
+        -- new attributes
+        attr.string("http.request.method", vars.method),
+        attr.string("url.scheme", vars.scheme),
+        attr.string("uri.path",vars.request_uri),

Review Comment:
   `uri.path` should contain only the URL path and not the query string. 
`vars.request_uri` typically includes args (e.g. `/foo?a=1`), so this attribute 
will be incorrect. Use `vars.uri` (or another path-only value) for `uri.path` 
and keep `http.target`/`request_uri` for the full target if needed.
   ```suggestion
           attr.string("uri.path", vars.uri),
   ```



##########
apisix/ssl/router/radixtree_sni.lua:
##########
@@ -177,9 +179,10 @@ function _M.match_and_set(api_ctx, match_only, alt_sni)
             -- with it sometimes
             core.log.error("failed to find any SSL certificate by SNI: ", sni)
         end
+        tracer.finish(api_ctx.ngx_ctx, span, tracer.status.ERROR, "failed 
match SNI")

Review Comment:
   `tracer.finish` is called with the wrong argument order here. The signature 
is `finish(ctx, sp, code, message)`, but this passes `(ctx, code, message)` 
which will attempt to index a non-span value and can crash on SNI-miss. Pass 
the started `span` as the second argument (and the error code/message as 
3rd/4th), and ensure the span is finished before returning.



##########
apisix/tracer.lua:
##########
@@ -0,0 +1,131 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local table = require("apisix.core.table")
+local tablepool = require("tablepool")
+local span = require("apisix.utils.span")
+local span_kind = require("opentelemetry.trace.span_kind")
+local span_status = require("opentelemetry.trace.span_status")
+local local_conf = require("apisix.core.config_local").local_conf()
+local ipairs = ipairs
+local ngx = ngx
+
+local enable_tracing = false
+if ngx.config.subsystem == "http" and type(local_conf.apisix.tracing) == 
"boolean" then
+    enable_tracing = local_conf.apisix.tracing
+end
+
+local _M = {
+    kind = span_kind,
+    status = span_status,
+    span_state = {},
+}
+
+function _M.start(ctx, name, kind)
+    if not enable_tracing then
+        return
+    end
+
+    local tracing = ctx and ctx.tracing
+    if not tracing then
+        local root_span = span.new()
+        tracing = tablepool.fetch("tracing", 0, 8)
+        tracing.spans = tablepool.fetch("tracing_spans", 20, 0)
+        tracing.root_span = root_span
+        tracing.current_span = root_span
+        table.insert(tracing.spans, root_span)
+        root_span.id = 1
+        ctx.tracing = tracing
+    end

Review Comment:
   `start()` can be called with `ctx == nil` (call sites already pass `ngx.ctx` 
in some places, but this function itself allows `ctx` to be nil via `ctx and 
ctx.tracing`). However it unconditionally assigns `ctx.tracing = tracing`, 
which will throw if `ctx` is nil. Add an early return when `ctx` is nil (or 
require callers to always pass a non-nil ctx).



##########
apisix/utils/span.lua:
##########
@@ -0,0 +1,101 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+local tablepool = require("tablepool")
+local util = require("opentelemetry.util")
+local span_status = require("opentelemetry.trace.span_status")
+local setmetatable = setmetatable
+local table = table
+local select = select
+local pool_name = "opentelemetry_span"
+local update_time = ngx.update_time
+
+local _M = {}
+
+
+local mt = {
+    __index = _M
+}
+
+local function get_time()
+    update_time()
+    return util.time_nano()
+end
+
+
+function _M.new(name, kind)
+    local self = tablepool.fetch(pool_name, 0, 16)
+    self.start_time = get_time()
+    self.name = name
+    self.kind = kind
+    self.status = nil
+    self.dead = false
+    return setmetatable(self, mt)
+end

Review Comment:
   `_M.new()` does not initialize/reset fields that other methods rely on (e.g. 
`self.attributes` is never set, but `set_attributes()` does 
`table.insert(self.attributes, ...)` which will error). Initialize `attributes` 
to an empty array and ensure pooled fields like `child_ids`, `parent_id`, 
`end_time`, `id`, and any temporary flags are reset when fetching from the pool.



##########
apisix/plugins/opentelemetry.lua:
##########
@@ -376,48 +384,96 @@ function _M.rewrite(conf, api_ctx)
       ngx_var.opentelemetry_span_id = span_context.span_id
     end
 
+    if not ctx:span():is_recording() and ngx.ctx.tracing then
+        ngx.ctx.tracing.skip = true
+    end
+
     api_ctx.otel_context_token = ctx:attach()
 
     -- inject trace context into the headers of upstream HTTP request
     trace_context_propagator:inject(ctx, ngx.req)
 end
 
 
-function _M.delayed_body_filter(conf, api_ctx)
-    if api_ctx.otel_context_token and ngx.arg[2] then
-        local ctx = context:current()
-        ctx:detach(api_ctx.otel_context_token)
-        api_ctx.otel_context_token = nil
+local function create_child_span(tracer, parent_span_ctx, spans, span)
+    if not span or span.finished then
+        return
+    end
+    span.finished = true
+    local new_span_ctx, new_span = tracer:start(parent_span_ctx, span.name,
+                                    {
+                                        kind = span.kind,
+                                        attributes = span.attributes,
+                                    })
+    new_span.start_time = span.start_time
+
+    for _, idx in ipairs(span.child_ids or {}) do
+        create_child_span(tracer, new_span_ctx, spans, spans[idx])
+    end
+    if span.status then
+        new_span:set_status(span.status.code, span.status.message)
+    end
+    new_span:finish(span.end_time)
+end

Review Comment:
   `create_child_span()` calls `new_span:finish(span.end_time)` but spans 
created during the current log phase (e.g. the plugin-wrapper span around this 
opentelemetry plugin) have not been finished yet when `inject_core_spans()` 
runs, so `span.end_time` can be nil here. Depending on the OpenTelemetry Lua 
API this can either error or produce incorrect end timestamps. Consider 
exporting only spans with `end_time` set, or moving 
`tracer.finish_all(...)`/core-span injection to a point after log-phase spans 
are finished.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to