Copilot commented on code in PR #1483: URL: https://github.com/apache/dubbo-admin/pull/1483#discussion_r3407671797
########## pkg/mcp/tools/log/analyzer.go: ########## @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package log + +import ( + "fmt" + "regexp" + "sort" + "strings" +) + +var volatileTokenPattern = regexp.MustCompile(`(?i)\b[0-9a-f]{8,}\b|\b\d+\b`) + +func analyzeErrors(logs []LogItem, sourceEngine string) *AnalyzeErrorLogsResp { + patternsByName := map[string]*ErrorPattern{} + for _, item := range logs { + if !isErrorLog(item) { + continue + } + patternName := normalizeMessagePattern(item.Message) + pattern := patternsByName[patternName] + if pattern == nil { + pattern = &ErrorPattern{ + Pattern: patternName, + Example: item.Message, + FirstSeen: item.Timestamp, + LastSeen: item.Timestamp, + } + patternsByName[patternName] = pattern + } + pattern.Count++ + pattern.LastSeen = item.Timestamp + if len(pattern.Examples) < 3 { + pattern.Examples = append(pattern.Examples, item) + } Review Comment: `FirstSeen`/`LastSeen` are updated in a way that depends on input order. Since `SearchLogs` sorts logs descending, the current `pattern.LastSeen = item.Timestamp` ends up setting `LastSeen` to the oldest entry and never corrects `FirstSeen` when older entries are encountered. 
Update both fields using min/max comparisons instead of unconditional assignment. ########## pkg/mcp/tools/log/loki.go: ########## @@ -0,0 +1,610 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package log + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "regexp" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/apache/dubbo-admin/pkg/common/bizerror" + observabilitycfg "github.com/apache/dubbo-admin/pkg/config/observability" +) + +const ( + defaultQueryWindow = time.Hour + labelCacheTTL = 5 * time.Minute +) + +var fallbackSelectorPriority = []string{ + "namespace", + "job", + "app", + "appName", + "service_name", + "serviceName", + "service", + "pod", + "container", + "instance", + "instanceName", + "level", +} + +var lokiLabelsCache = struct { + sync.Mutex + items map[string]cachedLokiLabels +}{ + items: map[string]cachedLokiLabels{}, +} + +type cachedLokiLabels struct { + labels map[string]struct{} + expiresAt time.Time +} + +type lokiClient struct { + config observabilitycfg.LogProviderConfig + client *http.Client +} + +type lokiQueryRangeResp struct { + Status string `json:"status"` + Data struct { + Result []lokiStream `json:"result"` + } `json:"data"` 
+ Error string `json:"error,omitempty"` +} + +type lokiLabelsResp struct { + Status string `json:"status"` + Data []string `json:"data"` + Error string `json:"error,omitempty"` +} + +type lokiStream struct { + Stream map[string]string `json:"stream"` + Values [][]string `json:"values"` +} + +func newLokiClient(cfg observabilitycfg.LogProviderConfig) *lokiClient { + return &lokiClient{ + config: cfg, + client: &http.Client{Timeout: 30 * time.Second}, + } +} + +func (c *lokiClient) search(ctx context.Context, req *SearchLogsReq) (*SearchLogsResp, error) { + if req.Limit <= 0 { + req.Limit = defaultLogLimit + } + start, end, err := resolveTimeRange(req.StartTime, req.EndTime) + if err != nil { + return nil, err + } + + labelNames, _ := c.labelNames(ctx, start, end) + queries := buildLogQLQueriesWithLabels(req, labelNames) + merged := &SearchLogsResp{SourceEngine: "loki", Logs: make([]LogItem, 0, req.Limit)} + seen := map[string]struct{}{} + for _, query := range queries { + logs, err := c.queryRange(ctx, query, start, end, req.Limit) + if err != nil { + return nil, err + } + // remove duplicates + for _, item := range logs { + key := dedupeKey(item) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + merged.Logs = append(merged.Logs, item) + if len(merged.Logs) >= req.Limit { + break + } + } + if len(merged.Logs) >= req.Limit { + break + } + } + + sort.SliceStable(merged.Logs, func(i, j int) bool { + return merged.Logs[i].Timestamp > merged.Logs[j].Timestamp + }) + return merged, nil +} + +func (c *lokiClient) capabilities(ctx context.Context, req *LogCapabilitiesReq) (*LogCapabilitiesResp, error) { + start, end, err := resolveTimeRange(req.StartTime, req.EndTime) + if err != nil { + return nil, err + } + labelNames, err := c.labelNames(ctx, start, end) + if err != nil { + return nil, err + } + + return &LogCapabilitiesResp{ + AvailableLabels: supportedLabels(labelNames), + SupportedFilters: []string{ + "mesh", + "appName", + "serviceName", + 
"instanceName", + "traceId", + "keywords", + "startTime", + "endTime", + "limit", + }, + LabelFilters: map[string][]string{ + "mesh": matchingLabels([]string{"mesh"}, labelNames), + "appName": matchingLabels([]string{"app", "appName"}, labelNames), + "serviceName": matchingLabels([]string{"service", "serviceName", "service_name"}, labelNames), + "instanceName": matchingLabels([]string{"instance", "instanceName", "pod"}, labelNames), + }, + ContentFilters: []string{"traceId", "keywords"}, + FallbackLabel: fallbackSelectorLabel(labelNames), + SourceEngine: "loki", + }, nil +} + +func (c *lokiClient) queryRange(ctx context.Context, query string, start, end time.Time, limit int) ([]LogItem, error) { + queryURL, err := c.queryRangeURL(query, start, end, limit) + if err != nil { + return nil, err + } + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, queryURL, nil) + if err != nil { + return nil, bizerror.Wrap(err, bizerror.InternalError, "failed to create loki query request") + } + if c.config.Tenant != "" { + httpReq.Header.Set("X-Scope-OrgID", c.config.Tenant) + } + + httpResp, err := c.client.Do(httpReq) + if err != nil { + return nil, bizerror.Wrap(err, bizerror.NetWorkError, "failed to query loki") + } + defer httpResp.Body.Close() + if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { + body, _ := io.ReadAll(io.LimitReader(httpResp.Body, 4096)) + return nil, bizerror.New(bizerror.NetWorkError, + fmt.Sprintf("loki query failed with status %d: %s", httpResp.StatusCode, strings.TrimSpace(string(body)))) + } + + var lokiResp lokiQueryRangeResp + if err := json.NewDecoder(httpResp.Body).Decode(&lokiResp); err != nil { + return nil, bizerror.Wrap(err, bizerror.JsonError, "failed to decode loki query response") + } + if lokiResp.Status != "success" { + if lokiResp.Error != "" { + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki query failed: %s", lokiResp.Error)) + } + return nil, 
bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki query returned status %q", lokiResp.Status)) + } + return normalizeLokiLogs(lokiResp), nil +} + +func (c *lokiClient) labelNames(ctx context.Context, start, end time.Time) (map[string]struct{}, error) { + cacheKey := c.labelCacheKey() + now := time.Now() + lokiLabelsCache.Lock() + if cached, ok := lokiLabelsCache.items[cacheKey]; ok && now.Before(cached.expiresAt) { + labels := cloneLabelSet(cached.labels) + lokiLabelsCache.Unlock() + return labels, nil + } + lokiLabelsCache.Unlock() + + labelsURL, err := c.labelsURL(start, end) + if err != nil { + return nil, err + } + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, labelsURL, nil) + if err != nil { + return nil, bizerror.Wrap(err, bizerror.InternalError, "failed to create loki labels request") + } + if c.config.Tenant != "" { + httpReq.Header.Set("X-Scope-OrgID", c.config.Tenant) + } + + httpResp, err := c.client.Do(httpReq) + if err != nil { + return nil, bizerror.Wrap(err, bizerror.NetWorkError, "failed to query loki labels") + } + defer httpResp.Body.Close() + if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { + body, _ := io.ReadAll(io.LimitReader(httpResp.Body, 4096)) + return nil, bizerror.New(bizerror.NetWorkError, + fmt.Sprintf("loki labels query failed with status %d: %s", httpResp.StatusCode, strings.TrimSpace(string(body)))) + } + + var labelsResp lokiLabelsResp + if err := json.NewDecoder(httpResp.Body).Decode(&labelsResp); err != nil { + return nil, bizerror.Wrap(err, bizerror.JsonError, "failed to decode loki labels response") + } + if labelsResp.Status != "success" { + if labelsResp.Error != "" { + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki labels query failed: %s", labelsResp.Error)) + } + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki labels query returned status %q", labelsResp.Status)) + } + + labels := make(map[string]struct{}, 
len(labelsResp.Data)) + for _, label := range labelsResp.Data { + labels[label] = struct{}{} + } + + lokiLabelsCache.Lock() + lokiLabelsCache.items[cacheKey] = cachedLokiLabels{ + labels: cloneLabelSet(labels), + expiresAt: now.Add(labelCacheTTL), + } + lokiLabelsCache.Unlock() + return labels, nil +} + +func (c *lokiClient) labelCacheKey() string { + return c.config.Endpoint + "|" + c.config.Tenant +} + +func cloneLabelSet(labels map[string]struct{}) map[string]struct{} { + if labels == nil { + return nil + } + cloned := make(map[string]struct{}, len(labels)) + for label := range labels { + cloned[label] = struct{}{} + } + return cloned +} + +// e.g: endpoint: {endpoint}/loki/api/v1/query_range?query={app="order-service"}&start=1717200000000000000&end=1717203600000000000&limit=100&direction=backward +func (c *lokiClient) queryRangeURL(logQL string, start, end time.Time, limit int) (string, error) { + baseURL, err := url.Parse(c.config.Endpoint) + if err != nil { + return "", bizerror.Wrap(err, bizerror.ConfigError, "invalid loki endpoint") + } + baseURL.Path = strings.TrimRight(baseURL.Path, "/") + "/loki/api/v1/query_range" + + query := baseURL.Query() + query.Set("query", logQL) + query.Set("start", strconv.FormatInt(start.UnixNano(), 10)) + query.Set("end", strconv.FormatInt(end.UnixNano(), 10)) + query.Set("limit", strconv.Itoa(limit)) + query.Set("direction", "backward") + baseURL.RawQuery = query.Encode() + return baseURL.String(), nil +} + +func (c *lokiClient) labelsURL(start, end time.Time) (string, error) { + baseURL, err := url.Parse(c.config.Endpoint) + if err != nil { + return "", bizerror.Wrap(err, bizerror.ConfigError, "invalid loki endpoint") + } + baseURL.Path = strings.TrimRight(baseURL.Path, "/") + "/loki/api/v1/labels" + + query := baseURL.Query() + query.Set("start", strconv.FormatInt(start.UnixNano(), 10)) + query.Set("end", strconv.FormatInt(end.UnixNano(), 10)) + baseURL.RawQuery = query.Encode() + return baseURL.String(), nil +} + +func 
buildLogQLQueries(req *SearchLogsReq) []string { + return buildLogQLQueriesWithLabels(req, nil) +} + +func buildLogQLQueriesWithLabels(req *SearchLogsReq, labelNames map[string]struct{}) []string { + selectors := buildStreamSelectorsWithLabels(req, labelNames) + queries := make([]string, 0, len(selectors)) + for _, selector := range selectors { + query := selector + if req.Keywords != "" { + query += " |= " + strconv.Quote(req.Keywords) + } + if req.TraceID != "" { + query += " |= " + strconv.Quote(req.TraceID) + } + queries = append(queries, query) + } + return queries +} + +func buildStreamSelectors(req *SearchLogsReq) []string { + return buildStreamSelectorsWithLabels(req, nil) +} + +func buildStreamSelectorsWithLabels(req *SearchLogsReq, labelNames map[string]struct{}) []string { + labelGroups := make([][]string, 0, 4) + if req.Mesh != "" { + labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"mesh"}, req.Mesh, labelNames)) + } + if req.AppName != "" { + labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"app", "appName"}, req.AppName, labelNames)) + } + if req.ServiceName != "" { + labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"service", "serviceName", "service_name"}, req.ServiceName, labelNames)) + } + if req.InstanceName != "" { + labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"instance", "instanceName", "pod"}, req.InstanceName, labelNames)) + } + if len(labelGroups) == 0 { + return []string{fmt.Sprintf("{%s=~%s}", fallbackSelectorLabel(labelNames), strconv.Quote(".+"))} + } + + // Cartesian product + selectors := []string{""} + for _, group := range labelGroups { + next := make([]string, 0, len(selectors)*len(group)) + for _, prefix := range selectors { + for _, matcher := range group { + if prefix == "" { + next = append(next, matcher) + } else { + next = append(next, prefix+", "+matcher) + } + } + } + selectors = next + } + + result := make([]string, 0, len(selectors)) + for _, 
selector := range selectors { + result = append(result, fmt.Sprintf("{%s}", selector)) + } + return result +} + +func labelMatchers(names []string, value string) []string { + return labelMatchersWithLabels(names, value, nil) +} + +func labelMatchersWithLabels(names []string, value string, labelNames map[string]struct{}) []string { + selected := selectExistingLabels(names, labelNames) + matchers := make([]string, 0, len(names)) + for _, name := range selected { + matchers = append(matchers, labelMatcher(name, value)) + } + return matchers +} + +func selectExistingLabels(names []string, labelNames map[string]struct{}) []string { + if len(labelNames) == 0 { + return names + } + selected := make([]string, 0, len(names)) + for _, name := range names { + if _, ok := labelNames[name]; ok { + selected = append(selected, name) + } + } + if len(selected) == 0 { + return names + } + return selected +} + +func matchingLabels(names []string, labelNames map[string]struct{}) []string { + selected := make([]string, 0, len(names)) + for _, name := range names { + if _, ok := labelNames[name]; ok { + selected = append(selected, name) + } + } + return selected +} + +func fallbackSelectorLabel(labelNames map[string]struct{}) string { + if len(labelNames) == 0 { + return "namespace" + } + for _, label := range fallbackSelectorPriority { + if _, ok := labelNames[label]; ok { + return label + } + } + return "namespace" +} + +func supportedLabels(labelNames map[string]struct{}) []string { + labels := make([]string, 0, len(labelNames)) + for label := range labelNames { + labels = append(labels, label) + } + sort.Strings(labels) + return labels +} + +func labelMatcher(name, value string) string { + return fmt.Sprintf("%s=%s", name, strconv.Quote(value)) +} + +func resolveTimeRange(startRaw, endRaw string) (time.Time, time.Time, error) { + end := time.Now() + if endRaw != "" { + parsed, err := parseLogTime("endTime", endRaw) + if err != nil { + return time.Time{}, time.Time{}, err + } + end 
= parsed + } + start := end.Add(-defaultQueryWindow) + if startRaw != "" { + parsed, err := parseLogTime("startTime", startRaw) + if err != nil { + return time.Time{}, time.Time{}, err + } + start = parsed + } + if start.After(end) { + return time.Time{}, time.Time{}, bizerror.New(bizerror.InvalidArgument, "startTime must be less than or equal to endTime") + } + return start, end, nil +} + +func parseLogTime(field, value string) (time.Time, error) { + if ts, err := time.Parse(time.RFC3339Nano, value); err == nil { + return ts, nil + } + if ns, err := strconv.ParseInt(value, 10, 64); err == nil { + return time.Unix(0, ns), nil + } + return time.Time{}, bizerror.New(bizerror.InvalidArgument, + fmt.Sprintf("%s must be RFC3339, RFC3339Nano, or Unix nanoseconds", field)) +} + +func normalizeLokiLogs(resp lokiQueryRangeResp) []LogItem { + logs := make([]LogItem, 0) + for _, stream := range resp.Data.Result { + for _, value := range stream.Values { + if len(value) < 2 { + continue + } + raw := value[1] + message := extractLogField(raw, "msg", "message") + if message == "" { + message = raw + } + logs = append(logs, LogItem{ + Timestamp: normalizeLogTimestamp(value[0], extractLogField(raw, "time", "timestamp")), + AppName: firstLabel(stream.Stream, "app", "appName"), + ServiceName: firstLabel(stream.Stream, "service", "serviceName", "service_name"), + InstanceName: firstLabel(stream.Stream, "instance", "instanceName", "pod"), + Severity: firstNonEmpty(extractLogField(raw, "level", "severity"), firstLabel(stream.Stream, "level", "severity", "detected_level")), + Message: message, + TraceID: extractLogField(raw, "trace_id", "traceId", "traceid"), + SpanID: extractLogField(raw, "span_id", "spanId", "spanid"), + TraceFlags: extractLogField(raw, "trace_flags", "traceFlags", "traceflags"), Review Comment: Trace context is extracted only from the log line payload (`raw`). 
If Loki already has `trace_id`/`span_id`/`trace_flags` as stream labels, those values are ignored and the normalized result may miss trace context when the message doesn't include it. Prefer message-derived values, but fall back to stream labels. ########## pkg/mcp/tools/log/loki.go: ########## @@ -0,0 +1,610 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package log + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "regexp" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/apache/dubbo-admin/pkg/common/bizerror" + observabilitycfg "github.com/apache/dubbo-admin/pkg/config/observability" +) + +const ( + defaultQueryWindow = time.Hour + labelCacheTTL = 5 * time.Minute +) + +var fallbackSelectorPriority = []string{ + "namespace", + "job", + "app", + "appName", + "service_name", + "serviceName", + "service", + "pod", + "container", + "instance", + "instanceName", + "level", +} + +var lokiLabelsCache = struct { + sync.Mutex + items map[string]cachedLokiLabels +}{ + items: map[string]cachedLokiLabels{}, +} + +type cachedLokiLabels struct { + labels map[string]struct{} + expiresAt time.Time +} + +type lokiClient struct { + config observabilitycfg.LogProviderConfig + client *http.Client +} + +type lokiQueryRangeResp struct { + Status string `json:"status"` + Data struct { + Result []lokiStream `json:"result"` + } `json:"data"` + Error string `json:"error,omitempty"` +} + +type lokiLabelsResp struct { + Status string `json:"status"` + Data []string `json:"data"` + Error string `json:"error,omitempty"` +} + +type lokiStream struct { + Stream map[string]string `json:"stream"` + Values [][]string `json:"values"` +} + +func newLokiClient(cfg observabilitycfg.LogProviderConfig) *lokiClient { + return &lokiClient{ + config: cfg, + client: &http.Client{Timeout: 30 * time.Second}, + } +} + +func (c *lokiClient) search(ctx context.Context, req *SearchLogsReq) (*SearchLogsResp, error) { + if req.Limit <= 0 { + req.Limit = defaultLogLimit + } + start, end, err := resolveTimeRange(req.StartTime, req.EndTime) + if err != nil { + return nil, err + } + + labelNames, _ := c.labelNames(ctx, start, end) + queries := buildLogQLQueriesWithLabels(req, labelNames) + merged := &SearchLogsResp{SourceEngine: "loki", Logs: make([]LogItem, 0, req.Limit)} + seen := map[string]struct{}{} + for 
_, query := range queries { + logs, err := c.queryRange(ctx, query, start, end, req.Limit) + if err != nil { + return nil, err + } + // remove duplicates + for _, item := range logs { + key := dedupeKey(item) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + merged.Logs = append(merged.Logs, item) + if len(merged.Logs) >= req.Limit { + break + } + } + if len(merged.Logs) >= req.Limit { + break + } + } + + sort.SliceStable(merged.Logs, func(i, j int) bool { + return merged.Logs[i].Timestamp > merged.Logs[j].Timestamp + }) + return merged, nil +} + +func (c *lokiClient) capabilities(ctx context.Context, req *LogCapabilitiesReq) (*LogCapabilitiesResp, error) { + start, end, err := resolveTimeRange(req.StartTime, req.EndTime) + if err != nil { + return nil, err + } + labelNames, err := c.labelNames(ctx, start, end) + if err != nil { + return nil, err + } + + return &LogCapabilitiesResp{ + AvailableLabels: supportedLabels(labelNames), + SupportedFilters: []string{ + "mesh", + "appName", + "serviceName", + "instanceName", + "traceId", + "keywords", + "startTime", + "endTime", + "limit", + }, + LabelFilters: map[string][]string{ + "mesh": matchingLabels([]string{"mesh"}, labelNames), + "appName": matchingLabels([]string{"app", "appName"}, labelNames), + "serviceName": matchingLabels([]string{"service", "serviceName", "service_name"}, labelNames), + "instanceName": matchingLabels([]string{"instance", "instanceName", "pod"}, labelNames), + }, + ContentFilters: []string{"traceId", "keywords"}, + FallbackLabel: fallbackSelectorLabel(labelNames), + SourceEngine: "loki", + }, nil +} + +func (c *lokiClient) queryRange(ctx context.Context, query string, start, end time.Time, limit int) ([]LogItem, error) { + queryURL, err := c.queryRangeURL(query, start, end, limit) + if err != nil { + return nil, err + } + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, queryURL, nil) + if err != nil { + return nil, bizerror.Wrap(err, bizerror.InternalError, 
"failed to create loki query request") + } + if c.config.Tenant != "" { + httpReq.Header.Set("X-Scope-OrgID", c.config.Tenant) + } + + httpResp, err := c.client.Do(httpReq) + if err != nil { + return nil, bizerror.Wrap(err, bizerror.NetWorkError, "failed to query loki") + } + defer httpResp.Body.Close() + if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { + body, _ := io.ReadAll(io.LimitReader(httpResp.Body, 4096)) + return nil, bizerror.New(bizerror.NetWorkError, + fmt.Sprintf("loki query failed with status %d: %s", httpResp.StatusCode, strings.TrimSpace(string(body)))) + } + + var lokiResp lokiQueryRangeResp + if err := json.NewDecoder(httpResp.Body).Decode(&lokiResp); err != nil { + return nil, bizerror.Wrap(err, bizerror.JsonError, "failed to decode loki query response") + } + if lokiResp.Status != "success" { + if lokiResp.Error != "" { + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki query failed: %s", lokiResp.Error)) + } + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki query returned status %q", lokiResp.Status)) + } + return normalizeLokiLogs(lokiResp), nil +} + +func (c *lokiClient) labelNames(ctx context.Context, start, end time.Time) (map[string]struct{}, error) { + cacheKey := c.labelCacheKey() + now := time.Now() + lokiLabelsCache.Lock() + if cached, ok := lokiLabelsCache.items[cacheKey]; ok && now.Before(cached.expiresAt) { + labels := cloneLabelSet(cached.labels) + lokiLabelsCache.Unlock() + return labels, nil + } + lokiLabelsCache.Unlock() + + labelsURL, err := c.labelsURL(start, end) + if err != nil { + return nil, err + } + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, labelsURL, nil) + if err != nil { + return nil, bizerror.Wrap(err, bizerror.InternalError, "failed to create loki labels request") + } + if c.config.Tenant != "" { + httpReq.Header.Set("X-Scope-OrgID", c.config.Tenant) + } + + httpResp, err := c.client.Do(httpReq) + if err != nil { + 
return nil, bizerror.Wrap(err, bizerror.NetWorkError, "failed to query loki labels") + } + defer httpResp.Body.Close() + if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { + body, _ := io.ReadAll(io.LimitReader(httpResp.Body, 4096)) + return nil, bizerror.New(bizerror.NetWorkError, + fmt.Sprintf("loki labels query failed with status %d: %s", httpResp.StatusCode, strings.TrimSpace(string(body)))) + } + + var labelsResp lokiLabelsResp + if err := json.NewDecoder(httpResp.Body).Decode(&labelsResp); err != nil { + return nil, bizerror.Wrap(err, bizerror.JsonError, "failed to decode loki labels response") + } + if labelsResp.Status != "success" { + if labelsResp.Error != "" { + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki labels query failed: %s", labelsResp.Error)) + } + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki labels query returned status %q", labelsResp.Status)) + } + + labels := make(map[string]struct{}, len(labelsResp.Data)) + for _, label := range labelsResp.Data { + labels[label] = struct{}{} + } + + lokiLabelsCache.Lock() + lokiLabelsCache.items[cacheKey] = cachedLokiLabels{ + labels: cloneLabelSet(labels), + expiresAt: now.Add(labelCacheTTL), + } + lokiLabelsCache.Unlock() + return labels, nil +} + +func (c *lokiClient) labelCacheKey() string { + return c.config.Endpoint + "|" + c.config.Tenant +} + +func cloneLabelSet(labels map[string]struct{}) map[string]struct{} { + if labels == nil { + return nil + } + cloned := make(map[string]struct{}, len(labels)) + for label := range labels { + cloned[label] = struct{}{} + } + return cloned +} + +// e.g: endpoint: {endpoint}/loki/api/v1/query_range?query={app="order-service"}&start=1717200000000000000&end=1717203600000000000&limit=100&direction=backward +func (c *lokiClient) queryRangeURL(logQL string, start, end time.Time, limit int) (string, error) { + baseURL, err := url.Parse(c.config.Endpoint) + if err != nil { + return "", 
bizerror.Wrap(err, bizerror.ConfigError, "invalid loki endpoint") + } + baseURL.Path = strings.TrimRight(baseURL.Path, "/") + "/loki/api/v1/query_range" + + query := baseURL.Query() + query.Set("query", logQL) + query.Set("start", strconv.FormatInt(start.UnixNano(), 10)) + query.Set("end", strconv.FormatInt(end.UnixNano(), 10)) + query.Set("limit", strconv.Itoa(limit)) + query.Set("direction", "backward") + baseURL.RawQuery = query.Encode() + return baseURL.String(), nil +} + +func (c *lokiClient) labelsURL(start, end time.Time) (string, error) { + baseURL, err := url.Parse(c.config.Endpoint) + if err != nil { + return "", bizerror.Wrap(err, bizerror.ConfigError, "invalid loki endpoint") + } + baseURL.Path = strings.TrimRight(baseURL.Path, "/") + "/loki/api/v1/labels" + + query := baseURL.Query() + query.Set("start", strconv.FormatInt(start.UnixNano(), 10)) + query.Set("end", strconv.FormatInt(end.UnixNano(), 10)) + baseURL.RawQuery = query.Encode() + return baseURL.String(), nil +} + +func buildLogQLQueries(req *SearchLogsReq) []string { + return buildLogQLQueriesWithLabels(req, nil) +} + +func buildLogQLQueriesWithLabels(req *SearchLogsReq, labelNames map[string]struct{}) []string { + selectors := buildStreamSelectorsWithLabels(req, labelNames) + queries := make([]string, 0, len(selectors)) + for _, selector := range selectors { + query := selector + if req.Keywords != "" { + query += " |= " + strconv.Quote(req.Keywords) + } + if req.TraceID != "" { + query += " |= " + strconv.Quote(req.TraceID) + } + queries = append(queries, query) + } + return queries +} + +func buildStreamSelectors(req *SearchLogsReq) []string { + return buildStreamSelectorsWithLabels(req, nil) +} + +func buildStreamSelectorsWithLabels(req *SearchLogsReq, labelNames map[string]struct{}) []string { + labelGroups := make([][]string, 0, 4) + if req.Mesh != "" { + labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"mesh"}, req.Mesh, labelNames)) + } + if req.AppName != "" { + 
labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"app", "appName"}, req.AppName, labelNames)) + } + if req.ServiceName != "" { + labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"service", "serviceName", "service_name"}, req.ServiceName, labelNames)) + } + if req.InstanceName != "" { + labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"instance", "instanceName", "pod"}, req.InstanceName, labelNames)) + } + if len(labelGroups) == 0 { + return []string{fmt.Sprintf("{%s=~%s}", fallbackSelectorLabel(labelNames), strconv.Quote(".+"))} + } + + // Cartesian product + selectors := []string{""} + for _, group := range labelGroups { + next := make([]string, 0, len(selectors)*len(group)) + for _, prefix := range selectors { + for _, matcher := range group { + if prefix == "" { + next = append(next, matcher) + } else { + next = append(next, prefix+", "+matcher) + } + } + } + selectors = next + } + + result := make([]string, 0, len(selectors)) + for _, selector := range selectors { + result = append(result, fmt.Sprintf("{%s}", selector)) + } + return result +} + +func labelMatchers(names []string, value string) []string { + return labelMatchersWithLabels(names, value, nil) +} + +func labelMatchersWithLabels(names []string, value string, labelNames map[string]struct{}) []string { + selected := selectExistingLabels(names, labelNames) + matchers := make([]string, 0, len(names)) + for _, name := range selected { + matchers = append(matchers, labelMatcher(name, value)) + } + return matchers +} + +func selectExistingLabels(names []string, labelNames map[string]struct{}) []string { + if len(labelNames) == 0 { + return names + } + selected := make([]string, 0, len(names)) + for _, name := range names { + if _, ok := labelNames[name]; ok { + selected = append(selected, name) + } + } + if len(selected) == 0 { + return names + } + return selected +} + +func matchingLabels(names []string, labelNames map[string]struct{}) []string { + 
selected := make([]string, 0, len(names)) + for _, name := range names { + if _, ok := labelNames[name]; ok { + selected = append(selected, name) + } + } + return selected +} + +func fallbackSelectorLabel(labelNames map[string]struct{}) string { + if len(labelNames) == 0 { + return "namespace" + } + for _, label := range fallbackSelectorPriority { + if _, ok := labelNames[label]; ok { + return label + } + } + return "namespace" +} + +func supportedLabels(labelNames map[string]struct{}) []string { + labels := make([]string, 0, len(labelNames)) + for label := range labelNames { + labels = append(labels, label) + } + sort.Strings(labels) + return labels +} + +func labelMatcher(name, value string) string { + return fmt.Sprintf("%s=%s", name, strconv.Quote(value)) +} + +func resolveTimeRange(startRaw, endRaw string) (time.Time, time.Time, error) { + end := time.Now() + if endRaw != "" { + parsed, err := parseLogTime("endTime", endRaw) + if err != nil { + return time.Time{}, time.Time{}, err + } + end = parsed + } + start := end.Add(-defaultQueryWindow) + if startRaw != "" { + parsed, err := parseLogTime("startTime", startRaw) + if err != nil { + return time.Time{}, time.Time{}, err + } + start = parsed + } + if start.After(end) { + return time.Time{}, time.Time{}, bizerror.New(bizerror.InvalidArgument, "startTime must be less than or equal to endTime") + } + return start, end, nil +} + +func parseLogTime(field, value string) (time.Time, error) { + if ts, err := time.Parse(time.RFC3339Nano, value); err == nil { + return ts, nil + } + if ns, err := strconv.ParseInt(value, 10, 64); err == nil { + return time.Unix(0, ns), nil + } + return time.Time{}, bizerror.New(bizerror.InvalidArgument, + fmt.Sprintf("%s must be RFC3339, RFC3339Nano, or Unix nanoseconds", field)) +} + +func normalizeLokiLogs(resp lokiQueryRangeResp) []LogItem { + logs := make([]LogItem, 0) + for _, stream := range resp.Data.Result { + for _, value := range stream.Values { + if len(value) < 2 { + continue 
+ } + raw := value[1] + message := extractLogField(raw, "msg", "message") + if message == "" { + message = raw + } + logs = append(logs, LogItem{ + Timestamp: normalizeLogTimestamp(value[0], extractLogField(raw, "time", "timestamp")), + AppName: firstLabel(stream.Stream, "app", "appName"), + ServiceName: firstLabel(stream.Stream, "service", "serviceName", "service_name"), + InstanceName: firstLabel(stream.Stream, "instance", "instanceName", "pod"), + Severity: firstNonEmpty(extractLogField(raw, "level", "severity"), firstLabel(stream.Stream, "level", "severity", "detected_level")), + Message: message, + TraceID: extractLogField(raw, "trace_id", "traceId", "traceid"), + SpanID: extractLogField(raw, "span_id", "spanId", "spanid"), + TraceFlags: extractLogField(raw, "trace_flags", "traceFlags", "traceflags"), + Attributes: extraLabels(stream.Stream), + Raw: raw, + }) + } + } + return logs +} + +func normalizeLogTimestamp(lokiTimestamp, logTimestamp string) string { + if logTimestamp != "" { + if parsed, err := time.Parse(time.RFC3339Nano, logTimestamp); err == nil { + return parsed.UTC().Format(time.RFC3339Nano) + } + } + return normalizeLokiTimestamp(lokiTimestamp) +} + +func normalizeLokiTimestamp(value string) string { + ns, err := strconv.ParseInt(value, 10, 64) + if err != nil { + return value + } + return time.Unix(0, ns).UTC().Format(time.RFC3339Nano) +} + +// firstLabel returns the first label value that matches any of the keys, or an empty string if none matches +func firstLabel(labels map[string]string, keys ...string) string { + for _, key := range keys { + if value := labels[key]; value != "" { + return value + } + } + return "" +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if value != "" { + return value + } + } + return "" +} + +func extractLogField(message string, keys ...string) string { + if value := extractJSONLogField(message, keys...); value != "" { + return value + } + return extractTextLogField(message, 
keys...) +} + +func extractJSONLogField(message string, keys ...string) string { + var payload map[string]any + if err := json.Unmarshal([]byte(message), &payload); err != nil { + return "" + } + for _, key := range keys { + if value := stringifyLogField(payload[key]); value != "" { + return value + } + } + return "" +} + +func stringifyLogField(value any) string { + switch v := value.(type) { + case string: + return v + case float64, bool: + return fmt.Sprint(v) + default: + return "" + } +} + +func extractTextLogField(message string, keys ...string) string { + for _, key := range keys { + pattern := regexp.MustCompile(`(?i)(?:^|[\s{,])"?` + regexp.QuoteMeta(key) + `"?\s*[:=]\s*"?([^"\s,}]+)`) + if matches := pattern.FindStringSubmatch(message); len(matches) == 2 { + return matches[1] + } + } + return "" +} Review Comment: `extractTextLogField` recompiles a regular expression for every key on every log line (`regexp.MustCompile` inside the loop). This is unnecessarily expensive in log-heavy responses; cache the compiled regex per key and reuse it. ########## pkg/config/observability/logs.go: ########## @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + package observability + + import ( + "fmt" + "net/url" + + "github.com/duke-git/lancet/v2/strutil" + + "github.com/apache/dubbo-admin/pkg/common/bizerror" + ) + + type LogProviderType string + + const ( + LogProviderLoki LogProviderType = "loki" + ) + + type LogsConfig struct { + DefaultProvider string `json:"defaultProvider" yaml:"defaultProvider"` + Providers []LogProviderConfig `json:"providers" yaml:"providers"` + } + + type LogProviderConfig struct { + Name string `json:"name" yaml:"name"` + Type LogProviderType `json:"type" yaml:"type"` + Endpoint string `json:"endpoint" yaml:"endpoint"` + Tenant string `json:"tenant,omitempty" yaml:"tenant,omitempty"` + } + + func (c *LogsConfig) Validate() error { + if c == nil || len(c.Providers) == 0 { + return nil + } + if strutil.IsBlank(c.DefaultProvider) { + return bizerror.New(bizerror.ConfigError, "default log provider is required") + } + + foundDefault := false + for _, provider := range c.Providers { + if strutil.IsBlank(provider.Name) { + return bizerror.New(bizerror.ConfigError, "log provider name is required") + } + if provider.Name == c.DefaultProvider { + foundDefault = true + } + if provider.Type != LogProviderLoki { + return bizerror.New(bizerror.ConfigError, fmt.Sprintf("unsupported log provider type: %s", provider.Type)) + } + if strutil.IsBlank(provider.Endpoint) { + return bizerror.New(bizerror.ConfigError, "log provider endpoint is required") + } + if _, err := url.Parse(provider.Endpoint); err != nil { + return bizerror.Wrap(err, bizerror.ConfigError, fmt.Sprintf("invalid log provider endpoint: %s", provider.Endpoint)) + } Review Comment: `url.Parse` accepts inputs like `localhost:3100` without returning an error (it parses `localhost` as the scheme and `3100` as opaque data, leaving the host empty), so invalid endpoints can pass validation and later fail at runtime. Validate that the parsed URL has an `http` or `https` scheme and a non-empty host. 
########## pkg/mcp/register.go: ########## @@ -310,4 +311,35 @@ func RegisterTools(server *Server) { }, Handler: tools.GetApplicationServices, }) + + logSearchProperties := logtools.LogSearchProperties() + server.RegisterTool(&common.ToolDef{ + Name: "search_logs", + Description: "查询 Dubbo 服务日志,支持按应用、服务、实例、TraceID 和关键字过滤", + InputSchema: common.InputSchema{ + Type: "object", + Properties: logSearchProperties, + }, + Handler: logtools.SearchLogs, + }) + + server.RegisterTool(&common.ToolDef{ + Name: "analyze_error_logs", + Description: "分析错误日志并按错误模式聚合", + InputSchema: common.InputSchema{ + Type: "object", + Properties: logSearchProperties, + }, + Handler: logtools.AnalyzeErrorLogs, + }) + + server.RegisterTool(&common.ToolDef{ + Name: "get_log_capabilities", + Description: "获取日志查询能力,返回 Loki 当前可用 labels 以及查询参数到 labels 的映射", Review Comment: The PR description says it registers two MCP tools (`search_logs` and `analyze_error_logs`), but this file also registers `get_log_capabilities`. Either update the PR description to include the third tool or drop this registration if it's out of scope. ########## pkg/mcp/tools/log/loki.go: ########## @@ -0,0 +1,610 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package log + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "regexp" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/apache/dubbo-admin/pkg/common/bizerror" + observabilitycfg "github.com/apache/dubbo-admin/pkg/config/observability" +) + +const ( + defaultQueryWindow = time.Hour + labelCacheTTL = 5 * time.Minute +) + +var fallbackSelectorPriority = []string{ + "namespace", + "job", + "app", + "appName", + "service_name", + "serviceName", + "service", + "pod", + "container", + "instance", + "instanceName", + "level", +} + +var lokiLabelsCache = struct { + sync.Mutex + items map[string]cachedLokiLabels +}{ + items: map[string]cachedLokiLabels{}, +} + +type cachedLokiLabels struct { + labels map[string]struct{} + expiresAt time.Time +} + +type lokiClient struct { + config observabilitycfg.LogProviderConfig + client *http.Client +} + +type lokiQueryRangeResp struct { + Status string `json:"status"` + Data struct { + Result []lokiStream `json:"result"` + } `json:"data"` + Error string `json:"error,omitempty"` +} + +type lokiLabelsResp struct { + Status string `json:"status"` + Data []string `json:"data"` + Error string `json:"error,omitempty"` +} + +type lokiStream struct { + Stream map[string]string `json:"stream"` + Values [][]string `json:"values"` +} + +func newLokiClient(cfg observabilitycfg.LogProviderConfig) *lokiClient { + return &lokiClient{ + config: cfg, + client: &http.Client{Timeout: 30 * time.Second}, + } +} + +func (c *lokiClient) search(ctx context.Context, req *SearchLogsReq) (*SearchLogsResp, error) { + if req.Limit <= 0 { + req.Limit = defaultLogLimit + } + start, end, err := resolveTimeRange(req.StartTime, req.EndTime) + if err != nil { + return nil, err + } + + labelNames, _ := c.labelNames(ctx, start, end) + queries := buildLogQLQueriesWithLabels(req, labelNames) + merged := &SearchLogsResp{SourceEngine: "loki", Logs: make([]LogItem, 0, req.Limit)} + seen := map[string]struct{}{} + for 
_, query := range queries { + logs, err := c.queryRange(ctx, query, start, end, req.Limit) + if err != nil { + return nil, err + } + // remove duplicates + for _, item := range logs { + key := dedupeKey(item) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + merged.Logs = append(merged.Logs, item) + if len(merged.Logs) >= req.Limit { + break + } + } + if len(merged.Logs) >= req.Limit { + break + } + } + + sort.SliceStable(merged.Logs, func(i, j int) bool { + return merged.Logs[i].Timestamp > merged.Logs[j].Timestamp + }) + return merged, nil +} + +func (c *lokiClient) capabilities(ctx context.Context, req *LogCapabilitiesReq) (*LogCapabilitiesResp, error) { + start, end, err := resolveTimeRange(req.StartTime, req.EndTime) + if err != nil { + return nil, err + } + labelNames, err := c.labelNames(ctx, start, end) + if err != nil { + return nil, err + } + + return &LogCapabilitiesResp{ + AvailableLabels: supportedLabels(labelNames), + SupportedFilters: []string{ + "mesh", + "appName", + "serviceName", + "instanceName", + "traceId", + "keywords", + "startTime", + "endTime", + "limit", + }, + LabelFilters: map[string][]string{ + "mesh": matchingLabels([]string{"mesh"}, labelNames), + "appName": matchingLabels([]string{"app", "appName"}, labelNames), + "serviceName": matchingLabels([]string{"service", "serviceName", "service_name"}, labelNames), + "instanceName": matchingLabels([]string{"instance", "instanceName", "pod"}, labelNames), + }, + ContentFilters: []string{"traceId", "keywords"}, + FallbackLabel: fallbackSelectorLabel(labelNames), + SourceEngine: "loki", + }, nil +} + +func (c *lokiClient) queryRange(ctx context.Context, query string, start, end time.Time, limit int) ([]LogItem, error) { + queryURL, err := c.queryRangeURL(query, start, end, limit) + if err != nil { + return nil, err + } + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, queryURL, nil) + if err != nil { + return nil, bizerror.Wrap(err, bizerror.InternalError, 
"failed to create loki query request") + } + if c.config.Tenant != "" { + httpReq.Header.Set("X-Scope-OrgID", c.config.Tenant) + } + + httpResp, err := c.client.Do(httpReq) + if err != nil { + return nil, bizerror.Wrap(err, bizerror.NetWorkError, "failed to query loki") + } + defer httpResp.Body.Close() + if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { + body, _ := io.ReadAll(io.LimitReader(httpResp.Body, 4096)) + return nil, bizerror.New(bizerror.NetWorkError, + fmt.Sprintf("loki query failed with status %d: %s", httpResp.StatusCode, strings.TrimSpace(string(body)))) + } + + var lokiResp lokiQueryRangeResp + if err := json.NewDecoder(httpResp.Body).Decode(&lokiResp); err != nil { + return nil, bizerror.Wrap(err, bizerror.JsonError, "failed to decode loki query response") + } + if lokiResp.Status != "success" { + if lokiResp.Error != "" { + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki query failed: %s", lokiResp.Error)) + } + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki query returned status %q", lokiResp.Status)) + } + return normalizeLokiLogs(lokiResp), nil +} + +func (c *lokiClient) labelNames(ctx context.Context, start, end time.Time) (map[string]struct{}, error) { + cacheKey := c.labelCacheKey() + now := time.Now() + lokiLabelsCache.Lock() + if cached, ok := lokiLabelsCache.items[cacheKey]; ok && now.Before(cached.expiresAt) { + labels := cloneLabelSet(cached.labels) + lokiLabelsCache.Unlock() + return labels, nil + } + lokiLabelsCache.Unlock() + + labelsURL, err := c.labelsURL(start, end) + if err != nil { + return nil, err + } + httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, labelsURL, nil) + if err != nil { + return nil, bizerror.Wrap(err, bizerror.InternalError, "failed to create loki labels request") + } + if c.config.Tenant != "" { + httpReq.Header.Set("X-Scope-OrgID", c.config.Tenant) + } + + httpResp, err := c.client.Do(httpReq) + if err != nil { + 
return nil, bizerror.Wrap(err, bizerror.NetWorkError, "failed to query loki labels") + } + defer httpResp.Body.Close() + if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { + body, _ := io.ReadAll(io.LimitReader(httpResp.Body, 4096)) + return nil, bizerror.New(bizerror.NetWorkError, + fmt.Sprintf("loki labels query failed with status %d: %s", httpResp.StatusCode, strings.TrimSpace(string(body)))) + } + + var labelsResp lokiLabelsResp + if err := json.NewDecoder(httpResp.Body).Decode(&labelsResp); err != nil { + return nil, bizerror.Wrap(err, bizerror.JsonError, "failed to decode loki labels response") + } + if labelsResp.Status != "success" { + if labelsResp.Error != "" { + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki labels query failed: %s", labelsResp.Error)) + } + return nil, bizerror.New(bizerror.NetWorkError, fmt.Sprintf("loki labels query returned status %q", labelsResp.Status)) + } + + labels := make(map[string]struct{}, len(labelsResp.Data)) + for _, label := range labelsResp.Data { + labels[label] = struct{}{} + } + + lokiLabelsCache.Lock() + lokiLabelsCache.items[cacheKey] = cachedLokiLabels{ + labels: cloneLabelSet(labels), + expiresAt: now.Add(labelCacheTTL), + } + lokiLabelsCache.Unlock() + return labels, nil +} + +func (c *lokiClient) labelCacheKey() string { + return c.config.Endpoint + "|" + c.config.Tenant +} + +func cloneLabelSet(labels map[string]struct{}) map[string]struct{} { + if labels == nil { + return nil + } + cloned := make(map[string]struct{}, len(labels)) + for label := range labels { + cloned[label] = struct{}{} + } + return cloned +} + +// e.g: endpoint: {endpoint}/loki/api/v1/query_range?query={app="order-service"}&start=1717200000000000000&end=1717203600000000000&limit=100&direction=backward +func (c *lokiClient) queryRangeURL(logQL string, start, end time.Time, limit int) (string, error) { + baseURL, err := url.Parse(c.config.Endpoint) + if err != nil { + return "", 
bizerror.Wrap(err, bizerror.ConfigError, "invalid loki endpoint") + } + baseURL.Path = strings.TrimRight(baseURL.Path, "/") + "/loki/api/v1/query_range" + + query := baseURL.Query() + query.Set("query", logQL) + query.Set("start", strconv.FormatInt(start.UnixNano(), 10)) + query.Set("end", strconv.FormatInt(end.UnixNano(), 10)) + query.Set("limit", strconv.Itoa(limit)) + query.Set("direction", "backward") + baseURL.RawQuery = query.Encode() + return baseURL.String(), nil +} + +func (c *lokiClient) labelsURL(start, end time.Time) (string, error) { + baseURL, err := url.Parse(c.config.Endpoint) + if err != nil { + return "", bizerror.Wrap(err, bizerror.ConfigError, "invalid loki endpoint") + } + baseURL.Path = strings.TrimRight(baseURL.Path, "/") + "/loki/api/v1/labels" + + query := baseURL.Query() + query.Set("start", strconv.FormatInt(start.UnixNano(), 10)) + query.Set("end", strconv.FormatInt(end.UnixNano(), 10)) + baseURL.RawQuery = query.Encode() + return baseURL.String(), nil +} + +func buildLogQLQueries(req *SearchLogsReq) []string { + return buildLogQLQueriesWithLabels(req, nil) +} + +func buildLogQLQueriesWithLabels(req *SearchLogsReq, labelNames map[string]struct{}) []string { + selectors := buildStreamSelectorsWithLabels(req, labelNames) + queries := make([]string, 0, len(selectors)) + for _, selector := range selectors { + query := selector + if req.Keywords != "" { + query += " |= " + strconv.Quote(req.Keywords) + } + if req.TraceID != "" { + query += " |= " + strconv.Quote(req.TraceID) + } + queries = append(queries, query) + } + return queries +} + +func buildStreamSelectors(req *SearchLogsReq) []string { + return buildStreamSelectorsWithLabels(req, nil) +} + +func buildStreamSelectorsWithLabels(req *SearchLogsReq, labelNames map[string]struct{}) []string { + labelGroups := make([][]string, 0, 4) + if req.Mesh != "" { + labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"mesh"}, req.Mesh, labelNames)) + } + if req.AppName != "" { + 
labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"app", "appName"}, req.AppName, labelNames)) + } + if req.ServiceName != "" { + labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"service", "serviceName", "service_name"}, req.ServiceName, labelNames)) + } + if req.InstanceName != "" { + labelGroups = append(labelGroups, labelMatchersWithLabels([]string{"instance", "instanceName", "pod"}, req.InstanceName, labelNames)) + } + if len(labelGroups) == 0 { + return []string{fmt.Sprintf("{%s=~%s}", fallbackSelectorLabel(labelNames), strconv.Quote(".+"))} + } + + // Cartesian product + selectors := []string{""} + for _, group := range labelGroups { + next := make([]string, 0, len(selectors)*len(group)) + for _, prefix := range selectors { + for _, matcher := range group { + if prefix == "" { + next = append(next, matcher) + } else { + next = append(next, prefix+", "+matcher) + } + } + } + selectors = next + } + + result := make([]string, 0, len(selectors)) + for _, selector := range selectors { + result = append(result, fmt.Sprintf("{%s}", selector)) + } + return result +} + +func labelMatchers(names []string, value string) []string { + return labelMatchersWithLabels(names, value, nil) +} + +func labelMatchersWithLabels(names []string, value string, labelNames map[string]struct{}) []string { + selected := selectExistingLabels(names, labelNames) + matchers := make([]string, 0, len(names)) + for _, name := range selected { + matchers = append(matchers, labelMatcher(name, value)) + } + return matchers +} + +func selectExistingLabels(names []string, labelNames map[string]struct{}) []string { + if len(labelNames) == 0 { + return names + } + selected := make([]string, 0, len(names)) + for _, name := range names { + if _, ok := labelNames[name]; ok { + selected = append(selected, name) + } + } + if len(selected) == 0 { + return names + } + return selected +} + +func matchingLabels(names []string, labelNames map[string]struct{}) []string { + 
selected := make([]string, 0, len(names)) + for _, name := range names { + if _, ok := labelNames[name]; ok { + selected = append(selected, name) + } + } + return selected +} + +func fallbackSelectorLabel(labelNames map[string]struct{}) string { + if len(labelNames) == 0 { + return "namespace" + } + for _, label := range fallbackSelectorPriority { + if _, ok := labelNames[label]; ok { + return label + } + } + return "namespace" +} + +func supportedLabels(labelNames map[string]struct{}) []string { + labels := make([]string, 0, len(labelNames)) + for label := range labelNames { + labels = append(labels, label) + } + sort.Strings(labels) + return labels +} + +func labelMatcher(name, value string) string { + return fmt.Sprintf("%s=%s", name, strconv.Quote(value)) +} + +func resolveTimeRange(startRaw, endRaw string) (time.Time, time.Time, error) { + end := time.Now() + if endRaw != "" { + parsed, err := parseLogTime("endTime", endRaw) + if err != nil { + return time.Time{}, time.Time{}, err + } + end = parsed + } + start := end.Add(-defaultQueryWindow) + if startRaw != "" { + parsed, err := parseLogTime("startTime", startRaw) + if err != nil { + return time.Time{}, time.Time{}, err + } + start = parsed + } + if start.After(end) { + return time.Time{}, time.Time{}, bizerror.New(bizerror.InvalidArgument, "startTime must be less than or equal to endTime") + } + return start, end, nil +} + +func parseLogTime(field, value string) (time.Time, error) { + if ts, err := time.Parse(time.RFC3339Nano, value); err == nil { + return ts, nil + } + if ns, err := strconv.ParseInt(value, 10, 64); err == nil { + return time.Unix(0, ns), nil + } + return time.Time{}, bizerror.New(bizerror.InvalidArgument, + fmt.Sprintf("%s must be RFC3339, RFC3339Nano, or Unix nanoseconds", field)) +} + +func normalizeLokiLogs(resp lokiQueryRangeResp) []LogItem { + logs := make([]LogItem, 0) + for _, stream := range resp.Data.Result { + for _, value := range stream.Values { + if len(value) < 2 { + continue 
+ } + raw := value[1] + message := extractLogField(raw, "msg", "message") + if message == "" { + message = raw + } + logs = append(logs, LogItem{ + Timestamp: normalizeLogTimestamp(value[0], extractLogField(raw, "time", "timestamp")), + AppName: firstLabel(stream.Stream, "app", "appName"), + ServiceName: firstLabel(stream.Stream, "service", "serviceName", "service_name"), + InstanceName: firstLabel(stream.Stream, "instance", "instanceName", "pod"), + Severity: firstNonEmpty(extractLogField(raw, "level", "severity"), firstLabel(stream.Stream, "level", "severity", "detected_level")), + Message: message, + TraceID: extractLogField(raw, "trace_id", "traceId", "traceid"), + SpanID: extractLogField(raw, "span_id", "spanId", "spanid"), + TraceFlags: extractLogField(raw, "trace_flags", "traceFlags", "traceflags"), + Attributes: extraLabels(stream.Stream), + Raw: raw, + }) + } + } + return logs +} + +func normalizeLogTimestamp(lokiTimestamp, logTimestamp string) string { + if logTimestamp != "" { + if parsed, err := time.Parse(time.RFC3339Nano, logTimestamp); err == nil { + return parsed.UTC().Format(time.RFC3339Nano) + } + } + return normalizeLokiTimestamp(lokiTimestamp) +} + +func normalizeLokiTimestamp(value string) string { + ns, err := strconv.ParseInt(value, 10, 64) + if err != nil { + return value + } + return time.Unix(0, ns).UTC().Format(time.RFC3339Nano) +} + +// firstLabel returns the first label value that matches any of the keys, or an empty string if none matches +func firstLabel(labels map[string]string, keys ...string) string { + for _, key := range keys { + if value := labels[key]; value != "" { + return value + } + } + return "" +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if value != "" { + return value + } + } + return "" +} + +func extractLogField(message string, keys ...string) string { + if value := extractJSONLogField(message, keys...); value != "" { + return value + } + return extractTextLogField(message, 
keys...) +} + +func extractJSONLogField(message string, keys ...string) string { + var payload map[string]any + if err := json.Unmarshal([]byte(message), &payload); err != nil { + return "" + } + for _, key := range keys { + if value := stringifyLogField(payload[key]); value != "" { + return value + } + } + return "" +} + +func stringifyLogField(value any) string { + switch v := value.(type) { + case string: + return v + case float64, bool: + return fmt.Sprint(v) + default: + return "" + } +} + +func extractTextLogField(message string, keys ...string) string { + for _, key := range keys { + pattern := regexp.MustCompile(`(?i)(?:^|[\s{,])"?` + regexp.QuoteMeta(key) + `"?\s*[:=]\s*"?([^"\s,}]+)`) + if matches := pattern.FindStringSubmatch(message); len(matches) == 2 { + return matches[1] + } + } + return "" +} + +// extraLabels returns all labels except the ones used to filter the logs +func extraLabels(labels map[string]string) map[string]string { + attrs := make(map[string]string) + for key, value := range labels { + switch key { + case "mesh", "app", "appName", "service", "serviceName", "service_name", "instance", "instanceName", + "pod", "level", "severity", "detected_level", "trace_id", "traceId", "traceid", "span_id", "spanId", "spanid": + continue Review Comment: `extraLabels` excludes `trace_id`/`span_id` keys but not `trace_flags` variants. If `trace_flags` is present as a Loki label, it will be duplicated under `attributes` instead of being treated as trace context consistently. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
