Re: [PR] [Recon-Chatbot] Call Recon methods directly instead of using HTTP-calls. [ozone]

via GitHub Mon, 22 Jun 2026 01:59:05 -0700


devmadhuu commented on code in PR #10524:
URL: https://github.com/apache/ozone/pull/10524#discussion_r3450758576



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/recon/ReconEndpointRouter.java:
##########
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.recon.chatbot.recon;
+
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import java.io.IOException;
+import java.util.Map;
+import javax.ws.rs.core.Response;
+import org.apache.hadoop.ozone.recon.api.BucketEndpoint;
+import org.apache.hadoop.ozone.recon.api.ClusterStateEndpoint;
+import org.apache.hadoop.ozone.recon.api.ContainerEndpoint;
+import org.apache.hadoop.ozone.recon.api.NSSummaryEndpoint;
+import org.apache.hadoop.ozone.recon.api.NodeEndpoint;
+import org.apache.hadoop.ozone.recon.api.OMDBInsightEndpoint;
+import org.apache.hadoop.ozone.recon.api.PipelineEndpoint;
+import org.apache.hadoop.ozone.recon.api.TaskStatusService;
+import org.apache.hadoop.ozone.recon.api.UtilizationEndpoint;
+import org.apache.hadoop.ozone.recon.api.VolumeEndpoint;
+
+/**
+ * Dispatches chatbot tool names to in-process Recon JAX-RS endpoint beans (no 
HTTP loopback).
+ *
+ * <p>Which tools may run is enforced upstream by {@code 
ChatbotAgent.validateToolCall} against
+ * {@link ReconApiAllowlist}; {@link #hasRoute(String)} mirrors that 
allowlist. An unknown
+ * {@code toolName} here still throws {@link IllegalArgumentException} as a 
defensive fallback.
+ *
+ * <p>All routes call injected endpoint beans directly in the same JVM.
+ */
+@Singleton
+public class ReconEndpointRouter {
+
+  private final ClusterStateEndpoint clusterStateEndpoint;
+  private final NodeEndpoint nodeEndpoint;
+  private final PipelineEndpoint pipelineEndpoint;
+  private final ContainerEndpoint containerEndpoint;
+  private final OMDBInsightEndpoint omdbInsightEndpoint;
+  private final VolumeEndpoint volumeEndpoint;
+  private final BucketEndpoint bucketEndpoint;
+  private final TaskStatusService taskStatusService;
+  private final UtilizationEndpoint utilizationEndpoint;
+  private final NSSummaryEndpoint nsSummaryEndpoint;
+  private final ReconApiAllowlist reconApiAllowlist;
+
+  @Inject
+  public ReconEndpointRouter(
+      ClusterStateEndpoint clusterStateEndpoint,
+      NodeEndpoint nodeEndpoint,
+      PipelineEndpoint pipelineEndpoint,
+      ContainerEndpoint containerEndpoint,
+      OMDBInsightEndpoint omdbInsightEndpoint,
+      VolumeEndpoint volumeEndpoint,
+      BucketEndpoint bucketEndpoint,
+      TaskStatusService taskStatusService,
+      UtilizationEndpoint utilizationEndpoint,
+      NSSummaryEndpoint nsSummaryEndpoint,
+      ReconApiAllowlist reconApiAllowlist) {
+    this.clusterStateEndpoint = clusterStateEndpoint;
+    this.nodeEndpoint = nodeEndpoint;
+    this.pipelineEndpoint = pipelineEndpoint;
+    this.containerEndpoint = containerEndpoint;
+    this.omdbInsightEndpoint = omdbInsightEndpoint;
+    this.volumeEndpoint = volumeEndpoint;
+    this.bucketEndpoint = bucketEndpoint;
+    this.taskStatusService = taskStatusService;
+    this.utilizationEndpoint = utilizationEndpoint;
+    this.nsSummaryEndpoint = nsSummaryEndpoint;
+    this.reconApiAllowlist = reconApiAllowlist;
+  }
+
+  public boolean hasRoute(String toolName) {
+    return reconApiAllowlist.isRegistered(toolName);
+  }
+
+  public Response route(String toolName, Map<String, String> params) throws 
IOException {
+    // limit is pre-clamped by ReconQueryExecutor; MAX_RECORDS_PER_CALL is a 
defensive fallback only.
+    int limit = parseInt(params.get("limit"), 
ReconQueryExecutor.MAX_RECORDS_PER_CALL);
+    String startPrefix = params.get("startPrefix") == null ? "" : 
params.get("startPrefix");
+
+    switch (toolName) {
+    case "api_v1_clusterState":
+      return clusterStateEndpoint.getClusterState();
+    case "api_v1_datanodes":
+      return nodeEndpoint.getDatanodes();
+    case "api_v1_pipelines":
+      return pipelineEndpoint.getPipelines();
+    case "api_v1_containers":
+      return containerEndpoint.getContainers(limit, 0L);
+    case "api_v1_containers_missing":
+      return containerEndpoint.getMissingContainers(limit);
+    case "api_v1_containers_unhealthy":
+      return routeUnhealthyContainers(params, limit);
+    case "api_v1_containers_unhealthy_state":
+      return routeUnhealthyContainersByState(params, limit);
+    case "api_v1_containers_deleted":
+      return containerEndpoint.getSCMDeletedContainers(limit, 0L);
+    case "api_v1_containers_mismatch":
+      return routeContainersMismatch(params, limit);
+    case "api_v1_containers_mismatch_deleted":
+      return containerEndpoint.getOmContainersDeletedInSCM(limit, 0L);
+    case "api_v1_containers_quasiClosed":
+      return containerEndpoint.getQuasiClosedContainers(
+          limit, parseLong(params.get("minContainerId"), 0L));
+    case "api_v1_containers_unhealthy_export":
+      return containerEndpoint.listExportJobs();
+    case "api_v1_keys_open":
+      return routeOpenKeys(params, limit, startPrefix);
+    case "api_v1_keys_open_summary":
+      return omdbInsightEndpoint.getOpenKeySummary();
+    case "api_v1_keys_open_mpu_summary":
+      return omdbInsightEndpoint.getOpenMPUKeySummary();
+    case "api_v1_keys_deletePending_summary":
+      return omdbInsightEndpoint.getDeletedKeySummary();
+    case "api_v1_keys_deletePending":
+      return omdbInsightEndpoint.getDeletedKeyInfo(limit, "", startPrefix);
+    case "api_v1_keys_deletePending_dirs":
+      return omdbInsightEndpoint.getDeletedDirInfo(limit, "");
+    case "api_v1_keys_deletePending_dirs_summary":
+      return omdbInsightEndpoint.getDeletedDirectorySummary();
+    case "api_v1_keys_listKeys":
+      return routeListKeys(params, limit, startPrefix);
+    case "api_v1_volumes":
+      return volumeEndpoint.getVolumes(limit, "");
+    case "api_v1_buckets":
+      return bucketEndpoint.getBuckets(params.get("volume"), limit, "");
+    case "api_v1_task_status":
+      return taskStatusService.getTaskStats();
+    case "api_v1_utilization_fileCount":
+      return routeFileCount(params);
+    case "api_v1_utilization_containerCount":
+      return 
utilizationEndpoint.getContainerCounts(parseLong(params.get("containerSize"), 
0L));
+    case "api_v1_namespace_summary":
+      return nsSummaryEndpoint.getBasicInfo(params.get("path"));
+    case "api_v1_namespace_usage":
+      return routeNamespaceUsage(params);
+    case "api_v1_namespace_quota":
+      return nsSummaryEndpoint.getQuotaUsage(params.get("path"));
+    case "api_v1_namespace_dist":
+      return nsSummaryEndpoint.getFileSizeDistribution(params.get("path"));
+    default:
+      throw new IllegalArgumentException("No in-process route for " + 
toolName);
+    }
+  }
+
+  private Response routeUnhealthyContainers(Map<String, String> params, int 
limit) {
+    long maxContainerId = parseLong(params.get("maxContainerId"), 0L);
+    long minContainerId = parseLong(params.get("minContainerId"), 0L);
+    return containerEndpoint.getUnhealthyContainers(limit, maxContainerId, 
minContainerId);
+  }
+
+  private Response routeUnhealthyContainersByState(Map<String, String> params, 
int limit) {
+    String state = params.get("state");
+    long maxContainerId = parseLong(params.get("maxContainerId"), 0L);
+    long minContainerId = parseLong(params.get("minContainerId"), 0L);
+    return containerEndpoint.getUnhealthyContainers(state, limit, 
maxContainerId, minContainerId);
+  }
+
+  private Response routeContainersMismatch(Map<String, String> params, int 
limit) {
+    String missingIn = params.get("missingIn") == null ? "" : 
params.get("missingIn");
+    return containerEndpoint.getContainerMisMatchInsights(limit, 0L, 
missingIn);
+  }
+
+  private Response routeOpenKeys(Map<String, String> params, int limit, String 
startPrefix) {
+    boolean includeFso = parseBoolean(params.get("includeFso"), false);

Review Comment:
   We should never hardcode param names like this, because then there is a 
disconnect between hardcoded value here and constant param defined there in API 
method. E.g: `@QueryParam(RECON_OPEN_KEY_INCLUDE_FSO)` . Correct for all API 
routes. So few suggestions:
   
   1. Centralize the parameter key names as constants shared between 
`LlmToolSpecFactory` and `ReconEndpointRouter` (single source of truth), so a 
rename is a compile-time change in one place.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/agent/LlmToolSpecFactory.java:
##########
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.recon.chatbot.agent;
+
+import com.google.inject.Singleton;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.hadoop.ozone.recon.chatbot.llm.LLMClient.ToolSpec;
+
+/**
+ * Builds native LLM tool specifications (names, descriptions, parameters).
+ * Descriptions are semantic (when to use / not use) and complement 
recon-tool-semantics.md.
+ */
+@Singleton
+public class LlmToolSpecFactory {
+
+  public List<ToolSpec> getToolSpecs() {

Review Comment:
   This is mostly static data and should not be called with every request.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/agent/ChatbotUtils.java:
##########
@@ -129,82 +71,21 @@ public static boolean isBucketScopedListKeysPrefix(String 
startPrefix) {
   // JSON & Text Utilities
   // =========================================================================
 
-  /**
-   * <p>LLMs sometimes wrap their JSON response in prose text (e.g. "Here is 
the result: {...}")
-   * despite being instructed to return JSON only. A simple greedy regex like 
{@code \{.*\}}
-   * fails for nested objects because it can match from the first {@code {} to 
the last {@code }}
-   * in the entire string, returning multiple concatenated objects or 
truncating nested ones.
-   *
-   * <p>This method uses brace-counting with string-awareness to reliably 
extract the first
-   * outermost JSON object regardless of surrounding text, nesting depth, or 
number of
-   * objects in the response:
-   *
-   * @param text the raw LLM response string, which may contain prose 
before/after JSON
-   * @return the first complete JSON object string, or {@code null} if none is 
found
-   */
-  public static String extractFirstJsonObject(String text) {
-    if (text == null) {
-      return null;
-    }
-    int depth = 0;
-    int start = -1;
-    boolean inString = false;
-    boolean escape = false;
-    for (int i = 0; i < text.length(); i++) {
-      char c = text.charAt(i);
-      if (escape) {
-        escape = false;
-        continue;
-      }
-      if (c == '\\' && inString) {
-        escape = true;
-        continue;
-      }
-      if (c == '"') {
-        inString = !inString;
-        continue;
-      }
-      if (inString) {
-        continue;
-      }
-      if (c == '{') {
-        if (depth == 0) {
-          start = i;
-        }
-        depth++;
-      } else if (c == '}') {
-        depth--;
-        if (depth == 0 && start != -1) {
-          return text.substring(start, i + 1);
-        }
-      }
-    }
-    return null;
-  }
-
   public static int parsePositiveInt(String value, int defaultValue) {
     if (StringUtils.isBlank(value)) {
       return defaultValue;
     }
     try {
       int parsed = Integer.parseInt(value.trim());
-      return parsed > 0 ? parsed : defaultValue;
+      if (parsed <= 0) {
+        throw new IllegalArgumentException("limit must be a positive integer");
+      }
+      return parsed;
     } catch (NumberFormatException e) {
       return defaultValue;
     }
   }
 
-  public static String extractStringField(JsonNode node, String field) {
-    if (node == null || field == null || field.isEmpty()) {
-      return null;
-    }
-    JsonNode fieldNode = node.get(field);
-    if (fieldNode == null || fieldNode.isNull()) {
-      return null;
-    }
-    return fieldNode.asText("");
-  }
-
   public static int estimateRecordCount(JsonNode response) {

Review Comment:
   There are many APIs, the response of them doesn't treat `data` or `keys` as 
arrays. so this method may return 0 even if truncation has happened for some 
APIs output. E.g.  User can ask "`list all containers`", default limit or cap 
is 1000, but the response of API method will return `data` as an object not as 
an array, so this method will return 0 and will happily answer "there are 1000 
containers" or summarize a partial set as if it were complete.



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/chatbot/agent/ChatbotAgent.java:
##########
@@ -497,349 +504,167 @@ private String buildSummarizationUserPrompt(String 
userQuery,
     return sb.toString();
   }
 
-  private String buildClarificationForToolCalls(List<ToolCall> toolCalls) {
-    List<String> clarificationMessages = new ArrayList<>();
-    for (ToolCall toolCall : toolCalls) {
-      String clarification = validateToolCallForExecution(toolCall);
-      if (clarification != null) {
-        clarificationMessages.add(clarification);
+  /**
+   * Validates each tool call and returns the first error message, if any.
+   *
+   * @return error message when a tool call is not allowed; {@code null} when 
all pass
+   */
+  private String validateToolCalls(List<ToolSelection> toolCalls) {
+    for (ToolSelection toolCall : toolCalls) {
+      String error = validateToolCall(toolCall.toolName(), 
toolCall.parameters());
+      if (error != null) {
+        return error;
       }
     }
-    if (clarificationMessages.isEmpty()) {
-      return null;
-    }
-    return clarificationMessages.get(0);
+    return null;
   }
 
   /**
-   * Safety check: validates the endpoint the LLM wants to call before 
ToolExecutor
-   * makes any network request.
-   * <p>
-   * Two layers of defence:
-   * <p>
-   * 1. Allowlist check (always active): the normalised endpoint path must 
start with
-   * one of the known Recon API prefixes in ALLOWED_ENDPOINT_PREFIXES. This is 
the
-   * hard Java-side guard against prompt injection — regardless of what the LLM
-   * was tricked into outputting, only pre-approved paths can ever be called.
-   * <p>
-   * 2. Safe-scope check (when requireSafeScope is true): additional 
validation for
-   * endpoints that can return unbounded data, e.g. /keys/listKeys requires a
-   * bucket-scoped startPrefix to avoid memory exhaustion.
+   * Safety check before executing a tool call.
+   *
+   * <p>Returns {@code null} when the call is allowed. Otherwise returns a 
message shown
+   * directly to the user explaining why execution was blocked.
+   *
+   * <p>Two layers of defence:
+   * <ol>
+   *   <li>Allowlist — only registered tool names from {@link 
ReconApiAllowlist} may run.</li>
+   *   <li>Safe-scope (when {@code requireSafeScope} is true) — {@code 
api_v1_keys_listKeys}
+   *       requires a bucket-scoped {@code startPrefix}.</li>
+   * </ol>
    */
-  private String validateToolCallForExecution(ToolCall toolCall) {
-    if (toolCall == null || toolCall.getEndpoint() == null) {
+  private String validateToolCall(String toolName, Map<String, String> 
parameters) {
+    if (toolName == null) {
       return null;
     }
-    String rawEndpoint = 
ChatbotUtils.normalizeEndpoint(toolCall.getEndpoint());
-    String endpoint = ChatbotUtils.canonicalizeEndpointPath(rawEndpoint);
-    if (endpoint.isEmpty()) {
-      LOG.warn("Blocked invalid endpoint path from LLM output: {}", 
rawEndpoint);
-      return "I can only query known Recon APIs. The requested endpoint '" +
-          rawEndpoint + "' is not in the list of permitted paths.";
-    }
 
-    // Layer 1: Allowlist — reject anything not in our known-safe prefix set.
-    boolean allowed = false;
-    for (String prefix : ALLOWED_ENDPOINT_PREFIXES) {
-      if (ChatbotUtils.matchesAllowedPrefix(endpoint, prefix)) {
-        allowed = true;
-        break;
-      }
-    }
-    if (!allowed) {
-      LOG.warn("Blocked disallowed endpoint from LLM output: {}", endpoint);
-      return "I can only query known Recon APIs. The requested endpoint '" +
-          endpoint + "' is not in the list of permitted paths.";
+    // Layer 1: Allowlist — only registered Recon tools are ever permitted.
+    if (!reconApiAllowlist.isRegistered(toolName)) {
+      LOG.warn("Blocked disallowed toolName from LLM output: {}", toolName);
+      return "I can only query known Recon APIs. The requested tool '" +
+          toolName + "' is not in the list of permitted tools.";
     }
 
-    // Layer 2: Safe-scope check for endpoints that can return unbounded data.
-    if (!requireSafeScope) {
-      return null;
+    // Layer 2: Safe-scope — listKeys can return unbounded data, so when the 
safe-scope guard
+    // is enabled we require startPrefix to be scoped to at least 
/<volume>/<bucket>. Only this
+    // one tool is affected; everything else has already passed Layer 1 and is 
good to run.
+    if (requireSafeScope && LIST_KEYS_TOOL.equals(toolName) && 
!hasBucketScopedPrefix(parameters)) {
+      return "I need a bucket-scoped prefix to run listKeys. " +
+          "This chatbot returns at most 1000 records per request and is not a 
" +
+          "cluster-wide search engine. Please provide startPrefix as " +
+          "/<volume>/<bucket> (optionally with a deeper path), and an optional 
" +
+          "limit up to 1000 to narrow the sample.";
     }
 
-    if (!endpoint.endsWith(LIST_KEYS_ENDPOINT_SUFFIX)) {
-      return null;
-    }
-
-    String startPrefix = null;
-    if (toolCall.getParameters() != null) {
-      startPrefix = toolCall.getParameters().get("startPrefix");
-    }
-    if (!ChatbotUtils.isBucketScopedListKeysPrefix(startPrefix)) {
-      return "I need a bucket-scoped prefix to run listKeys safely. " +
-          "Please provide startPrefix in the form /<volume>/<bucket> " +
-          "(optionally with a deeper path), plus optional limit and page " +
-          "range if you want targeted analysis.";
-    }
+    // All checks passed — null signals "allowed" to the caller in 
processQuery.
     return null;
   }
 
-  private String buildResponseKey(ToolCall toolCall, int index, int total) {
-    String endpoint = toolCall == null ? "unknown" : toolCall.getEndpoint();
+  /**
+   * Returns true when {@code parameters} contains a {@code startPrefix} 
scoped to at least
+   * {@code /<volume>/<bucket>}. Used by the listKeys safe-scope check.
+   */
+  private static boolean hasBucketScopedPrefix(Map<String, String> parameters) 
{
+    String startPrefix = parameters == null ? null : 
parameters.get("startPrefix");
+    return ChatbotUtils.isBucketScopedListKeysPrefix(startPrefix);
+  }
+
+  private String buildResponseKey(ToolSelection toolCall, int index, int 
total) {
+    String toolName = toolCall == null ? "unknown" : toolCall.toolName();
     if (total <= 1) {
-      return endpoint;
+      return toolName;
     }
-    return endpoint + " [call " + (index + 1) + "]";
+    return toolName + " [call " + (index + 1) + "]";
   }
 
   private Map<String, Object> createExecutionMetadataMap(
-      ToolExecutor.ToolExecutionOutcome outcome) {
+      ReconQueryResult outcome) {
     Map<String, Object> metadata = new HashMap<>();
     metadata.put("recordsProcessed", outcome.getRecordsProcessed());
-    metadata.put("pagesFetched", outcome.getPagesFetched());
     metadata.put("truncated", outcome.isTruncated());

Review Comment:
   These `recordsProcessed` and `truncated` information might be always going 
wrong based on above comment and LLM will get confused based on the full 
response JSON and, right after it, the execution metadata. So we should handle 
the first truncate data records to avoid any false positives.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] [Recon-Chatbot] Call Recon methods directly instead of using HTTP-calls. [ozone]

Reply via email to