aminghadersohi commented on code in PR #37200:
URL: https://github.com/apache/superset/pull/37200#discussion_r2699876815


##########
superset/mcp_service/middleware.py:
##########
@@ -755,3 +755,150 @@ def _filter_response(self, response: Any, object_type: str, user: Any) -> Any:
                 "Unknown response type for field filtering: %s", type(response)
             )
             return response
+
+
+class ResponseSizeGuardMiddleware(Middleware):
+    """
+    Middleware that prevents oversized responses from overwhelming LLM clients.
+
+    When a tool response exceeds the configured token limit, this middleware
+    intercepts it and returns a helpful error message with suggestions for
+    reducing the response size.
+
+    This is critical for protecting LLM clients like Claude Desktop which can
+    crash or become unresponsive when receiving extremely large responses.
+
+    Configuration via MCP_RESPONSE_SIZE_CONFIG in superset_config.py:
+    - enabled: Toggle the guard on/off (default: True)
+    - token_limit: Maximum estimated tokens per response (default: 50,000)
+    - warn_threshold_pct: Log warnings above this % of limit (default: 80%)
+    - excluded_tools: Tools to skip checking
+    """
+
+    def __init__(
+        self,
+        token_limit: int = 25000,
+        warn_threshold_pct: int = 80,
+        excluded_tools: list[str] | None = None,
+    ) -> None:
+        self.token_limit = token_limit
+        self.warn_threshold_pct = warn_threshold_pct
+        self.warn_threshold = int(token_limit * warn_threshold_pct / 100)
+        self.excluded_tools = set(excluded_tools or [])
+
+    async def on_call_tool(
+        self,
+        context: MiddlewareContext,
+        call_next: Callable[[MiddlewareContext], Awaitable[Any]],
+    ) -> Any:
+        """Check response size after tool execution."""
+        tool_name = getattr(context.message, "name", "unknown")
+
+        # Skip excluded tools
+        if tool_name in self.excluded_tools:
+            return await call_next(context)
+
+        # Execute the tool
+        response = await call_next(context)
+
+        # Estimate response token count
+        from superset.mcp_service.utils.token_utils import (
+            estimate_response_tokens,
+            format_size_limit_error,
+        )
+
+        estimated_tokens = estimate_response_tokens(response)
+
+        # Log warning if approaching limit
+        if estimated_tokens > self.warn_threshold:
+            logger.warning(
+                "Response size warning for %s: ~%d tokens (%.0f%% of %d limit)",
+                tool_name,
+                estimated_tokens,
+                (estimated_tokens / self.token_limit) * 100,
+                self.token_limit,
+            )
+
+        # Block if over limit
+        if estimated_tokens > self.token_limit:
+            # Extract params for smart suggestions
+            params = getattr(context.message, "params", {}) or {}
+
+            # Log the blocked response
+            logger.error(
+                "Response blocked for %s: ~%d tokens exceeds limit of %d",
+                tool_name,
+                estimated_tokens,
+                self.token_limit,
+            )
+
+            # Log to event logger for monitoring
+            try:
+                user_id = get_user_id()
+                event_logger.log(
+                    user_id=user_id,
+                    action="mcp_response_size_exceeded",
+                    curated_payload={
+                        "tool": tool_name,
+                        "estimated_tokens": estimated_tokens,
+                        "token_limit": self.token_limit,
+                        "params": params,
+                    },
+                )
+            except Exception as log_error:
+                logger.warning("Failed to log size exceeded event: %s", log_error)
+
+            # Generate helpful error message with suggestions
+            error_message = format_size_limit_error(
+                tool_name=tool_name,
+                params=params,
+                estimated_tokens=estimated_tokens,
+                token_limit=self.token_limit,
+                response=response,

Review Comment:
   ✅ **Addressed in commits `4c0fe55468` and `03e88de11b`**
   
   Both parts of this suggestion are now implemented:
   
   1. **Try/except around token estimation** (see the estimator sketch after the list):
   ```python
   try:
       estimated_tokens = estimate_response_tokens(response)
   except MemoryError as me:
       logger.warning("MemoryError while estimating tokens for %s: %s", 
tool_name, me)
       estimated_tokens = self.token_limit + 1
   except Exception as e:  # noqa: BLE001
       logger.warning("Failed to estimate response tokens for %s: %s", 
tool_name, e)
       estimated_tokens = self.token_limit + 1
   ```
   
   2. **Pass `None` instead of full response to formatter:**
   ```python
   error_message = format_size_limit_error(
       tool_name=tool_name,
       params=params,
       estimated_tokens=estimated_tokens,
       token_limit=self.token_limit,
       response=None,  # Avoids double serialization
   )
   ```
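   
   For context on item 1, here is a rough, illustrative sketch of the kind of character-count heuristic such an estimator typically uses. This is an assumption for illustration only, not the actual `superset.mcp_service.utils.token_utils.estimate_response_tokens` implementation:
   
   ```python
   import json
   from typing import Any
   
   
   def rough_token_estimate(response: Any) -> int:
       """Illustrative heuristic: approximate tokens as serialized length / 4."""
       # Serializing the full response is the step that can exhaust memory on
       # very large payloads, which is why the try/except above treats any
       # failure here as "over limit".
       serialized = json.dumps(response, default=str)
       return len(serialized) // 4
   ```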
   
   Together, these two changes prevent OOM on large responses by:
   - Treating any serialization failure as "over limit" (safe default)
   - Avoiding a second serialization pass in the error formatter
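   
   For completeness, a minimal sketch of how the guard could be configured from `superset_config.py`, assuming the keys listed in the class docstring (the exact wiring of `MCP_RESPONSE_SIZE_CONFIG` into the middleware is defined elsewhere in the PR, not here):
   
   ```python
   # Hypothetical superset_config.py entry; key names mirror the class docstring.
   MCP_RESPONSE_SIZE_CONFIG = {
       "enabled": True,           # toggle the guard on/off
       "token_limit": 25_000,     # maximum estimated tokens per response
       "warn_threshold_pct": 80,  # log a warning above this % of the limit
       "excluded_tools": [],      # tool names the guard should skip entirely
   }
   ```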



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

