codeant-ai-for-open-source[bot] commented on code in PR #37200:
URL: https://github.com/apache/superset/pull/37200#discussion_r2757547073
##########
superset/mcp_service/middleware.py:
##########
@@ -755,3 +759,167 @@ def _filter_response(self, response: Any, object_type:
str, user: Any) -> Any:
"Unknown response type for field filtering: %s", type(response)
)
return response
+
+
+class ResponseSizeGuardMiddleware(Middleware):
+ """
+ Middleware that prevents oversized responses from overwhelming LLM clients.
+
+ When a tool response exceeds the configured token limit, this middleware
+ intercepts it and returns a helpful error message with suggestions for
+ reducing the response size.
+
+ This is critical for protecting LLM clients like Claude Desktop which can
+ crash or become unresponsive when receiving extremely large responses.
+
+ Configuration via MCP_RESPONSE_SIZE_CONFIG in superset_config.py:
+ - enabled: Toggle the guard on/off (default: True)
+ - token_limit: Maximum estimated tokens per response (default: 25,000)
+ - warn_threshold_pct: Log warnings above this % of limit (default: 80%)
+ - excluded_tools: Tools to skip checking
+ """
+
+ def __init__(
+ self,
+ token_limit: int = DEFAULT_TOKEN_LIMIT,
+ warn_threshold_pct: int = DEFAULT_WARN_THRESHOLD_PCT,
+ excluded_tools: list[str] | None = None,
+ ) -> None:
+ self.token_limit = token_limit
+ self.warn_threshold_pct = warn_threshold_pct
+ self.warn_threshold = int(token_limit * warn_threshold_pct / 100)
+ self.excluded_tools = set(excluded_tools or [])
+
+ async def on_call_tool(
+ self,
+ context: MiddlewareContext,
+ call_next: Callable[[MiddlewareContext], Awaitable[Any]],
+ ) -> Any:
+ """Check response size after tool execution."""
+ tool_name = getattr(context.message, "name", "unknown")
+
+ # Skip excluded tools
+ if tool_name in self.excluded_tools:
+ return await call_next(context)
+
+ # Execute the tool
+ response = await call_next(context)
+
+ # Estimate response token count (guard against huge responses causing
OOM)
+ from superset.mcp_service.utils.token_utils import (
+ estimate_response_tokens,
+ format_size_limit_error,
+ )
+
+ try:
+ estimated_tokens = estimate_response_tokens(response)
+ except MemoryError as me:
+ logger.warning(
+ "MemoryError while estimating tokens for %s: %s", tool_name, me
+ )
+ # Treat as over limit to avoid further serialization
+ estimated_tokens = self.token_limit + 1
+ except Exception as e: # noqa: BLE001
+ logger.warning(
+ "Failed to estimate response tokens for %s: %s", tool_name, e
+ )
+ # Conservative fallback: block rather than risk OOM
+ estimated_tokens = self.token_limit + 1
+
+ # Log warning if approaching limit
+ if estimated_tokens > self.warn_threshold:
+ logger.warning(
+ "Response size warning for %s: ~%d tokens (%.0f%% of %d
limit)",
+ tool_name,
+ estimated_tokens,
+ (estimated_tokens / self.token_limit) * 100,
+ self.token_limit,
+ )
+
+ # Block if over limit
+ if estimated_tokens > self.token_limit:
+ # Extract params for smart suggestions
+ params = getattr(context.message, "params", {}) or {}
+
+ # Log the blocked response
+ logger.error(
+ "Response blocked for %s: ~%d tokens exceeds limit of %d",
+ tool_name,
+ estimated_tokens,
+ self.token_limit,
+ )
+
+ # Log to event logger for monitoring
+ try:
+ user_id = get_user_id()
+ event_logger.log(
+ user_id=user_id,
+ action="mcp_response_size_exceeded",
+ curated_payload={
+ "tool": tool_name,
+ "estimated_tokens": estimated_tokens,
+ "token_limit": self.token_limit,
+ "params": params,
+ },
+ )
+ except Exception as log_error: # noqa: BLE001
+ logger.warning("Failed to log size exceeded event: %s",
log_error)
+
+ # Generate helpful error message with suggestions
+ # Avoid passing the full `response` (which may be huge) into the
formatter
+ # to prevent large-memory operations during error formatting.
+ error_message = format_size_limit_error(
+ tool_name=tool_name,
+ params=params,
+ estimated_tokens=estimated_tokens,
+ token_limit=self.token_limit,
+ response=None,
+ )
+
+ raise ToolError(error_message)
+
+ return response
+
+
+def create_response_size_guard_middleware() -> ResponseSizeGuardMiddleware |
None:
+ """
+ Factory function to create ResponseSizeGuardMiddleware from config.
+
+ Reads configuration from Flask app's MCP_RESPONSE_SIZE_CONFIG.
+ Returns None if the guard is disabled.
+
+ Returns:
+ ResponseSizeGuardMiddleware instance or None if disabled
+ """
+ try:
+ from superset.mcp_service.flask_singleton import get_flask_app
+ from superset.mcp_service.mcp_config import MCP_RESPONSE_SIZE_CONFIG
+
+ flask_app = get_flask_app()
+
+ # Get config from Flask app, falling back to defaults
+ config = flask_app.config.get(
+ "MCP_RESPONSE_SIZE_CONFIG", MCP_RESPONSE_SIZE_CONFIG
+ )
+
+ if not config.get("enabled", True):
+ logger.info("Response size guard is disabled")
+ return None
+
+ middleware = ResponseSizeGuardMiddleware(
+ token_limit=config.get("token_limit", DEFAULT_TOKEN_LIMIT),
+ warn_threshold_pct=config.get(
+ "warn_threshold_pct", DEFAULT_WARN_THRESHOLD_PCT
+ ),
Review Comment:
**Suggestion:** The factory passes `token_limit` and `warn_threshold_pct`
directly from the Flask config into the middleware without type coercion; if
these values are provided as strings or other non‑integers (which is common in
config), `ResponseSizeGuardMiddleware.__init__` will perform arithmetic on them
and raise a TypeError at runtime, preventing the MCP service from starting.
[type error]
<details>
<summary><b>Severity Level:</b> Major ⚠️</summary>
```mdx
- ❌ MCP middleware creation can raise TypeError at startup.
- ❌ Response size guard may fail to initialize and be unavailable.
- ⚠️ MCP tool calls proceed without size protection.
- ⚠️ Errors surface during application initialization.
```
</details>
```suggestion
raw_token_limit = config.get("token_limit", DEFAULT_TOKEN_LIMIT)
raw_warn_threshold_pct = config.get(
"warn_threshold_pct", DEFAULT_WARN_THRESHOLD_PCT
)
try:
token_limit = int(raw_token_limit)
except (TypeError, ValueError):
logger.warning(
"Invalid token_limit in MCP_RESPONSE_SIZE_CONFIG: %r; using
default %d",
raw_token_limit,
DEFAULT_TOKEN_LIMIT,
)
token_limit = DEFAULT_TOKEN_LIMIT
try:
warn_threshold_pct = int(raw_warn_threshold_pct)
except (TypeError, ValueError):
logger.warning(
"Invalid warn_threshold_pct in MCP_RESPONSE_SIZE_CONFIG: %r;
using default %d",
raw_warn_threshold_pct,
DEFAULT_WARN_THRESHOLD_PCT,
)
warn_threshold_pct = DEFAULT_WARN_THRESHOLD_PCT
middleware = ResponseSizeGuardMiddleware(
token_limit=token_limit,
warn_threshold_pct=warn_threshold_pct,
```
<details>
<summary><b>Steps of Reproduction ✅ </b></summary>
```mdx
1. Configure MCP application's Flask config with non-integer values in
superset_config.py:
set MCP_RESPONSE_SIZE_CONFIG = {"token_limit": "25000",
"warn_threshold_pct": "80"}
(this config is read in superset/mcp_service/middleware.py within
create_response_size_guard_middleware, see middleware.py lines ~907-915
where the
factory calls ResponseSizeGuardMiddleware).
2. Start the MCP service so application initialization runs
create_response_size_guard_middleware()
(superset/mcp_service/middleware.py:
create_response_size_guard_middleware call at
lines 909-915).
3. The factory passes the raw config values (strings) into
ResponseSizeGuardMiddleware(
token_limit=..., warn_threshold_pct=...)
(superset/mcp_service/middleware.py: lines
909-915).
4. Inside ResponseSizeGuardMiddleware.__init__
(superset/mcp_service/middleware.py:
~781-788),
the constructor performs arithmetic: self.warn_threshold =
int(token_limit *
warn_threshold_pct / 100).
If token_limit or warn_threshold_pct are strings, Python raises a
TypeError (e.g.
"can't multiply sequence by non-int of type 'str'") at that line, causing
middleware
creation to fail and bubble up during startup.
```
</details>
<details>
<summary><b>Prompt for AI Agent 🤖 </b></summary>
```mdx
This is a comment left during a code review.
**Path:** superset/mcp_service/middleware.py
**Line:** 909:913
**Comment:**
*Type Error: The factory passes `token_limit` and `warn_threshold_pct`
directly from the Flask config into the middleware without type coercion; if
these values are provided as strings or other non‑integers (which is common in
config), `ResponseSizeGuardMiddleware.__init__` will perform arithmetic on them
and raise a TypeError at runtime, preventing the MCP service from starting.
Validate the correctness of the flagged issue. If correct, How can I resolve
this? If you propose a fix, implement it and please make it concise.
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]