betodealmeida commented on code in PR #37200:
URL: https://github.com/apache/superset/pull/37200#discussion_r2701274031
##########
superset/mcp_service/mcp_config.py:
##########
@@ -155,6 +155,50 @@
],
}
+# =============================================================================
+# MCP Response Size Guard Configuration
+# =============================================================================
+#
+# Overview:
+# ---------
+# The Response Size Guard prevents oversized responses from overwhelming LLM
+# clients (e.g., Claude Desktop). When a tool response exceeds the token limit,
+# it returns a helpful error with suggestions for reducing the response size.
+#
+# How it works:
+# -------------
+# 1. After a tool executes, the middleware estimates the response's token count
+# 2. If the response exceeds the configured limit, it blocks the response
+# 3. Instead, it returns an error message with smart suggestions:
+# - Reduce page_size/limit
+# - Use select_columns to exclude large fields
+# - Add filters to narrow results
+# - Tool-specific recommendations
+#
+# Configuration:
+# --------------
+# - enabled: Toggle the guard on/off (default: True)
+# - token_limit: Maximum estimated tokens per response (default: 25,000)
+# - excluded_tools: Tools to skip checking (e.g., streaming tools)
+# - warn_threshold_pct: Log warnings above this % of limit (default: 80%)
+#
+# Token Estimation:
+# -----------------
+# Uses character-based heuristic (~3.5 chars per token for JSON).
+# This is intentionally conservative to avoid underestimating.
+# =============================================================================
+MCP_RESPONSE_SIZE_CONFIG: Dict[str, Any] = {
+ "enabled": True, # Enabled by default to protect LLM clients
+    "token_limit": 25000,  # ~25k tokens prevents overwhelming LLM context windows
Review Comment:
Nit, this is easier to read:
```suggestion
"token_limit": 25_000, # ~25k tokens prevents overwhelming LLM context
windows
```
##########
superset/mcp_service/middleware.py:
##########
@@ -755,3 +755,165 @@ def _filter_response(self, response: Any, object_type: str, user: Any) -> Any:
"Unknown response type for field filtering: %s", type(response)
)
return response
+
+
+class ResponseSizeGuardMiddleware(Middleware):
+ """
+ Middleware that prevents oversized responses from overwhelming LLM clients.
+
+ When a tool response exceeds the configured token limit, this middleware
+ intercepts it and returns a helpful error message with suggestions for
+ reducing the response size.
+
+ This is critical for protecting LLM clients like Claude Desktop which can
+ crash or become unresponsive when receiving extremely large responses.
+
+ Configuration via MCP_RESPONSE_SIZE_CONFIG in superset_config.py:
+ - enabled: Toggle the guard on/off (default: True)
+ - token_limit: Maximum estimated tokens per response (default: 25,000)
+ - warn_threshold_pct: Log warnings above this % of limit (default: 80%)
+ - excluded_tools: Tools to skip checking
+ """
+
+ def __init__(
+ self,
+ token_limit: int = 25000,
Review Comment:
```suggestion
token_limit: int = 25_000,
```
##########
superset/mcp_service/server.py:
##########
@@ -104,6 +104,17 @@ def run_server(
# Build middleware list
middleware_list = []
+
+ # Add response size guard (protects LLM clients from huge responses)
+ from superset.mcp_service.middleware import (
+ create_response_size_guard_middleware,
+ )
+
+ size_guard_middleware = create_response_size_guard_middleware()
+ if size_guard_middleware:
Review Comment:
```suggestion
if size_guard_middleware := create_response_size_guard_middleware():
```
##########
tests/unit_tests/mcp_service/test_middleware.py:
##########
@@ -0,0 +1,336 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Unit tests for MCP service middleware.
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from fastmcp.exceptions import ToolError
+
+from superset.mcp_service.middleware import (
+ create_response_size_guard_middleware,
+ ResponseSizeGuardMiddleware,
+)
+
+
+class TestResponseSizeGuardMiddleware:
+ """Test ResponseSizeGuardMiddleware class."""
+
+ def test_init_default_values(self):
+ """Should initialize with default values."""
+ middleware = ResponseSizeGuardMiddleware()
+ assert middleware.token_limit == 25000
+ assert middleware.warn_threshold_pct == 80
+ assert middleware.warn_threshold == 20000
+ assert middleware.excluded_tools == set()
+
+ def test_init_custom_values(self):
+ """Should initialize with custom values."""
+ middleware = ResponseSizeGuardMiddleware(
+ token_limit=10000,
+ warn_threshold_pct=70,
+ excluded_tools=["health_check", "get_chart_preview"],
+ )
+ assert middleware.token_limit == 10000
+ assert middleware.warn_threshold_pct == 70
+ assert middleware.warn_threshold == 7000
+        assert middleware.excluded_tools == {"health_check", "get_chart_preview"}
+
+ @pytest.mark.asyncio
+ async def test_allows_small_response(self):
+ """Should allow responses under token limit."""
+ middleware = ResponseSizeGuardMiddleware(token_limit=25000)
+
+ # Create mock context
+ context = MagicMock()
+ context.message.name = "list_charts"
+ context.message.params = {}
+
+ # Create mock call_next that returns small response
+ small_response = {"charts": [{"id": 1, "name": "test"}]}
+ call_next = AsyncMock(return_value=small_response)
+
+ with (
+            patch("superset.mcp_service.middleware.get_user_id", return_value=1),
+ patch("superset.mcp_service.middleware.event_logger"),
+ ):
+ result = await middleware.on_call_tool(context, call_next)
+
+ assert result == small_response
+ call_next.assert_called_once_with(context)
+
+ @pytest.mark.asyncio
+ async def test_blocks_large_response(self):
+ """Should block responses over token limit."""
+        middleware = ResponseSizeGuardMiddleware(token_limit=100)  # Very low limit
+
+ # Create mock context
+ context = MagicMock()
+ context.message.name = "list_charts"
+ context.message.params = {"page_size": 100}
+
+ # Create large response
+ large_response = {
+ "charts": [{"id": i, "name": f"chart_{i}"} for i in range(1000)]
+ }
+ call_next = AsyncMock(return_value=large_response)
+
+ with (
+            patch("superset.mcp_service.middleware.get_user_id", return_value=1),
+ patch("superset.mcp_service.middleware.event_logger"),
+ ):
+ with pytest.raises(ToolError) as exc_info:
Review Comment:
Add this to the `with(...)` above?
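Something like this (a sketch, not a full suggestion — `pytest.raises` can join the parenthesized `with` since `exc_info` is only read after the block exits):
```python
with (
    patch("superset.mcp_service.middleware.get_user_id", return_value=1),
    patch("superset.mcp_service.middleware.event_logger"),
    pytest.raises(ToolError) as exc_info,
):
    await middleware.on_call_tool(context, call_next)
```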
##########
superset/mcp_service/middleware.py:
##########
@@ -755,3 +755,165 @@ def _filter_response(self, response: Any, object_type: str, user: Any) -> Any:
"Unknown response type for field filtering: %s", type(response)
)
return response
+
+
+class ResponseSizeGuardMiddleware(Middleware):
+ """
+ Middleware that prevents oversized responses from overwhelming LLM clients.
+
+ When a tool response exceeds the configured token limit, this middleware
+ intercepts it and returns a helpful error message with suggestions for
+ reducing the response size.
+
+ This is critical for protecting LLM clients like Claude Desktop which can
+ crash or become unresponsive when receiving extremely large responses.
+
+ Configuration via MCP_RESPONSE_SIZE_CONFIG in superset_config.py:
+ - enabled: Toggle the guard on/off (default: True)
+ - token_limit: Maximum estimated tokens per response (default: 25,000)
+ - warn_threshold_pct: Log warnings above this % of limit (default: 80%)
+ - excluded_tools: Tools to skip checking
+ """
+
+ def __init__(
+ self,
+ token_limit: int = 25000,
+ warn_threshold_pct: int = 80,
+ excluded_tools: list[str] | None = None,
+ ) -> None:
+ self.token_limit = token_limit
+ self.warn_threshold_pct = warn_threshold_pct
+ self.warn_threshold = int(token_limit * warn_threshold_pct / 100)
+ self.excluded_tools = set(excluded_tools or [])
+
+ async def on_call_tool(
+ self,
+ context: MiddlewareContext,
+ call_next: Callable[[MiddlewareContext], Awaitable[Any]],
+ ) -> Any:
+ """Check response size after tool execution."""
+ tool_name = getattr(context.message, "name", "unknown")
+
+ # Skip excluded tools
+ if tool_name in self.excluded_tools:
+ return await call_next(context)
+
+ # Execute the tool
+ response = await call_next(context)
+
+        # Estimate response token count (guard against huge responses causing OOM)
+ from superset.mcp_service.utils.token_utils import (
+ estimate_response_tokens,
+ format_size_limit_error,
+ )
+
+ try:
+ estimated_tokens = estimate_response_tokens(response)
+ except MemoryError as me:
+ logger.warning(
+ "MemoryError while estimating tokens for %s: %s", tool_name, me
+ )
+ # Treat as over limit to avoid further serialization
+ estimated_tokens = self.token_limit + 1
+ except Exception as e: # noqa: BLE001
+ logger.warning(
+ "Failed to estimate response tokens for %s: %s", tool_name, e
+ )
+ # Conservative fallback: block rather than risk OOM
+ estimated_tokens = self.token_limit + 1
+
+ # Log warning if approaching limit
+ if estimated_tokens > self.warn_threshold:
+ logger.warning(
+                "Response size warning for %s: ~%d tokens (%.0f%% of %d limit)",
+ tool_name,
+ estimated_tokens,
+ (estimated_tokens / self.token_limit) * 100,
+ self.token_limit,
+ )
+
+ # Block if over limit
+ if estimated_tokens > self.token_limit:
+ # Extract params for smart suggestions
+ params = getattr(context.message, "params", {}) or {}
+
+ # Log the blocked response
+ logger.error(
+ "Response blocked for %s: ~%d tokens exceeds limit of %d",
+ tool_name,
+ estimated_tokens,
+ self.token_limit,
+ )
+
+ # Log to event logger for monitoring
+ try:
+ user_id = get_user_id()
+ event_logger.log(
+ user_id=user_id,
+ action="mcp_response_size_exceeded",
+ curated_payload={
+ "tool": tool_name,
+ "estimated_tokens": estimated_tokens,
+ "token_limit": self.token_limit,
+ "params": params,
+ },
+ )
+ except Exception as log_error: # noqa: BLE001
+                logger.warning("Failed to log size exceeded event: %s", log_error)
+
+ # Generate helpful error message with suggestions
+            # Avoid passing the full `response` (which may be huge) into the formatter
+ # to prevent large-memory operations during error formatting.
+ error_message = format_size_limit_error(
+ tool_name=tool_name,
+ params=params,
+ estimated_tokens=estimated_tokens,
+ token_limit=self.token_limit,
+ response=None,
+ )
+
+ raise ToolError(error_message)
+
+ return response
+
+
+def create_response_size_guard_middleware() -> ResponseSizeGuardMiddleware | None:
+ """
+ Factory function to create ResponseSizeGuardMiddleware from config.
+
+ Reads configuration from Flask app's MCP_RESPONSE_SIZE_CONFIG.
+ Returns None if the guard is disabled.
+
+ Returns:
+ ResponseSizeGuardMiddleware instance or None if disabled
+ """
+ try:
+ from superset.mcp_service.flask_singleton import get_flask_app
+ from superset.mcp_service.mcp_config import MCP_RESPONSE_SIZE_CONFIG
+
+ flask_app = get_flask_app()
+
+ # Get config from Flask app, falling back to defaults
+ config = flask_app.config.get(
+ "MCP_RESPONSE_SIZE_CONFIG", MCP_RESPONSE_SIZE_CONFIG
+ )
+
+ if not config.get("enabled", True):
+ logger.info("Response size guard is disabled")
+ return None
+
+ middleware = ResponseSizeGuardMiddleware(
+ token_limit=config.get("token_limit", 25000),
Review Comment:
```suggestion
token_limit=config.get("token_limit", 25_000),
```
Actually, can we put this in a `constants.py` file?
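For example (rough sketch only; `superset/mcp_service/constants.py` would be a new module and these names are just illustrative):
```python
# superset/mcp_service/constants.py (hypothetical new module)
DEFAULT_RESPONSE_TOKEN_LIMIT = 25_000
DEFAULT_WARN_THRESHOLD_PCT = 80

# then, wherever the default is needed:
# from superset.mcp_service.constants import DEFAULT_RESPONSE_TOKEN_LIMIT
# token_limit=config.get("token_limit", DEFAULT_RESPONSE_TOKEN_LIMIT),
```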
##########
superset/mcp_service/utils/token_utils.py:
##########
@@ -0,0 +1,408 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Token counting and response size utilities for MCP service.
+
+This module provides utilities to estimate token counts and generate smart
+suggestions when responses exceed configured limits. This prevents large
+responses from overwhelming LLM clients like Claude Desktop.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List
+
+logger = logging.getLogger(__name__)
+
+# Approximate characters per token for estimation
+# Claude tokenizer averages ~4 chars per token for English text
+# JSON tends to be more verbose, so we use a slightly lower ratio
+CHARS_PER_TOKEN = 3.5
+
+
+def estimate_token_count(text: str | bytes) -> int:
+ """
+ Estimate the token count for a given text.
+
+ Uses a character-based heuristic since we don't have direct access to
+ the actual tokenizer. This is conservative to avoid underestimating.
+
+ Args:
+ text: The text to estimate tokens for (string or bytes)
+
+ Returns:
+ Estimated number of tokens
+ """
+ if isinstance(text, bytes):
+ text = text.decode("utf-8", errors="replace")
+
+ # Simple heuristic: ~3.5 characters per token for JSON/code
+ return int(len(text) / CHARS_PER_TOKEN)
+
+
+def estimate_response_tokens(response: Any) -> int:
+ """
+ Estimate token count for an MCP tool response.
+
+    Handles various response types including Pydantic models, dicts, and strings.
+
+ Args:
+ response: The response object to estimate
+
+ Returns:
+ Estimated number of tokens
+ """
+ try:
+ from superset.utils import json
+
+ # Convert response to JSON string for accurate estimation
+ if hasattr(response, "model_dump"):
+ # Pydantic model
+ response_str = json.dumps(response.model_dump())
+ elif isinstance(response, (dict, list)):
+ response_str = json.dumps(response)
+ elif isinstance(response, (str, bytes)):
+            response_str = response if isinstance(response, str) else response.decode()
+ else:
+ response_str = str(response)
+
+ return estimate_token_count(response_str)
+ except Exception as e: # noqa: BLE001
+ logger.warning("Failed to estimate response tokens: %s", e)
+ # Return a high estimate to be safe (conservative fallback)
+ return 100000
+
+
+def get_response_size_bytes(response: Any) -> int:
+ """
+ Get the size of a response in bytes.
+
+ Args:
+ response: The response object
+
+ Returns:
+ Size in bytes
+ """
+ try:
+ from superset.utils import json
+
+ if hasattr(response, "model_dump"):
+ response_str = json.dumps(response.model_dump())
+ elif isinstance(response, (dict, list)):
+ response_str = json.dumps(response)
+ elif isinstance(response, bytes):
+ return len(response)
+ elif isinstance(response, str):
+ return len(response.encode("utf-8"))
+ else:
+ response_str = str(response)
+
+ return len(response_str.encode("utf-8"))
+ except Exception as e: # noqa: BLE001
+ logger.warning("Failed to get response size: %s", e)
+        # Return a conservative large value to avoid allowing oversized responses
+ # to bypass size checks (returning 0 would underestimate)
+ return 1_000_000 # 1MB fallback
+
+
+def extract_query_params(params: Dict[str, Any] | None) -> Dict[str, Any]:
+ """
+ Extract relevant query parameters from tool params for suggestions.
+
+ Args:
+ params: The tool parameters dict
+
+ Returns:
+ Extracted parameters relevant for size reduction suggestions
+ """
+ if not params:
+ return {}
+
+ # Handle nested request object (common pattern in MCP tools)
+ if "request" in params and isinstance(params["request"], dict):
+ params = params["request"]
+
+ extracted = {}
+
+ # Pagination params
+ if "page_size" in params:
+ extracted["page_size"] = params["page_size"]
+ if "limit" in params:
+ extracted["limit"] = params["limit"]
+
+ # Column selection
+ if "select_columns" in params:
+ extracted["select_columns"] = params["select_columns"]
+ if "columns" in params:
+ extracted["columns"] = params["columns"]
+
+ # Filters
+ if "filters" in params:
+ extracted["filters"] = params["filters"]
+
+ # Search
+ if "search" in params:
+ extracted["search"] = params["search"]
+
+ return extracted
Review Comment:
```suggestion
# Keys to extract from params
EXTRACT_KEYS = [
# Pagination
"page_size",
"limit",
# Column selection
"select_columns",
"columns",
# Filters
"filters",
# Search
"search",
]
    return {k: params[k] for k in EXTRACT_KEYS if k in params}
```
##########
superset/mcp_service/server.py:
##########
@@ -104,6 +104,17 @@ def run_server(
# Build middleware list
middleware_list = []
+
+ # Add response size guard (protects LLM clients from huge responses)
+ from superset.mcp_service.middleware import (
+ create_response_size_guard_middleware,
+ )
Review Comment:
Import should be at top-level if possible.
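e.g. something like this, assuming it doesn't introduce a circular import:
```python
# at the top of superset/mcp_service/server.py
from superset.mcp_service.middleware import create_response_size_guard_middleware
```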
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]