Re: [PR] feat(mcp): add response size guard to prevent oversized responses [superset]

via GitHub Fri, 16 Jan 2026 12:31:01 -0800


aminghadersohi commented on code in PR #37200:
URL: https://github.com/apache/superset/pull/37200#discussion_r2699879411



##########
tests/unit_tests/mcp_service/test_middleware.py:
##########
@@ -0,0 +1,336 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Unit tests for MCP service middleware.
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from fastmcp.exceptions import ToolError
+
+from superset.mcp_service.middleware import (
+    create_response_size_guard_middleware,
+    ResponseSizeGuardMiddleware,
+)
+
+
+class TestResponseSizeGuardMiddleware:
+    """Test ResponseSizeGuardMiddleware class."""
+
+    def test_init_default_values(self):
+        """Should initialize with default values."""
+        middleware = ResponseSizeGuardMiddleware()
+        assert middleware.token_limit == 25000
+        assert middleware.warn_threshold_pct == 80
+        assert middleware.warn_threshold == 20000
+        assert middleware.excluded_tools == set()
+
+    def test_init_custom_values(self):
+        """Should initialize with custom values."""
+        middleware = ResponseSizeGuardMiddleware(
+            token_limit=10000,
+            warn_threshold_pct=70,
+            excluded_tools=["health_check", "get_chart_preview"],
+        )
+        assert middleware.token_limit == 10000
+        assert middleware.warn_threshold_pct == 70
+        assert middleware.warn_threshold == 7000
+        assert middleware.excluded_tools == {"health_check", 
"get_chart_preview"}
+
+    @pytest.mark.asyncio
+    async def test_allows_small_response(self):
+        """Should allow responses under token limit."""
+        middleware = ResponseSizeGuardMiddleware(token_limit=25000)
+
+        # Create mock context
+        context = MagicMock()
+        context.message.name = "list_charts"
+        context.message.params = {}
+
+        # Create mock call_next that returns small response
+        small_response = {"charts": [{"id": 1, "name": "test"}]}
+        call_next = AsyncMock(return_value=small_response)
+
+        with (
+            patch("superset.mcp_service.middleware.get_user_id", 
return_value=1),
+            patch("superset.mcp_service.middleware.event_logger"),
+        ):
+            result = await middleware.on_call_tool(context, call_next)
+
+        assert result == small_response
+        call_next.assert_called_once_with(context)
+
+    @pytest.mark.asyncio
+    async def test_blocks_large_response(self):
+        """Should block responses over token limit."""
+        middleware = ResponseSizeGuardMiddleware(token_limit=100)  # Very low limit
+
+        # Create mock context
+        context = MagicMock()
+        context.message.name = "list_charts"
+        context.message.params = {"page_size": 100}
+
+        # Create large response
+        large_response = {
+            "charts": [{"id": i, "name": f"chart_{i}"} for i in range(1000)]
+        }
+        call_next = AsyncMock(return_value=large_response)
+
+        with (
+            patch("superset.mcp_service.middleware.get_user_id", 
return_value=1),
+            patch("superset.mcp_service.middleware.event_logger"),
+        ):

Review Comment:
   Same as above - the parenthesized multi-item `with (...):` statement is valid Python 3.10+ syntax (enabled by the PEP 617 parser). No change needed.



##########
tests/unit_tests/mcp_service/test_middleware.py:
##########
@@ -0,0 +1,336 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Unit tests for MCP service middleware.
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from fastmcp.exceptions import ToolError
+
+from superset.mcp_service.middleware import (
+    create_response_size_guard_middleware,
+    ResponseSizeGuardMiddleware,
+)
+
+
+class TestResponseSizeGuardMiddleware:
+    """Test ResponseSizeGuardMiddleware class."""
+
+    def test_init_default_values(self):
+        """Should initialize with default values."""
+        middleware = ResponseSizeGuardMiddleware()
+        assert middleware.token_limit == 25000
+        assert middleware.warn_threshold_pct == 80
+        assert middleware.warn_threshold == 20000
+        assert middleware.excluded_tools == set()
+
+    def test_init_custom_values(self):
+        """Should initialize with custom values."""
+        middleware = ResponseSizeGuardMiddleware(
+            token_limit=10000,
+            warn_threshold_pct=70,
+            excluded_tools=["health_check", "get_chart_preview"],
+        )
+        assert middleware.token_limit == 10000
+        assert middleware.warn_threshold_pct == 70
+        assert middleware.warn_threshold == 7000
+        assert middleware.excluded_tools == {"health_check", 
"get_chart_preview"}
+
+    @pytest.mark.asyncio
+    async def test_allows_small_response(self):
+        """Should allow responses under token limit."""
+        middleware = ResponseSizeGuardMiddleware(token_limit=25000)
+
+        # Create mock context
+        context = MagicMock()
+        context.message.name = "list_charts"
+        context.message.params = {}
+
+        # Create mock call_next that returns small response
+        small_response = {"charts": [{"id": 1, "name": "test"}]}
+        call_next = AsyncMock(return_value=small_response)
+
+        with (
+            patch("superset.mcp_service.middleware.get_user_id", 
return_value=1),
+            patch("superset.mcp_service.middleware.event_logger"),
+        ):
+            result = await middleware.on_call_tool(context, call_next)
+
+        assert result == small_response
+        call_next.assert_called_once_with(context)
+
+    @pytest.mark.asyncio
+    async def test_blocks_large_response(self):
+        """Should block responses over token limit."""
+        middleware = ResponseSizeGuardMiddleware(token_limit=100)  # Very low limit
+
+        # Create mock context
+        context = MagicMock()
+        context.message.name = "list_charts"
+        context.message.params = {"page_size": 100}
+
+        # Create large response
+        large_response = {
+            "charts": [{"id": i, "name": f"chart_{i}"} for i in range(1000)]
+        }
+        call_next = AsyncMock(return_value=large_response)
+
+        with (
+            patch("superset.mcp_service.middleware.get_user_id", 
return_value=1),
+            patch("superset.mcp_service.middleware.event_logger"),
+        ):
+            with pytest.raises(ToolError) as exc_info:
+                await middleware.on_call_tool(context, call_next)
+
+        # Verify error contains helpful information
+        error_message = str(exc_info.value)
+        assert "Response too large" in error_message
+        assert "limit" in error_message.lower()
+
+    @pytest.mark.asyncio
+    async def test_skips_excluded_tools(self):
+        """Should skip checking for excluded tools."""
+        middleware = ResponseSizeGuardMiddleware(
+            token_limit=100, excluded_tools=["health_check"]
+        )
+
+        # Create mock context for excluded tool
+        context = MagicMock()
+        context.message.name = "health_check"
+        context.message.params = {}
+
+        # Create response that would exceed limit
+        large_response = {"data": "x" * 10000}
+        call_next = AsyncMock(return_value=large_response)
+
+        # Should not raise even though response exceeds limit
+        result = await middleware.on_call_tool(context, call_next)
+        assert result == large_response
+
+    @pytest.mark.asyncio
+    async def test_logs_warning_at_threshold(self):
+        """Should log warning when approaching limit."""
+        middleware = ResponseSizeGuardMiddleware(
+            token_limit=1000, warn_threshold_pct=80
+        )
+
+        context = MagicMock()
+        context.message.name = "list_charts"
+        context.message.params = {}
+
+        # Response at ~85% of limit (should trigger warning but not block)
+        response = {"data": "x" * 2900}  # ~828 tokens at 3.5 chars/token
+        call_next = AsyncMock(return_value=response)
+
+        with (
+            patch("superset.mcp_service.middleware.get_user_id", 
return_value=1),
+            patch("superset.mcp_service.middleware.event_logger"),
+            patch("superset.mcp_service.middleware.logger") as mock_logger,
+        ):

Review Comment:
   Same as above - the parenthesized multi-item `with (...):` statement is valid Python 3.10+ syntax (enabled by the PEP 617 parser). No change needed.



##########
superset/mcp_service/utils/token_utils.py:
##########
@@ -0,0 +1,402 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Token counting and response size utilities for MCP service.
+
+This module provides utilities to estimate token counts and generate smart
+suggestions when responses exceed configured limits. This prevents large
+responses from overwhelming LLM clients like Claude Desktop.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List
+
+logger = logging.getLogger(__name__)
+
+# Approximate characters per token for estimation
+# Claude tokenizer averages ~4 chars per token for English text
+# JSON tends to be more verbose, so we use a slightly lower ratio
+CHARS_PER_TOKEN = 3.5
+
+
+def estimate_token_count(text: str | bytes) -> int:
+    """
+    Estimate the token count for a given text.
+
+    Uses a character-based heuristic since we don't have direct access to
+    the actual tokenizer. This is conservative to avoid underestimating.
+
+    Args:
+        text: The text to estimate tokens for (string or bytes)
+
+    Returns:
+        Estimated number of tokens
+    """
+    if isinstance(text, bytes):
+        text = text.decode("utf-8", errors="replace")
+
+    # Simple heuristic: ~3.5 characters per token for JSON/code
+    return int(len(text) / CHARS_PER_TOKEN)
+
+
+def estimate_response_tokens(response: Any) -> int:
+    """
+    Estimate token count for an MCP tool response.
+
+    Handles various response types including Pydantic models, dicts, and strings.
+
+    Args:
+        response: The response object to estimate
+
+    Returns:
+        Estimated number of tokens
+    """
+    try:
+        from superset.utils import json
+
+        # Convert response to JSON string for accurate estimation
+        if hasattr(response, "model_dump"):
+            # Pydantic model
+            response_str = json.dumps(response.model_dump())
+        elif isinstance(response, (dict, list)):
+            response_str = json.dumps(response)
+        elif isinstance(response, (str, bytes)):
+            response_str = response if isinstance(response, str) else 
response.decode()
+        else:
+            response_str = str(response)
+
+        return estimate_token_count(response_str)
+    except (TypeError, ValueError, AttributeError) as e:
+        logger.warning("Failed to estimate response tokens: %s", e)
+        # Return a high estimate to be safe
+        return 100000
+
+
+def get_response_size_bytes(response: Any) -> int:
+    """
+    Get the size of a response in bytes.
+
+    Args:
+        response: The response object
+
+    Returns:
+        Size in bytes
+    """
+    try:
+        from superset.utils import json
+
+        if hasattr(response, "model_dump"):
+            response_str = json.dumps(response.model_dump())
+        elif isinstance(response, (dict, list)):
+            response_str = json.dumps(response)
+        elif isinstance(response, bytes):
+            return len(response)
+        elif isinstance(response, str):
+            return len(response.encode("utf-8"))
+        else:
+            response_str = str(response)
+
+        return len(response_str.encode("utf-8"))
+    except (TypeError, ValueError, AttributeError) as e:
+        logger.warning("Failed to get response size: %s", e)
+        return 0

Review Comment:
   ✅ **Addressed in commit `03e88de11b`**
   
   Changed `get_response_size_bytes()` to return a conservative large value (1MB) on failure instead of 0:
   
   ```python
   except Exception as e:  # noqa: BLE001
       logger.warning("Failed to get response size: %s", e)
       # Return a conservative large value to avoid allowing oversized responses
       # to bypass size checks (returning 0 would underestimate)
       return 1_000_000  # 1MB fallback
   ```
   
   This prevents oversized responses from slipping through when serialization fails.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] feat(mcp): add response size guard to prevent oversized responses [superset]

Reply via email to