codeant-ai-for-open-source[bot] commented on code in PR #37972:
URL: https://github.com/apache/superset/pull/37972#discussion_r2833710292


##########
superset/mcp_service/jwt_verifier.py:
##########
@@ -0,0 +1,343 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Detailed JWT verification for the MCP service.
+
+Provides step-by-step JWT validation with specific error messages
+instead of the generic "invalid_token" response from the base JWTVerifier.
+"""
+
+import base64
+import logging
+import time
+from collections.abc import Callable
+from contextvars import ContextVar
+from typing import Any, cast
+
+from authlib.jose.errors import (
+    BadSignatureError,
+    DecodeError,
+    ExpiredTokenError,
+    JoseError,
+)
+from fastmcp.server.auth.auth import AccessToken
+from fastmcp.server.auth.providers.jwt import JWTVerifier
+from mcp.server.auth.middleware.auth_context import AuthContextMiddleware
+from mcp.server.auth.middleware.bearer_auth import BearerAuthBackend
+from starlette.authentication import AuthenticationError
+from starlette.middleware import Middleware
+from starlette.middleware.authentication import AuthenticationMiddleware
+from starlette.requests import HTTPConnection
+from starlette.responses import JSONResponse
+
+from superset.utils import json
+
+logger = logging.getLogger(__name__)
+
+# Thread-safe storage for the specific JWT failure reason.
+# Set by DetailedJWTVerifier.load_access_token() on failure,
+# read by DetailedBearerAuthBackend.authenticate() to raise
+# an AuthenticationError with the specific reason.
+_jwt_failure_reason: ContextVar[str | None] = ContextVar(
+    "_jwt_failure_reason", default=None
+)
+
+
+def _sanitize_header_value(value: str) -> str:
+    """Sanitize a string for safe use in HTTP header values.
+
+    Removes/replaces characters that could enable header injection
+    (CR, LF, quotes) from attacker-controlled JWT claims.
+    """
+    return value.replace("\r", " ").replace("\n", " ").replace('"', "'")
+
+
+def _make_json_auth_error_handler(
+    debug_errors: bool = False,
+) -> Callable[[HTTPConnection, AuthenticationError], JSONResponse]:
+    """Create a JSON 401 error handler for authentication failures.
+
+    Args:
+        debug_errors: If True, include detailed JWT failure reasons in the
+            HTTP response body and WWW-Authenticate header. If False (default),
+            return only generic error information to avoid leaking server
+            configuration per RFC 6750 Section 3.1. Detailed reasons are
+            always logged server-side regardless of this setting.
+    """
+
+    def handler(conn: HTTPConnection, exc: AuthenticationError) -> 
JSONResponse:
+        reason = str(exc)
+
+        if debug_errors:
+            safe_reason = _sanitize_header_value(reason)
+            return JSONResponse(
+                status_code=401,
+                content={
+                    "error": "invalid_token",
+                    "error_description": reason,
+                },
+                headers={
+                    "WWW-Authenticate": f'Bearer error="invalid_token", '
+                    f'error_description="{safe_reason}"',
+                },
+            )
+
+        # Default: generic error response (no claim values or server config 
leaked)
+        logger.warning("JWT authentication failed: %s", reason)
+        return JSONResponse(
+            status_code=401,
+            content={
+                "error": "invalid_token",
+                "error_description": "Authentication failed",
+            },
+            headers={
+                "WWW-Authenticate": 'Bearer error="invalid_token"',
+            },
+        )
+
+    return handler
+
+
+class DetailedBearerAuthBackend(BearerAuthBackend):
+    """
+    Bearer auth backend that raises AuthenticationError with specific
+    JWT failure reasons instead of silently returning None.
+    """
+
+    async def authenticate(self, conn: HTTPConnection) -> tuple[Any, Any] | 
None:
+        result = await super().authenticate(conn)
+
+        if result is not None:
+            # Clear any stale failure reason on success
+            _jwt_failure_reason.set(None)
+            return result
+
+        # Check if there's a Bearer token present - if so, there was a
+        # validation failure we can report with a specific reason
+        auth_header = next(
+            (
+                conn.headers.get(key)
+                for key in conn.headers
+                if key.lower() == "authorization"
+            ),
+            None,
+        )
+        if auth_header and auth_header.lower().startswith("bearer "):
+            reason = _jwt_failure_reason.get()
+            if reason:
+                _jwt_failure_reason.set(None)
+                raise AuthenticationError(reason)
+
+        return None
+
+
+class DetailedJWTVerifier(JWTVerifier):
+    """
+    JWT verifier that provides specific error messages for each
+    validation failure instead of generic "invalid_token".
+
+    Overrides load_access_token() to perform step-by-step validation,
+    storing the specific failure reason in a ContextVar that the
+    custom BearerAuthBackend reads to return a descriptive 401 response.
+
+    Args:
+        debug_errors: When True, detailed JWT failure reasons are included
+            in HTTP responses. When False (default), only generic errors
+            are returned to clients. Detailed reasons are always logged
+            server-side regardless of this setting.
+    """
+
+    def __init__(self, *args: Any, debug_errors: bool = False, **kwargs: Any) 
-> None:
+        super().__init__(*args, **kwargs)
+        self.debug_errors = debug_errors
+
+    async def load_access_token(self, token: str) -> AccessToken | None:  # 
noqa: C901
+        """
+        Validate a JWT bearer token with detailed error reporting.
+
+        Each validation step stores a specific failure reason in the
+        _jwt_failure_reason ContextVar before returning None.
+        """
+        # Reset any previous failure reason
+        _jwt_failure_reason.set(None)
+
+        try:
+            # Step 1: Decode header and check algorithm
+            try:
+                header = self._decode_token_header(token)
+            except (ValueError, DecodeError) as e:
+                reason = f"Malformed token header: {e}"
+                _jwt_failure_reason.set(reason)
+                logger.warning(reason)
+                return None
+
+            token_alg = header.get("alg")
+            if self.algorithm and token_alg != self.algorithm:
+                reason = (
+                    f"Algorithm mismatch: token uses '{token_alg}', "

Review Comment:
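   **Suggestion:** The inner header-decode block in the JWT verifier only
catches ValueError and DecodeError. The concern is that base64 decoding can
raise other exception types (e.g. binascii.Error) which would bubble past both
the inner and outer except blocks and cause a 500 instead of a clean
"invalid_token" response. The proposed change is to broaden this inner except
to catch all exceptions and consistently map any header parsing failure to a
malformed token error. Note: in CPython, binascii.Error is a subclass of
ValueError, so the existing handler already catches it; confirm that some
other, non-ValueError exception can actually escape before applying this
change. [possible bug]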
   
   <details>
   <summary><b>Severity Level:</b> Major ⚠️</summary>
   
   ```mdx
   - ❌ Malformed JWTs cause 500 instead of 401 invalid_token.
   - ⚠️ Authentication errors may leak stack traces to clients.
   ```
   </details>
   
   ```suggestion
               except Exception as e:
   ```
   <details>
   <summary><b>Steps of Reproduction ✅ </b></summary>
   
   ```mdx
   1. Configure the MCP service to use `DetailedJWTVerifier.get_middleware()` 
from
   `superset/mcp_service/jwt_verifier.py:313-323`, which returns Starlette
   `AuthenticationMiddleware` with `DetailedBearerAuthBackend(self)` as backend.
   
   2. Send an HTTP request to any endpoint protected by this middleware with an
   `Authorization` header like `Bearer abc.def.ghi` where the first segment 
`abc` is not
   valid base64url (e.g., contains invalid characters or is badly truncated).
   
   3. In `DetailedBearerAuthBackend.authenticate()`
   (`superset/mcp_service/jwt_verifier.py:121-145`), the bearer token is 
extracted and passed
   to `DetailedJWTVerifier.load_access_token()`
   (`superset/mcp_service/jwt_verifier.py:168+`), which calls 
`_decode_token_header(token)`
   (`superset/mcp_service/jwt_verifier.py:332-343`).
   
   4. `base64.urlsafe_b64decode(header_b64)` inside `_decode_token_header()`
   raises `binascii.Error` for the malformed header. The report assumes this is
   not a `ValueError` or `DecodeError`, so that it bypasses the inner
   `except (ValueError, DecodeError)` block and also the outer
   `except (ValueError, JoseError, KeyError, AttributeError, TypeError)` in
   `load_access_token()`, propagating as an unhandled exception and causing a
   500 error instead of a controlled 401 `invalid_token` response.
   (To verify: `binascii.Error` subclasses `ValueError` in CPython, in which
   case the inner handler catches it and this step does not reproduce.)
   ```
   </details>
   <details>
   <summary><b>Prompt for AI Agent 🤖 </b></summary>
   
   ```mdx
   This is a comment left during a code review.
   
   **Path:** superset/mcp_service/jwt_verifier.py
   **Line:** 191:191
   **Comment:**
        *Possible Bug: The inner header-decode block in the JWT verifier only 
catches ValueError and DecodeError, but base64 decoding can raise other 
exception types (e.g. binascii.Error), which will bubble past both the inner 
and outer except blocks and cause a 500 instead of a clean "invalid_token" 
response; broaden this inner except to catch all exceptions and consistently 
map any header parsing failure to a malformed token error.
   
   Validate the correctness of the flagged issue. If it is correct, how can I
   resolve this? If you propose a fix, implement it and please make it concise.
   ```
   </details>
   <a 
href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37972&comment_hash=685daffc08a127787f979b7494703d224bf619c8e9bed7da2d9946557a9ebe1a&reaction=like'>👍</a>
 | <a 
href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37972&comment_hash=685daffc08a127787f979b7494703d224bf619c8e9bed7da2d9946557a9ebe1a&reaction=dislike'>👎</a>



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to