villebro commented on code in PR #35621:
URL: https://github.com/apache/superset/pull/35621#discussion_r2433617652


##########
superset/config.py:
##########
@@ -199,6 +199,21 @@ def _try_json_readsha(filepath: str, length: int) -> str | 
None:
 SUPERSET_DASHBOARD_POSITION_DATA_LIMIT = 65535
 CUSTOM_SECURITY_MANAGER = None
 SQLALCHEMY_TRACK_MODIFICATIONS = False
+
+# ---------------------------------------------------------
+# FedRAMP Cryptographic Compliance
+# ---------------------------------------------------------
+
+# Hash algorithm used for non-cryptographic purposes (cache keys, thumbnails, 
etc.)
+# Options: 'md5' (legacy), 'sha256'
+#
+# IMPORTANT: Changing this value will invalidate all existing cached content.
+# Cache will re-warm naturally within 24-48 hours.

Review Comment:
   As @michael-s-molina stated, this  doesn't only apply to cached values, but 
will also break key-value entries, which includes permalinks.



##########
superset/key_value/utils.py:
##########
@@ -66,10 +68,31 @@ def decode_permalink_id(key: str, salt: str) -> int:
     raise KeyValueParseKeyError(_("Invalid permalink key"))
 
 
-def get_uuid_namespace(seed: str) -> UUID:
-    md5_obj = md5()  # noqa: S324
-    md5_obj.update(seed.encode("utf-8"))
-    return UUID(md5_obj.hexdigest())
+def get_uuid_namespace(seed: str, app: Any = None) -> UUID:
+    """
+    Generate a UUID namespace from a seed string using configured hash 
algorithm.
+
+    Args:
+        seed: Seed string for namespace generation
+        app: Flask app instance (optional, uses current_app if not provided)
+
+    Returns:
+        UUID namespace
+    """
+    app = app or current_app
+
+    algorithm = app.config["HASH_ALGORITHM"]
+
+    if algorithm == "sha256":
+        sha256_obj = hashlib.sha256()
+        sha256_obj.update(seed.encode("utf-8"))
+        # Use first 16 bytes of SHA-256 digest for UUID
+        return UUID(bytes=sha256_obj.digest()[:16])
+    else:
+        # Legacy MD5 path for backward compatibility
+        md5_obj = md5()  # noqa: S324
+        md5_obj.update(seed.encode("utf-8"))
+        return UUID(md5_obj.hexdigest())

Review Comment:
   To keep existing permalinks (or other key-value entries that rely on 
deterministic UUIDs) from breaking, we will probably need to update all 
key-value entries from the old format to the new one. See 
https://github.com/apache/superset/blob/78907d08cd5a3a53e25050a08328bb3b7770f3c2/superset/key_value/utils.py#L56-L66
 for the critical code that would break existing permalinks.
   
   I suspect changing this to not break existing permalinks will require a 
fairly substantial redesign of how permalink ids are encoded/decoded.. 🤔 



##########
tests/unit_tests/key_value/utils_test.py:
##########
@@ -58,3 +58,58 @@ def test_decode_permalink_id_invalid() -> None:
 
     with pytest.raises(KeyValueParseKeyError):
         decode_permalink_id("foo", "bar")
+
+
+def test_get_uuid_namespace_md5() -> None:
+    """Test UUID namespace generation with MD5 (legacy mode)."""
+    from unittest.mock import MagicMock
+
+    from superset.key_value.utils import get_uuid_namespace
+
+    mock_app = MagicMock()
+    mock_app.config = {"HASH_ALGORITHM": "md5"}
+    namespace = get_uuid_namespace("test_seed", app=mock_app)
+    # MD5-based UUID should be consistent
+    assert isinstance(namespace, UUID)
+    assert namespace == UUID("d81a8c4d-6522-9513-525d-6a5cef1c7c9d")
+
+
+def test_get_uuid_namespace_sha256() -> None:
+    """Test UUID namespace generation with SHA-256 (FedRAMP compliant mode)."""
+    from unittest.mock import MagicMock
+
+    from superset.key_value.utils import get_uuid_namespace
+
+    mock_app = MagicMock()
+    mock_app.config = {"HASH_ALGORITHM": "sha256"}
+    namespace = get_uuid_namespace("test_seed", app=mock_app)
+    # SHA-256-based UUID (first 16 bytes)
+    assert isinstance(namespace, UUID)
+    # SHA-256("test_seed") first 16 bytes as UUID
+    assert namespace == UUID("4504d44d-861b-6919-7db1-d95e47344234")

Review Comment:
   Nit: these tests could probably be parametrized



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to