This is an automated email from the ASF dual-hosted git repository.
arivero pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 15b3c96f8e9 fix(security): Add table blocklist and fix MCP SQL
validation bypass (#37411)
15b3c96f8e9 is described below
commit 15b3c96f8e97148f63e036038727c9a3ea9e15d3
Author: Amin Ghadersohi <[email protected]>
AuthorDate: Mon Feb 9 08:12:06 2026 -0500
fix(security): Add table blocklist and fix MCP SQL validation bypass
(#37411)
---
superset/config.py | 28 ++++++++++
superset/exceptions.py | 15 +++++
superset/sql/execution/executor.py | 41 +++++++++++++-
superset/sql/parse.py | 40 ++++++++++++++
superset/sql_lab.py | 15 +++++
tests/unit_tests/sql/execution/test_executor.py | 73 +++++++++++++++++++++++--
tests/unit_tests/sql/parse_tests.py | 33 +++++++++++
7 files changed, 238 insertions(+), 7 deletions(-)
diff --git a/superset/config.py b/superset/config.py
index 9731d80bd58..970c965d051 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -1811,6 +1811,34 @@ DISALLOWED_SQL_FUNCTIONS: dict[str, set[str]] = {
},
}
+# Per-engine blocklist of system catalog tables/views that should not be
queried.
+# Prevents information disclosure through system catalog access.
+DISALLOWED_SQL_TABLES: dict[str, set[str]] = {
+ "postgresql": {
+ "pg_stat_activity",
+ "pg_roles",
+ "pg_shadow",
+ "pg_authid",
+ "pg_settings",
+ "pg_config",
+ "pg_hba_file_rules",
+ "pg_stat_ssl",
+ "pg_stat_replication",
+ "pg_stat_wal_receiver",
+ "pg_user",
+ },
+ "mysql": {
+ "mysql.user",
+ "performance_schema.threads",
+ "performance_schema.processlist",
+ },
+ "mssql": {
+ "sys.server_principals",
+ "sys.sql_logins",
+ "sys.configurations",
+ },
+}
+
# A function that intercepts the SQL to be executed and can alter it.
# A common use case for this is around adding some sort of comment header to
the SQL
diff --git a/superset/exceptions.py b/superset/exceptions.py
index 4967ffbfa81..fabbbe13347 100644
--- a/superset/exceptions.py
+++ b/superset/exceptions.py
@@ -399,6 +399,21 @@ class
SupersetDisallowedSQLFunctionException(SupersetErrorException):
)
+class SupersetDisallowedSQLTableException(SupersetErrorException):
+ """
+ Disallowed table/view found in SQL statement
+ """
+
+ def __init__(self, tables: set[str]):
+ super().__init__(
+ SupersetError(
+ message=f"SQL statement references disallowed table(s):
{tables}",
+ error_type=SupersetErrorType.SYNTAX_ERROR,
+ level=ErrorLevel.ERROR,
+ )
+ )
+
+
class CreateKeyValueDistributedLockFailedException(Exception): # noqa: N818
"""
Exception to signalize failure to acquire lock.
diff --git a/superset/sql/execution/executor.py
b/superset/sql/execution/executor.py
index b1f5e794049..e021920023d 100644
--- a/superset/sql/execution/executor.py
+++ b/superset/sql/execution/executor.py
@@ -451,10 +451,22 @@ class SQLExecutor:
:raises SupersetSecurityException: If security checks fail
"""
# Check disallowed functions
- if disallowed := self._check_disallowed_functions(script):
+ if disallowed_functions := self._check_disallowed_functions(script):
raise SupersetSecurityException(
SupersetError(
- message=f"Disallowed SQL functions: {',
'.join(disallowed)}",
+ message=(
+ f"Disallowed SQL functions: {',
'.join(disallowed_functions)}"
+ ),
+ error_type=SupersetErrorType.INVALID_SQL_ERROR,
+ level=ErrorLevel.ERROR,
+ )
+ )
+
+ # Check disallowed tables
+ if disallowed_tables := self._check_disallowed_tables(script):
+ raise SupersetSecurityException(
+ SupersetError(
+ message=f"Disallowed SQL tables: {',
'.join(disallowed_tables)}",
error_type=SupersetErrorType.INVALID_SQL_ERROR,
level=ErrorLevel.ERROR,
)
@@ -684,6 +696,31 @@ class SQLExecutor:
return found if found else None
+ def _check_disallowed_tables(self, script: SQLScript) -> set[str] | None:
+ """
+ Check for disallowed SQL tables/views.
+
+ :param script: Parsed SQL script
+ :returns: Set of disallowed tables found, or None if none found
+ """
+ disallowed_config = app.config.get("DISALLOWED_SQL_TABLES", {})
+ engine_name = self.database.db_engine_spec.engine
+
+ # Get disallowed tables for this engine
+ engine_disallowed = disallowed_config.get(engine_name, set())
+ if not engine_disallowed:
+ return None
+
+ # Single-pass AST-based table detection
+ found: set[str] = set()
+ for statement in script.statements:
+ present = {table.table.lower() for table in statement.tables}
+ for table in engine_disallowed:
+ if table.lower() in present:
+ found.add(table)
+
+ return found or None
+
def _apply_rls_to_script(
self, script: SQLScript, catalog: str | None, schema: str | None
) -> None:
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index af9a740ec75..843962eb091 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -452,6 +452,15 @@ class BaseSQLStatement(Generic[InternalRepresentation]):
"""
raise NotImplementedError()
+ def check_tables_present(self, tables: set[str]) -> bool:
+ """
+ Check if any of the given tables are present in the statement.
+
+ :param tables: Set of table names to check for (case-insensitive)
+ :return: True if any of the tables are present
+ """
+ raise NotImplementedError()
+
def get_limit_value(self) -> int | None:
"""
Get the limit value of the statement.
@@ -766,6 +775,16 @@ class SQLStatement(BaseSQLStatement[exp.Expression]):
}
return any(function.upper() in present for function in functions)
+ def check_tables_present(self, tables: set[str]) -> bool:
+ """
+ Check if any of the given tables are present in the statement.
+
+ :param tables: Set of table names to check for (case-insensitive)
+ :return: True if any of the tables are present
+ """
+ present = {table.table.lower() for table in self.tables}
+ return any(table.lower() in present for table in tables)
+
def get_limit_value(self) -> int | None:
"""
Parse a SQL query and return the `LIMIT` or `TOP` value, if present.
@@ -1172,6 +1191,16 @@ class KustoKQLStatement(BaseSQLStatement[str]):
logger.warning("Kusto KQL doesn't support checking for functions
present.")
return False
+ def check_tables_present(self, tables: set[str]) -> bool:
+ """
+ Check if any of the given tables are present in the statement.
+
+ :param tables: Set of table names to check for (case-insensitive)
+ :return: True if any of the tables are present
+ """
+ logger.warning("Kusto KQL doesn't support checking for tables
present.")
+ return False
+
def get_limit_value(self) -> int | None:
"""
Get the limit value of the statement.
@@ -1313,6 +1342,17 @@ class SQLScript:
for statement in self.statements
)
+ def check_tables_present(self, tables: set[str]) -> bool:
+ """
+ Check if any of the given tables are present in the script.
+
+ :param tables: Set of table names to check for (case-insensitive)
+ :return: True if any of the tables are present
+ """
+ return any(
+ statement.check_tables_present(tables) for statement in
self.statements
+ )
+
def is_valid_ctas(self) -> bool:
"""
Check if the script contains a valid CTAS statement.
diff --git a/superset/sql_lab.py b/superset/sql_lab.py
index 8ce20da5bb6..5e170630812 100644
--- a/superset/sql_lab.py
+++ b/superset/sql_lab.py
@@ -45,6 +45,7 @@ from superset.errors import ErrorLevel, SupersetError,
SupersetErrorType
from superset.exceptions import (
OAuth2RedirectError,
SupersetDisallowedSQLFunctionException,
+ SupersetDisallowedSQLTableException,
SupersetDMLNotAllowedException,
SupersetErrorException,
SupersetErrorsException,
@@ -411,6 +412,20 @@ def execute_sql_statements( # noqa: C901
):
raise SupersetDisallowedSQLFunctionException(disallowed_functions)
+ disallowed_tables = app.config["DISALLOWED_SQL_TABLES"].get(
+ db_engine_spec.engine,
+ set(),
+ )
+ if disallowed_tables and
parsed_script.check_tables_present(disallowed_tables):
+ # Report only the tables actually found in the query
+ found_tables = set()
+ for statement in parsed_script.statements:
+ present = {table.table.lower() for table in statement.tables}
+ for table in disallowed_tables:
+ if table.lower() in present:
+ found_tables.add(table)
+ raise SupersetDisallowedSQLTableException(found_tables or
disallowed_tables)
+
if parsed_script.has_mutation() and not database.allow_dml:
raise SupersetDMLNotAllowedException()
diff --git a/tests/unit_tests/sql/execution/test_executor.py
b/tests/unit_tests/sql/execution/test_executor.py
index 5168959e9ea..8cfd87e2ead 100644
--- a/tests/unit_tests/sql/execution/test_executor.py
+++ b/tests/unit_tests/sql/execution/test_executor.py
@@ -350,6 +350,64 @@ def test_execute_allowed_functions(
assert result.status == QueryStatus.SUCCESS
+def test_execute_disallowed_tables(
+ mocker: MockerFixture, database: Database, app_context: None
+) -> None:
+ """Test that disallowed SQL tables are blocked."""
+ mocker.patch.dict(
+ current_app.config,
+ {
+ "SQL_QUERY_MUTATOR": None,
+ "SQLLAB_TIMEOUT": 30,
+ "DISALLOWED_SQL_FUNCTIONS": {},
+ "DISALLOWED_SQL_TABLES": {"sqlite": {"pg_stat_activity",
"pg_roles"}},
+ },
+ )
+
+ result = database.execute("SELECT * FROM pg_stat_activity")
+
+ assert result.status == QueryStatus.FAILED
+ assert result.error_message is not None
+ assert "Disallowed SQL tables: pg_stat_activity" == result.error_message
+
+
+def test_execute_allowed_tables(
+ mocker: MockerFixture, database: Database, app_context: None
+) -> None:
+ """Test that allowed SQL tables work normally."""
+ mock_query_execution(mocker, database, return_data=[(1,)],
column_names=["id"])
+ mocker.patch.dict(
+ current_app.config,
+ {
+ "SQL_QUERY_MUTATOR": None,
+ "SQLLAB_TIMEOUT": 30,
+ "SQL_MAX_ROW": None,
+ "DISALLOWED_SQL_FUNCTIONS": {},
+ "DISALLOWED_SQL_TABLES": {"sqlite": {"pg_stat_activity",
"pg_roles"}},
+ "QUERY_LOGGER": None,
+ },
+ )
+
+ result = database.execute("SELECT * FROM users")
+
+ assert result.status == QueryStatus.SUCCESS
+
+
+def test_check_disallowed_tables_no_config(
+ mocker: MockerFixture, database: Database, app_context: None
+) -> None:
+ """Test disallowed tables check when no config exists."""
+ from superset.sql.execution.executor import SQLExecutor
+
+ mocker.patch.dict(current_app.config, {"DISALLOWED_SQL_TABLES": {}})
+
+ executor = SQLExecutor(database)
+ script = MagicMock()
+ result = executor._check_disallowed_tables(script)
+
+ assert result is None
+
+
# =============================================================================
# Row-Level Security Tests
# =============================================================================
@@ -1293,7 +1351,8 @@ def test_async_handle_get_result_query_not_found(
query_result = result.get_result()
assert query_result.status == QueryStatus.FAILED
- assert "not found" in query_result.error_message.lower() # type:
ignore[union-attr]
+ assert query_result.error_message is not None
+ assert "not found" in query_result.error_message.lower()
def test_async_handle_get_result_pending(
@@ -1434,7 +1493,8 @@ def test_async_handle_get_result_backend_load_error(
query_result = result.get_result()
assert query_result.status == QueryStatus.FAILED
- assert "Error loading results" in query_result.error_message # type:
ignore[operator]
+ assert query_result.error_message is not None
+ assert "Error loading results" in query_result.error_message
def test_async_handle_get_result_no_results_key(
@@ -1465,7 +1525,8 @@ def test_async_handle_get_result_no_results_key(
query_result = result.get_result()
assert query_result.status == QueryStatus.FAILED
- assert "Results not available" in query_result.error_message # type:
ignore[operator]
+ assert query_result.error_message is not None
+ assert "Results not available" in query_result.error_message
def test_async_handle_get_status_query_not_found(
@@ -1970,7 +2031,8 @@ def test_async_handle_get_result_with_empty_blob(
# Should return failure when blob not found
assert query_result.status == QueryStatus.FAILED
- assert "Results not available" in query_result.error_message # type:
ignore[operator]
+ assert query_result.error_message is not None
+ assert "Results not available" in query_result.error_message
def test_async_handle_get_result_no_results_backend(
@@ -2010,7 +2072,8 @@ def test_async_handle_get_result_no_results_backend(
# Should return failure when no results backend
assert query_result.status == QueryStatus.FAILED
- assert "Results not available" in query_result.error_message # type:
ignore[operator]
+ assert query_result.error_message is not None
+ assert "Results not available" in query_result.error_message
def test_create_query_record_with_user(
diff --git a/tests/unit_tests/sql/parse_tests.py
b/tests/unit_tests/sql/parse_tests.py
index f8e7251d808..b50718173cb 100644
--- a/tests/unit_tests/sql/parse_tests.py
+++ b/tests/unit_tests/sql/parse_tests.py
@@ -2942,6 +2942,39 @@ def test_check_functions_present(sql: str, engine: str,
expected: bool) -> None:
assert SQLScript(sql, engine).check_functions_present(functions) ==
expected
[email protected](
+ "sql, engine, expected",
+ [
+ ("SELECT * FROM my_table", "postgresql", False),
+ ("SELECT * FROM pg_stat_activity", "postgresql", True),
+ ("SELECT * FROM PG_STAT_ACTIVITY", "postgresql", True),
+ ("SELECT * FROM pg_roles", "postgresql", True),
+ (
+ "WITH cte AS (SELECT 1) SELECT * FROM cte",
+ "postgresql",
+ False,
+ ),
+ (
+ "SELECT * FROM my_table; SELECT * FROM pg_settings",
+ "postgresql",
+ True,
+ ),
+ (
+ "SELECT * FROM schema.pg_stat_activity",
+ "postgresql",
+ True,
+ ),
+ ("Table | limit 10", "kustokql", False),
+ ],
+)
+def test_check_tables_present(sql: str, engine: str, expected: bool) -> None:
+ """
+ Check the `check_tables_present` method.
+ """
+ tables = {"pg_stat_activity", "pg_roles", "pg_settings"}
+ assert SQLScript(sql, engine).check_tables_present(tables) == expected
+
+
@pytest.mark.parametrize(
"kql, expected",
[