This is an automated email from the ASF dual-hosted git repository.
FreeOnePlus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-mcp-server.git
The following commit(s) were added to refs/heads/master by this push:
new 3fe6811 Fix SELECT detection and max_rows bypass for comment-prefixed
SQL (#87)
3fe6811 is described below
commit 3fe6811632e60c103b7b1bf98c502967f89dc53f
Author: Bibo <[email protected]>
AuthorDate: Thu May 21 19:41:56 2026 +0800
Fix SELECT detection and max_rows bypass for comment-prefixed SQL (#87)
* [fix] Use cursor.description to detect result-returning queries
Replace brittle sql_upper.startswith() keyword check with
cursor.description, which reliably detects any statement that
returns a result set (SELECT, SHOW, DESCRIBE, EXPLAIN, CTEs, etc.)
without maintaining a hardcoded list of SQL keywords.
* Fix max_rows bypass for comment-prefixed SELECT in execute_sql_for_mcp
Same root cause as #75: sql.upper().startswith("SELECT") at
query_executor.py:689 returns False when SQL begins with a leading
'--' or '/* */' comment, so the auto-injected LIMIT {max_rows} is
silently skipped and large queries can be dispatched unbounded.
This callsite runs before cursor.execute, so cursor.description is not
available. Use a small helper that strips leading SQL comments before
extracting the first keyword.
* Add regression tests for result-set detection contract
These tests pin the user-facing contract of DorisConnection.execute():
any SQL the driver reports as producing a result set must return its
rows, regardless of how the statement is phrased.
Guards against regression of:
- Issue #62 Bug 5 (CTE / WITH returning empty data)
- The leading-comment bug fixed in #75 (data: [] with row_count > 0
for SELECT prefixed by '--' or '/* */')
- The same-root-cause LIMIT bypass fixed in the previous commit
Also adds unit tests for the new get_first_sql_keyword helper.
---------
Co-authored-by: jonas.brami <[email protected]>
---
doris_mcp_server/utils/db.py | 29 ++++--
doris_mcp_server/utils/query_executor.py | 9 +-
test/utils/test_db.py | 161 ++++++++++++++++++++++++++++++-
3 files changed, 185 insertions(+), 14 deletions(-)
diff --git a/doris_mcp_server/utils/db.py b/doris_mcp_server/utils/db.py
index cd7b0ce..a48811f 100644
--- a/doris_mcp_server/utils/db.py
+++ b/doris_mcp_server/utils/db.py
@@ -24,6 +24,7 @@ Supports asynchronous operations and concurrent connection
management, ensuring
import asyncio
import logging
+import re
import time
from contextlib import asynccontextmanager
from dataclasses import dataclass
@@ -36,6 +37,22 @@ from aiomysql import Connection, Pool
from .logger import get_logger
+_SQL_COMMENT_RE = re.compile(r"/\*.*?\*/|--[^\n]*", re.DOTALL)
+
+
+def get_first_sql_keyword(sql: str) -> str:
+ """Return the first SQL keyword (uppercase), ignoring leading
comments/whitespace.
+
+ Strips `--` line comments and `/* */` block comments before extracting
+ the first token. A leading comment must not change how a statement is
+ classified (e.g. `-- note\\nSELECT 1` is still a SELECT).
+ """
+ if not sql:
+ return ""
+ stripped = _SQL_COMMENT_RE.sub("", sql).strip()
+ if not stripped:
+ return ""
+ return stripped.split(None, 1)[0].upper()
@dataclass
@@ -95,15 +112,9 @@ class DorisConnection:
async with self.connection.cursor(aiomysql.DictCursor) as cursor:
await cursor.execute(sql, params)
- # Check if it's a query statement (statement that returns
result set)
- # FIX for Issue #62 Bug 5: Added WITH support for Common Table
Expressions (CTE)
- sql_upper = sql.strip().upper()
- if (sql_upper.startswith("SELECT") or
- sql_upper.startswith("SHOW") or
- sql_upper.startswith("DESCRIBE") or
- sql_upper.startswith("DESC") or
- sql_upper.startswith("EXPLAIN") or
- sql_upper.startswith("WITH")): # FIX: Support CTE queries
+ # cursor.description is set by the DB driver for any statement
that returns rows,
+ # avoiding a brittle hardcoded keyword list (e.g. missing
WITH/CTE, comments before keywords).
+ if cursor.description:
data = await cursor.fetchall()
row_count = len(data)
else:
diff --git a/doris_mcp_server/utils/query_executor.py
b/doris_mcp_server/utils/query_executor.py
index 75abf34..b919cad 100644
--- a/doris_mcp_server/utils/query_executor.py
+++ b/doris_mcp_server/utils/query_executor.py
@@ -35,7 +35,7 @@ from decimal import Decimal
import sqlparse
-from .db import DorisConnectionManager, QueryResult
+from .db import DorisConnectionManager, QueryResult, get_first_sql_keyword
from .logger import get_logger
from .sql_security_utils import get_auth_context
@@ -685,8 +685,11 @@ class DorisQueryExecutor:
else:
self.logger.warning("Security configuration not found,
proceeding without validation")
- # Add LIMIT if not present and it's a SELECT query
- if sql.upper().startswith("SELECT") and "LIMIT" not in
sql.upper():
+ # Add LIMIT if not present and it's a SELECT query.
+ # get_first_sql_keyword skips leading comments so `--
note\nSELECT ...`
+ # still gets the LIMIT cap (sql.startswith would silently
bypass it).
+ sql_upper = sql.upper()
+ if get_first_sql_keyword(sql) == "SELECT" and "LIMIT" not in
sql_upper:
if sql.endswith(";"):
sql = sql[:-1]
sql = f"{sql} LIMIT {limit}"
diff --git a/test/utils/test_db.py b/test/utils/test_db.py
index 4ad721a..e12d507 100644
--- a/test/utils/test_db.py
+++ b/test/utils/test_db.py
@@ -1,7 +1,11 @@
-from unittest.mock import MagicMock
+from unittest.mock import AsyncMock, MagicMock
import pytest
-from doris_mcp_server.utils.db import DorisConnection, DorisSessionCache
+from doris_mcp_server.utils.db import (
+ DorisConnection,
+ DorisSessionCache,
+ get_first_sql_keyword,
+)
@pytest.fixture
@@ -76,3 +80,156 @@ class TestDorisSessionCache:
connection_manager.release_connection.assert_any_call("query",
mock_conn1)
connection_manager.release_connection.assert_any_call("system",
mock_conn2)
assert connection_manager.release_connection.call_count == 2
+
+
+class TestGetFirstSqlKeyword:
+ """Unit tests for get_first_sql_keyword.
+
+ Used by query_executor.py:689 to detect SELECT before cursor.execute
+ (where cursor.description is not yet available), so the auto-injected
+ LIMIT {max_rows} cap also works when the SQL is comment-prefixed.
+ """
+
+ def test_plain_select(self):
+ assert get_first_sql_keyword("SELECT 1") == "SELECT"
+
+ def test_leading_whitespace(self):
+ assert get_first_sql_keyword(" \n\t SELECT 1") == "SELECT"
+
+ def test_lowercase(self):
+ assert get_first_sql_keyword("select 1") == "SELECT"
+
+ def test_line_comment_then_select(self):
+ sql = "-- a leading note\nSELECT 1"
+ assert get_first_sql_keyword(sql) == "SELECT"
+
+ def test_block_comment_then_select(self):
+ sql = "/* note */ SELECT 1"
+ assert get_first_sql_keyword(sql) == "SELECT"
+
+ def test_multiline_block_comment_then_select(self):
+ sql = "/*\n multi\n line\n*/\nSELECT 1"
+ assert get_first_sql_keyword(sql) == "SELECT"
+
+ def test_mixed_whitespace_and_comments(self):
+ sql = " -- one\n /* two */ \n SELECT 1"
+ assert get_first_sql_keyword(sql) == "SELECT"
+
+ def test_comment_then_with_cte(self):
+ sql = "-- note\nWITH x AS (SELECT 1) SELECT * FROM x"
+ assert get_first_sql_keyword(sql) == "WITH"
+
+ def test_non_select_unaffected(self):
+ assert get_first_sql_keyword("INSERT INTO t VALUES (1)") == "INSERT"
+ assert get_first_sql_keyword("-- c\nINSERT INTO t VALUES (1)") ==
"INSERT"
+
+ def test_empty_and_only_comments(self):
+ assert get_first_sql_keyword("") == ""
+ assert get_first_sql_keyword(" ") == ""
+ assert get_first_sql_keyword("-- only a comment") == ""
+ assert get_first_sql_keyword("/* only */") == ""
+
+
+def _make_doris_connection(cursor_description, fetchall_rows, rowcount=0):
+ """Build a DorisConnection whose underlying cursor returns the given
values.
+
+ The driver-level cursor is fully mocked: only `description`, `fetchall()`
+ and `rowcount` matter for the result-set-detection branch we want to test.
+ """
+ cursor = MagicMock()
+ cursor.execute = AsyncMock(return_value=None)
+ cursor.fetchall = AsyncMock(return_value=fetchall_rows)
+ cursor.description = cursor_description
+ cursor.rowcount = rowcount
+
+ cursor_ctx = MagicMock()
+ cursor_ctx.__aenter__ = AsyncMock(return_value=cursor)
+ cursor_ctx.__aexit__ = AsyncMock(return_value=None)
+
+ raw_connection = MagicMock()
+ raw_connection.cursor = MagicMock(return_value=cursor_ctx)
+
+ return DorisConnection(connection=raw_connection, session_id="test")
+
+
+class TestExecuteResultSetDetection:
+ """Behavior contract for DorisConnection.execute().
+
+ These tests pin the user-facing contract: any statement the driver
+ reports as producing a result set must have its rows returned, and any
+ statement that does not produce a result set must report rowcount.
+
+ Guards against regression of:
+ - Issue #62 Bug 5 (CTE / WITH returning empty data)
+ - The leading-comment bug (SELECT prefixed by `--` or `/* */` returning
+ empty data while row_count was non-zero)
+ - Future "missing keyword in the whitelist" bugs of the same class
+
+ The tests deliberately do not assert anything about how the SQL text is
+ parsed — they only assert that when `cursor.description` is populated,
+ rows are fetched, regardless of the SQL phrasing.
+ """
+
+ @pytest.mark.parametrize(
+ "sql",
+ [
+ "SELECT 1",
+ " SELECT 1",
+ "-- leading line comment\nSELECT 1",
+ "/* leading block comment */ SELECT 1",
+ "/*\n multi\n line\n*/\nSELECT 1",
+ " -- one\n /* two */ \n SELECT 1",
+ "(SELECT 1)",
+ "WITH t AS (SELECT 1) SELECT * FROM t",
+ "-- comment\nWITH t AS (SELECT 1) SELECT * FROM t",
+ "SHOW TABLES",
+ "DESC some_table",
+ "EXPLAIN SELECT 1",
+ ],
+ ids=[
+ "plain_select",
+ "leading_whitespace",
+ "line_comment_then_select",
+ "block_comment_then_select",
+ "multiline_block_comment",
+ "mixed_whitespace_and_comments",
+ "parenthesized_select",
+ "with_cte",
+ "comment_then_with_cte",
+ "show",
+ "desc",
+ "explain",
+ ],
+ )
+ async def test_returns_rows_when_driver_reports_result_set(self, sql):
+ rows = [{"col": 1}]
+ conn = _make_doris_connection(
+ cursor_description=[("col", None, None, None, None, None, None)],
+ fetchall_rows=rows,
+ )
+
+ result = await conn.execute(sql)
+
+ assert result.data == rows
+ assert result.row_count == len(rows)
+
+ @pytest.mark.parametrize(
+ "sql, affected",
+ [
+ ("INSERT INTO t VALUES (1)", 1),
+ ("UPDATE t SET x = 1", 5),
+ ("DELETE FROM t WHERE x = 1", 3),
+ ("CREATE TABLE t (x INT)", 0),
+ ],
+ )
+ async def test_no_fetch_when_driver_reports_no_result_set(self, sql,
affected):
+ conn = _make_doris_connection(
+ cursor_description=None,
+ fetchall_rows=[],
+ rowcount=affected,
+ )
+
+ result = await conn.execute(sql)
+
+ assert result.data == []
+ assert result.row_count == affected
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]