codeant-ai-for-open-source[bot] commented on code in PR #37677:
URL: https://github.com/apache/superset/pull/37677#discussion_r2765001048


##########
superset/db_engine_specs/datastore.py:
##########
@@ -0,0 +1,611 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import logging
+import re
+from datetime import datetime
+from re import Pattern
+from typing import Any, TYPE_CHECKING, TypedDict
+from urllib import parse
+
+from apispec import APISpec
+from apispec.ext.marshmallow import MarshmallowPlugin
+from flask_babel import gettext as __
+from marshmallow import fields, Schema
+from marshmallow.exceptions import ValidationError
+from sqlalchemy import column, types
+from sqlalchemy.engine.base import Engine
+from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.engine.url import URL
+from sqlalchemy.sql import sqltypes
+
+from superset.constants import TimeGrain
+from superset.databases.schemas import encrypted_field_properties, EncryptedString
+from superset.databases.utils import make_url_safe
+from superset.db_engine_specs.base import (
+    BaseEngineSpec,
+    BasicPropertiesType,
+    DatabaseCategory,
+)
+from superset.db_engine_specs.exceptions import SupersetDBAPIConnectionError
+from superset.errors import SupersetError, SupersetErrorType
+from superset.exceptions import SupersetException
+from superset.models.core import Database
+from superset.sql.parse import LimitMethod, Table
+from superset.superset_typing import ResultSetColumnType
+from superset.utils import json
+from superset.utils.hashing import hash_from_str
+
+logger = logging.getLogger(__name__)
+
+try:
+    import google.auth
+    from google.cloud import datastore
+    from google.oauth2 import service_account
+
+    dependencies_installed = True
+except ImportError:
+    dependencies_installed = False
+
+if TYPE_CHECKING:
+    from superset.models.core import Database  # pragma: no cover
+    from superset.models.sql_lab import Query  # pragma: no cover
+
+logger = logging.getLogger()
+
+CONNECTION_DATABASE_PERMISSIONS_REGEX = re.compile(
+    "Access Denied: Project (?P<project_name>.+?): User does not have "
+    + "datastore.databases.create permission in project (?P<project>.+?)"
+)
+
+TABLE_DOES_NOT_EXIST_REGEX = re.compile(
+    'Table name "(?P<table>.*?)" missing dataset while no default '
+    "dataset is set in the request"
+)
+
+COLUMN_DOES_NOT_EXIST_REGEX = re.compile(
+    r"Unrecognized name: (?P<column>.*?) at \[(?P<location>.+?)\]"
+)
+
+SCHEMA_DOES_NOT_EXIST_REGEX = re.compile(
+    r"datastore error: 404 Not found: Dataset (?P<dataset>.*?):"
+    r"(?P<schema>.*?) was not found in location"
+)
+
+SYNTAX_ERROR_REGEX = re.compile(
+    'Syntax error: Expected end of input but got identifier "(?P<syntax_error>.+?)"'
+)
+
+ma_plugin = MarshmallowPlugin()
+
+
+class DatastoreParametersSchema(Schema):
+    credentials_info = EncryptedString(
+        required=False,
+        metadata={"description": "Contents of Datastore JSON credentials."},
+    )
+    query = fields.Dict(required=False)
+
+
+class DatastoreParametersType(TypedDict):
+    credentials_info: dict[str, Any]
+    query: dict[str, Any]
+
+
+class DatastoreEngineSpec(BaseEngineSpec):  # pylint: disable=too-many-public-methods
+    """Engine spec for Google's Datastore
+
+    As contributed by @hychang.1997.tw"""
+
+    engine = "datastore"
+    engine_name = "Google Datastore"
+    max_column_name_length = 128
+    disable_ssh_tunneling = True
+
+    parameters_schema = DatastoreParametersSchema()
+    default_driver = "datastore"
+    sqlalchemy_uri_placeholder = "datastore://{project_id}"
+
+    # Use FETCH_MANY to prevent Superset from injecting LIMIT via sqlglot AST
+    # manipulation. GQL queries should not be modified by sqlglot since it
+    # uses BigQuery dialect which transforms GQL-incompatible syntax.
+    limit_method = LimitMethod.FETCH_MANY
+
+    metadata = {
+        "description": (
+            "Google Cloud Datastore is a highly scalable NoSQL database "
+            "for your applications."
+        ),
+        "logo": "google-biquery.png",
+        "homepage_url": "https://cloud.google.com/datastore/";,
+        "categories": [
+            DatabaseCategory.CLOUD_GCP,
+            DatabaseCategory.PROPRIETARY,
+        ],
+        "pypi_packages": ["python-datastore-sqlalchemy"],
+        "connection_string": "datastore://{project_id}",
+        "authentication_methods": [
+            {
+                "name": "Service Account JSON",
+                "description": (
+                    "Upload service account credentials JSON or paste in 
Secure Extra"
+                ),
+                "secure_extra": {
+                    "credentials_info": {
+                        "type": "service_account",
+                        "project_id": "...",
+                        "private_key_id": "...",
+                        "private_key": "...",
+                        "client_email": "...",
+                        "client_id": "...",
+                        "auth_uri": "...",
+                        "token_uri": "...",
+                    }
+                },
+            },
+        ],
+        "notes": (
+            "Create a Service Account via GCP console with access to "
+            "datastore datasets."
+        ),
+        "docs_url": "https://github.com/splasky/Python-datastore-sqlalchemy";,
+    }
+
+    # Datastore doesn't maintain context when running multiple statements in the
+    # same cursor, so we need to run all statements at once
+    run_multiple_statements_as_one = True
+
+    allows_hidden_cc_in_orderby = True
+
+    supports_dynamic_schema = True
+    supports_catalog = supports_dynamic_catalog = supports_cross_catalog_queries = True
+
+    # when editing the database, mask this field in `encrypted_extra`
+    # pylint: disable=invalid-name
+    encrypted_extra_sensitive_fields = {"$.credentials_info.private_key"}
+
+    """
+    https://www.python.org/dev/peps/pep-0249/#arraysize
+    raw_connections bypass the sqlalchemy-datastore query execution context and deal
+    with raw dbapi connection directly.
+    If this value is not set, the default value is set to 1.
+    """
+    arraysize = 5000
+
+    _date_trunc_functions = {
+        "DATE": "DATE_TRUNC",
+        "DATETIME": "DATETIME_TRUNC",
+        "TIME": "TIME_TRUNC",
+        "TIMESTAMP": "TIMESTAMP_TRUNC",
+    }
+
+    _time_grain_expressions = {
+        None: "{col}",
+        TimeGrain.SECOND: "CAST(TIMESTAMP_SECONDS("
+        "UNIX_SECONDS(CAST({col} AS TIMESTAMP))"
+        ") AS {type})",
+        TimeGrain.MINUTE: "CAST(TIMESTAMP_SECONDS("
+        "60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 60)"
+        ") AS {type})",
+        TimeGrain.FIVE_MINUTES: "CAST(TIMESTAMP_SECONDS("
+        "5*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 5*60)"
+        ") AS {type})",
+        TimeGrain.TEN_MINUTES: "CAST(TIMESTAMP_SECONDS("
+        "10*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 10*60)"
+        ") AS {type})",
+        TimeGrain.FIFTEEN_MINUTES: "CAST(TIMESTAMP_SECONDS("
+        "15*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 15*60)"
+        ") AS {type})",
+        TimeGrain.THIRTY_MINUTES: "CAST(TIMESTAMP_SECONDS("
+        "30*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 30*60)"
+        ") AS {type})",
+        TimeGrain.HOUR: "{func}({col}, HOUR)",
+        TimeGrain.DAY: "{func}({col}, DAY)",
+        TimeGrain.WEEK: "{func}({col}, WEEK)",
+        TimeGrain.WEEK_STARTING_MONDAY: "{func}({col}, ISOWEEK)",
+        TimeGrain.MONTH: "{func}({col}, MONTH)",
+        TimeGrain.QUARTER: "{func}({col}, QUARTER)",
+        TimeGrain.YEAR: "{func}({col}, YEAR)",
+    }
+
+    custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
+        CONNECTION_DATABASE_PERMISSIONS_REGEX: (
+            __(
+                "Unable to connect. Verify that the following roles are set "
+                'on the service account: "Cloud Datastore Viewer", '
+                '"Cloud Datastore User", "Cloud Datastore Creator"'
+            ),
+            SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
+            {},
+        ),
+        TABLE_DOES_NOT_EXIST_REGEX: (
+            __(
+                'The table "%(table)s" does not exist. '
+                "A valid table must be used to run this query.",
+            ),
+            SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
+            {},
+        ),
+        COLUMN_DOES_NOT_EXIST_REGEX: (
+            __('We can\'t seem to resolve column "%(column)s" at line %(location)s.'),
+            SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
+            {},
+        ),
+        SCHEMA_DOES_NOT_EXIST_REGEX: (
+            __(
+                'The schema "%(schema)s" does not exist. '
+                "A valid schema must be used to run this query."
+            ),
+            SupersetErrorType.SCHEMA_DOES_NOT_EXIST_ERROR,
+            {},
+        ),
+        SYNTAX_ERROR_REGEX: (
+            __(
+                "Please check your query for syntax errors at or near "
+                '"%(syntax_error)s". Then, try running your query again.'
+            ),
+            SupersetErrorType.SYNTAX_ERROR,
+            {},
+        ),
+    }
+
+    @staticmethod
+    def _mutate_label(label: str) -> str:
+        """
+        Datastore field_name should start with a letter or underscore and contain
+        only alphanumeric characters. Labels that start with a number are prefixed
+        with an underscore. Any unsupported characters are replaced with underscores
+        and an md5 hash is added to the end of the label to avoid possible
+        collisions.
+
+        :param label: Expected expression label
+        :return: Conditionally mutated label
+        """
+        label_hashed = "_" + hash_from_str(label)
+
+        # if label starts with number, add underscore as first character
+        label_mutated = "_" + label if re.match(r"^\d", label) else label
+
+        # replace non-alphanumeric characters with underscores
+        label_mutated = re.sub(r"[^\w]+", "_", label_mutated)
+        if label_mutated != label:
+            # add first 5 chars from md5 hash to label to avoid possible collisions
+            label_mutated += label_hashed[:6]
+
+        return label_mutated
+
+    @classmethod
+    def _truncate_label(cls, label: str) -> str:
+        """Datastore requires column names start with either a letter or
+        underscore. To make sure this is always the case, an underscore is prefixed
+        to the md5 hash of the original label.
+
+        :param label: expected expression label
+        :return: truncated label
+        """
+        return "_" + hash_from_str(label)
+
+    @classmethod
+    def convert_dttm(
+        cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None
+    ) -> str | None:
+        sqla_type = cls.get_sqla_column_type(target_type)
+        if isinstance(sqla_type, types.Date):
+            return f"CAST('{dttm.date().isoformat()}' AS DATE)"
+        if isinstance(sqla_type, types.TIMESTAMP):
+            return f"""CAST('{dttm.isoformat(timespec="microseconds")}' AS 
TIMESTAMP)"""
+        if isinstance(sqla_type, types.DateTime):
+            return f"""CAST('{dttm.isoformat(timespec="microseconds")}' AS 
DATETIME)"""
+        if isinstance(sqla_type, types.Time):
+            return f"""CAST('{dttm.strftime("%H:%M:%S.%f")}' AS TIME)"""
+        return None
+
+    @classmethod
+    def fetch_data(cls, cursor: Any, limit: int | None = None) -> list[tuple[Any, ...]]:
+        data = super().fetch_data(cursor, limit)
+        # Support type Datastore Row, introduced here PR #4071
+        # google.cloud.datastore.table.Row
+        if data and type(data[0]).__name__ == "Row":
+            data = [r.values() for r in data]  # type: ignore
+        return data
+
+    @classmethod
+    def _get_client(cls, engine: Engine, database: Database) -> datastore.Client:
+        """
+        Return the Datastore client associated with an engine.
+        """
+        if not dependencies_installed:
+            raise SupersetException(
+                "Could not import libraries needed to connect to Datastore."
+            )
+
+        if credentials_info := engine.dialect.credentials_info:
+            credentials = service_account.Credentials.from_service_account_info(
+                credentials_info
+            )
+            return datastore.Client(credentials=credentials)
+
+        try:
+            credentials = google.auth.default()[0]
+            return datastore.Client(credentials=credentials, database=database)

Review Comment:
   **Suggestion:** The Datastore client is instantiated with an unsupported 
keyword argument `database`, which will raise a TypeError at runtime when 
default Google credentials are used, preventing connections from being 
established. [type error]
   
   <details>
   <summary><b>Severity Level:</b> Critical 🚨</summary>
   
   ```mdx
   - ❌ Datastore DB connections fail during client instantiation.
   - ❌ Default catalog resolution (`get_default_catalog`) breaks.
   - ⚠️ Any metadata calls relying on `_get_client` error.
   ```
   </details>
   
   ```suggestion
               return datastore.Client(credentials=credentials)
   ```
   <details>
   <summary><b>Steps of Reproduction ✅ </b></summary>
   
   ```mdx
   1. Import the module `superset/db_engine_specs/datastore.py` (occurs on any 
process that
   loads DB engine specs). The function `_get_client` is defined starting at 
line 329 and
   executed when a Datastore client is needed.
   
   2. Trigger a code path that calls 
`DatastoreEngineSpec.get_default_catalog(database)`
   (file: superset/db_engine_specs/datastore.py, lines 353-368). At line 366 
the code
   executes `client = cls._get_client(engine, database)`.
   
   3. In `_get_client` (file: superset/db_engine_specs/datastore.py, lines 
329-346), if
   `engine.dialect.credentials_info` is falsy (no explicit service-account 
provided) the code
   reaches the `try:` block at line 344 and executes `credentials = 
google.auth.default()[0]`
   (line 345).
   
   4. Immediately after, the code executes the existing call `return
   datastore.Client(credentials=credentials, database=database)` (line 346). 
The google Cloud
   Datastore client does not accept a `database` keyword in its constructor in 
supported
   client versions, causing Python to raise TypeError like: "TypeError: 
__init__() got an
   unexpected keyword argument 'database'". This prevents 
`get_default_catalog()` from
   returning `client.project` and causes connection/metadata lookup to fail. 
(Concrete
   evidence: lines 345-346 in the file show the exact offending call.)
   ```
   </details>
   <details>
   <summary><b>Prompt for AI Agent 🤖 </b></summary>
   
   ```mdx
   This is a comment left during a code review.
   
   **Path:** superset/db_engine_specs/datastore.py
   **Line:** 346:346
   **Comment:**
        *Type Error: The Datastore client is instantiated with an unsupported 
keyword argument `database`, which will raise a TypeError at runtime when 
default Google credentials are used, preventing connections from being 
established.
   
   Validate the correctness of the flagged issue. If correct, How can I resolve 
this? If you propose a fix, implement it and please make it concise.
   ```
   </details>



##########
tests/integration_tests/db_engine_specs/datastore_tests.py:
##########
@@ -0,0 +1,516 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import unittest.mock as mock
+from datetime import datetime
+from typing import Any
+
+import pytest
+from marshmallow.exceptions import ValidationError
+from sqlalchemy import column
+from sqlalchemy.engine.url import make_url
+
+from superset.connectors.sqla.models import TableColumn
+from superset.db_engine_specs.base import BaseEngineSpec
+from superset.db_engine_specs.datastore import DatastoreEngineSpec
+from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
+from superset.superset_typing import ResultSetColumnType
+from tests.integration_tests.base_tests import SupersetTestCase
+from tests.integration_tests.fixtures.birth_names_dashboard import (
+    load_birth_names_dashboard_with_slices,  # noqa: F401
+    load_birth_names_data,  # noqa: F401
+)
+
+
+class TestDatastoreDbEngineSpec(SupersetTestCase):
+    def test_datastore_sqla_column_label(self):
+        """
+        DB Eng Specs (datastore): Test column label
+        """
+        # Expected labels with SHA-256 hash suffix (first 5 chars prefixed with _)
+        test_cases = {
+            "Col": "Col",
+            "SUM(x)": "SUM_x__b681e",
+            "SUM[x]": "SUM_x__ceaf6",
+            "12345_col": "_12345_col_b1415",
+        }
+        for original, expected in test_cases.items():
+            actual = DatastoreEngineSpec.make_label_compatible(column(original).name)
+            assert actual == expected
+
+    def test_timegrain_expressions(self):
+        """
+        DB Eng Specs (datastore): Test time grain expressions
+        """
+        col = column("temporal")
+        test_cases = {
+            "DATE": "DATE_TRUNC(temporal, HOUR)",
+            "TIME": "TIME_TRUNC(temporal, HOUR)",
+            "DATETIME": "DATETIME_TRUNC(temporal, HOUR)",
+            "TIMESTAMP": "TIMESTAMP_TRUNC(temporal, HOUR)",
+        }
+        for type_, expected in test_cases.items():
+            col.type = type_
+            actual = DatastoreEngineSpec.get_timestamp_expr(
+                col=col, pdf=None, time_grain="PT1H"
+            )
+            assert str(actual) == expected
+
+    def test_custom_minute_timegrain_expressions(self):
+        """
+        DB Eng Specs (datastore): Test time grain expressions
+        """
+        col = column("temporal")
+        test_cases = {
+            "DATE": "CAST(TIMESTAMP_SECONDS("
+            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
+            ") AS DATE)",
+            "DATETIME": "CAST(TIMESTAMP_SECONDS("
+            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
+            ") AS DATETIME)",
+            "TIMESTAMP": "CAST(TIMESTAMP_SECONDS("
+            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
+            ") AS TIMESTAMP)",
+        }
+        for type_, expected in test_cases.items():
+            col.type = type_
+            actual = DatastoreEngineSpec.get_timestamp_expr(
+                col=col, pdf=None, time_grain="PT5M"
+            )
+            assert str(actual) == expected
+
+    def test_fetch_data(self):
+        """
+        DB Eng Specs (datastore): Test fetch data
+        """
+
+        # Mock a google.cloud.datastore.table.Row
+        class Row:
+            def __init__(self, value):
+                self._value = value
+
+            def values(self):
+                return self._value
+
+        data1 = [(1, "foo")]
+        with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data1):
+            result = DatastoreEngineSpec.fetch_data(None, 0)
+        assert result == data1
+
+        data2 = [Row(1), Row(2)]
+        with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data2):
+            result = DatastoreEngineSpec.fetch_data(None, 0)
+        assert result == [1, 2]

Review Comment:
   **Suggestion:** The `test_fetch_data` stub for the Datastore `Row` type 
returns a bare scalar from `values()` and asserts a list of scalars, which 
contradicts the `fetch_data` contract of returning a list of tuples and can 
hide bugs with multi-column rows; the stub should return a tuple (or sequence) 
and the assertion should expect a list of tuples. [logic error]
   
   <details>
   <summary><b>Severity Level:</b> Major ⚠️</summary>
   
   ```mdx
   - ❌ Test may miss multi-column fetch_data regressions.
   - ⚠️ Datastore fetch_data integration test weaker than intended.
   ```
   </details>
   
   ```suggestion
                   return (self._value,)
   
           data1 = [(1, "foo")]
           with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data1):
               result = DatastoreEngineSpec.fetch_data(None, 0)
           assert result == data1
   
           data2 = [Row(1), Row(2)]
           with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data2):
               result = DatastoreEngineSpec.fetch_data(None, 0)
           assert result == [(1,), (2,)]
   ```
   <details>
   <summary><b>Steps of Reproduction ✅ </b></summary>
   
   ```mdx
   1. Run the single integration test: pytest
   
tests/integration_tests/db_engine_specs/datastore_tests.py::TestDatastoreDbEngineSpec::test_fetch_data
   -q.
   
   2. The test defines a local Row stub at
   tests/integration_tests/db_engine_specs/datastore_tests.py:101-106 whose 
values() returns
   a bare scalar (the code visible in the test file).
   
   3. The test patches BaseEngineSpec.fetch_data to return a list of those Row 
objects at
   tests/integration_tests/db_engine_specs/datastore_tests.py:114-115.
   
   4. DatastoreEngineSpec.fetch_data 
(superset/db_engine_specs/datastore.py:320-326) calls
   r.values() for each Row; because the test stub returns a scalar, the 
resulting list
   contains scalars. The test asserts a list of scalars at tests/...:116, so it 
passes, but
   this contradicts the declared fetch_data contract (list[tuple]) and would 
not catch
   regressions where real Row.values() returns a sequence/tuple. Changing the 
stub to return
   a tuple and asserting [(1,), (2,)] makes the test verify the expected shape.
   ```
   </details>
   <details>
   <summary><b>Prompt for AI Agent 🤖 </b></summary>
   
   ```mdx
   This is a comment left during a code review.
   
   **Path:** tests/integration_tests/db_engine_specs/datastore_tests.py
   **Line:** 106:116
   **Comment:**
        *Logic Error: The `test_fetch_data` stub for the Datastore `Row` type 
returns a bare scalar from `values()` and asserts a list of scalars, which 
contradicts the `fetch_data` contract of returning a list of tuples and can 
hide bugs with multi-column rows; the stub should return a tuple (or sequence) 
and the assertion should expect a list of tuples.
   
   Validate the correctness of the flagged issue. If correct, How can I resolve 
this? If you propose a fix, implement it and please make it concise.
   ```
   </details>



##########
tests/integration_tests/db_engine_specs/datastore_tests.py:
##########
@@ -0,0 +1,516 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import unittest.mock as mock
+from datetime import datetime
+from typing import Any
+
+import pytest
+from marshmallow.exceptions import ValidationError
+from sqlalchemy import column

Review Comment:
   **Suggestion:** This integration test module patches 
`sqlalchemy_datastore.*` in a decorator, which causes the entire test module 
import to fail with `ModuleNotFoundError` when the optional 
`sqlalchemy_datastore` package is not installed; using 
`pytest.importorskip("sqlalchemy_datastore")` at module import time will skip 
the whole test module instead of crashing when the dependency is missing. 
[possible bug]
   
   <details>
   <summary><b>Severity Level:</b> Major ⚠️</summary>
   
   ```mdx
   - ❌ Integration test module import fails during collection.
   - ⚠️ CI or local test runs may error without optional dependency.
   ```
   </details>
   
   ```suggestion
   
   pytest.importorskip("sqlalchemy_datastore")
   ```
   <details>
   <summary><b>Steps of Reproduction ✅ </b></summary>
   
   ```mdx
   1. Ensure the optional package is not installed: uninstall 
sqlalchemy_datastore from the
   test environment (pip uninstall sqlalchemy-datastore).
   
   2. Run pytest collection for the integration tests: pytest
   tests/integration_tests/db_engine_specs/datastore_tests.py -q. Pytest will 
import the
   module at tests/integration_tests/db_engine_specs/datastore_tests.py (file 
top-level
   imports and decorators live in this file).
   
   3. During module import, the decorators at
   tests/integration_tests/db_engine_specs/datastore_tests.py:228-236 (the
   @mock.patch("sqlalchemy_datastore.base.create_datastore_client", ...) and
   @mock.patch("sqlalchemy_datastore._helpers.create_datastore_client", ...)) 
are applied;
   mock.patch with a string target attempts to resolve/import the target 
module, which raises
   ModuleNotFoundError when sqlalchemy_datastore is absent.
   
   4. Observe ModuleNotFoundError preventing test collection and causing the 
test run to
   fail. Using pytest.importorskip("sqlalchemy_datastore") at module import (as 
suggested)
   will skip the whole test module on missing optional dependency instead of 
crashing.
   ```
   </details>
   <details>
   <summary><b>Prompt for AI Agent 🤖 </b></summary>
   
   ```mdx
   This is a comment left during a code review.
   
   **Path:** tests/integration_tests/db_engine_specs/datastore_tests.py
   **Line:** 23:23
   **Comment:**
        *Possible Bug: This integration test module patches 
`sqlalchemy_datastore.*` in a decorator, which causes the entire test module 
import to fail with `ModuleNotFoundError` when the optional 
`sqlalchemy_datastore` package is not installed; using 
`pytest.importorskip("sqlalchemy_datastore")` at module import time will skip 
the whole test module instead of crashing when the dependency is missing.
   
   Validate the correctness of the flagged issue. If correct, How can I resolve 
this? If you propose a fix, implement it and please make it concise.
   ```
   </details>



##########
tests/integration_tests/db_engine_specs/datastore_tests.py:
##########
@@ -0,0 +1,516 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import unittest.mock as mock
+from datetime import datetime
+from typing import Any
+
+import pytest
+from marshmallow.exceptions import ValidationError
+from sqlalchemy import column
+from sqlalchemy.engine.url import make_url
+
+from superset.connectors.sqla.models import TableColumn
+from superset.db_engine_specs.base import BaseEngineSpec
+from superset.db_engine_specs.datastore import DatastoreEngineSpec
+from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
+from superset.superset_typing import ResultSetColumnType
+from tests.integration_tests.base_tests import SupersetTestCase
+from tests.integration_tests.fixtures.birth_names_dashboard import (
+    load_birth_names_dashboard_with_slices,  # noqa: F401
+    load_birth_names_data,  # noqa: F401
+)
+
+
+class TestDatastoreDbEngineSpec(SupersetTestCase):
+    def test_datastore_sqla_column_label(self):
+        """
+        DB Eng Specs (datastore): Test column label
+        """
+        # Expected labels with SHA-256 hash suffix (first 5 chars prefixed with _)
+        test_cases = {
+            "Col": "Col",
+            "SUM(x)": "SUM_x__b681e",
+            "SUM[x]": "SUM_x__ceaf6",
+            "12345_col": "_12345_col_b1415",
+        }
+        for original, expected in test_cases.items():
+            actual = DatastoreEngineSpec.make_label_compatible(column(original).name)
+            assert actual == expected
+
+    def test_timegrain_expressions(self):
+        """
+        DB Eng Specs (datastore): Test time grain expressions
+        """
+        col = column("temporal")
+        test_cases = {
+            "DATE": "DATE_TRUNC(temporal, HOUR)",
+            "TIME": "TIME_TRUNC(temporal, HOUR)",
+            "DATETIME": "DATETIME_TRUNC(temporal, HOUR)",
+            "TIMESTAMP": "TIMESTAMP_TRUNC(temporal, HOUR)",
+        }
+        for type_, expected in test_cases.items():
+            col.type = type_
+            actual = DatastoreEngineSpec.get_timestamp_expr(
+                col=col, pdf=None, time_grain="PT1H"
+            )
+            assert str(actual) == expected
+
+    def test_custom_minute_timegrain_expressions(self):
+        """
+        DB Eng Specs (datastore): Test time grain expressions
+        """
+        col = column("temporal")
+        test_cases = {
+            "DATE": "CAST(TIMESTAMP_SECONDS("
+            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
+            ") AS DATE)",
+            "DATETIME": "CAST(TIMESTAMP_SECONDS("
+            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
+            ") AS DATETIME)",
+            "TIMESTAMP": "CAST(TIMESTAMP_SECONDS("
+            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
+            ") AS TIMESTAMP)",
+        }
+        for type_, expected in test_cases.items():
+            col.type = type_
+            actual = DatastoreEngineSpec.get_timestamp_expr(
+                col=col, pdf=None, time_grain="PT5M"
+            )
+            assert str(actual) == expected
+
+    def test_fetch_data(self):
+        """
+        DB Eng Specs (datastore): Test fetch data
+        """
+
+        # Mock a google.cloud.datastore.table.Row
+        class Row:
+            def __init__(self, value):
+                self._value = value
+
+            def values(self):
+                return self._value
+
+        data1 = [(1, "foo")]
+        with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data1):
+            result = DatastoreEngineSpec.fetch_data(None, 0)
+        assert result == data1
+
+        data2 = [Row(1), Row(2)]
+        with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data2):
+            result = DatastoreEngineSpec.fetch_data(None, 0)
+        assert result == [1, 2]
+
+    def test_extract_errors(self):
+        msg = "403 POST https://datastore.googleapis.com/: Access Denied: 
Project my-project: User does not have datastore.databases.create permission in 
project my-project"  # noqa: E501
+        result = DatastoreEngineSpec.extract_errors(Exception(msg))
+        assert result == [
+            SupersetError(
+                message="Unable to connect. Verify that the following roles 
are set "
+                'on the service account: "Cloud Datastore Viewer", '
+                '"Cloud Datastore User", "Cloud Datastore Creator"',
+                error_type=SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
+                level=ErrorLevel.ERROR,
+                extra={
+                    "engine_name": "Google Datastore",
+                    "issue_codes": [
+                        {
+                            "code": 1017,
+                            "message": "",
+                        }
+                    ],
+                },
+            )
+        ]
+
+        msg = "datastore error: 404 Not found: Dataset fakeDataset:bogusSchema 
was not found in location"  # noqa: E501
+        result = DatastoreEngineSpec.extract_errors(Exception(msg))
+        assert result == [
+            SupersetError(
+                message='The schema "bogusSchema" does not exist. A valid schema must be used to run this query.',  # noqa: E501
+                error_type=SupersetErrorType.SCHEMA_DOES_NOT_EXIST_ERROR,
+                level=ErrorLevel.ERROR,
+                extra={
+                    "engine_name": "Google Datastore",
+                    "issue_codes": [
+                        {
+                            "code": 1003,
+                            "message": "Issue 1003 - There is a syntax error 
in the SQL query. Perhaps there was a misspelling or a typo.",  # noqa: E501
+                        },
+                        {
+                            "code": 1004,
+                            "message": "Issue 1004 - The column was deleted or 
renamed in the database.",  # noqa: E501
+                        },
+                    ],
+                },
+            )
+        ]
+
+        msg = 'Table name "badtable" missing dataset while no default dataset 
is set in the request'  # noqa: E501
+        result = DatastoreEngineSpec.extract_errors(Exception(msg))
+        assert result == [
+            SupersetError(
+                message='The table "badtable" does not exist. A valid table 
must be used to run this query.',  # noqa: E501
+                error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
+                level=ErrorLevel.ERROR,
+                extra={
+                    "engine_name": "Google Datastore",
+                    "issue_codes": [
+                        {
+                            "code": 1003,
+                            "message": "Issue 1003 - There is a syntax error 
in the SQL query. Perhaps there was a misspelling or a typo.",  # noqa: E501
+                        },
+                        {
+                            "code": 1005,
+                            "message": "Issue 1005 - The table was deleted or 
renamed in the database.",  # noqa: E501
+                        },
+                    ],
+                },
+            )
+        ]
+
+        msg = "Unrecognized name: badColumn at [1:8]"
+        result = DatastoreEngineSpec.extract_errors(Exception(msg))
+        assert result == [
+            SupersetError(
+                message='We can\'t seem to resolve column "badColumn" at line 1:8.',
+                error_type=SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
+                level=ErrorLevel.ERROR,
+                extra={
+                    "engine_name": "Google Datastore",
+                    "issue_codes": [
+                        {
+                            "code": 1003,
+                            "message": "Issue 1003 - There is a syntax error 
in the SQL query. Perhaps there was a misspelling or a typo.",  # noqa: E501
+                        },
+                        {
+                            "code": 1004,
+                            "message": "Issue 1004 - The column was deleted or 
renamed in the database.",  # noqa: E501
+                        },
+                    ],
+                },
+            )
+        ]
+
+        msg = 'Syntax error: Expected end of input but got identifier "from_"'
+        result = DatastoreEngineSpec.extract_errors(Exception(msg))
+        assert result == [
+            SupersetError(
+                message='Please check your query for syntax errors at or near "from_". Then, try running your query again.',  # noqa: E501
+                error_type=SupersetErrorType.SYNTAX_ERROR,
+                level=ErrorLevel.ERROR,
+                extra={
+                    "engine_name": "Google Datastore",
+                    "issue_codes": [
+                        {
+                            "code": 1030,
+                            "message": "Issue 1030 - The query has a syntax 
error.",
+                        }
+                    ],
+                },
+            )
+        ]
+
+    @mock.patch("superset.models.core.Database.db_engine_spec", 
DatastoreEngineSpec)
+    @mock.patch(
+        "sqlalchemy_datastore.base.create_datastore_client",
+        mock.Mock(return_value=(mock.Mock(), mock.Mock())),
+    )
+    @mock.patch(
+        "sqlalchemy_datastore._helpers.create_datastore_client",
+        mock.Mock(return_value=(mock.Mock(), mock.Mock())),
+    )
+    @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
+    def test_calculated_column_in_order_by(self):
+        table = self.get_table(name="birth_names")
+        TableColumn(
+            column_name="gender_cc",
+            type="VARCHAR(255)",
+            table=table,
+            expression="""
+            case
+              when gender='boy' then 'male'
+              else 'female'
+            end
+            """,
+        )
+
+        table.database.sqlalchemy_uri = "datastore://"
+        query_obj = {
+            "groupby": ["gender_cc"],
+            "is_timeseries": False,
+            "filter": [],
+            "orderby": [["gender_cc", True]],
+        }
+        sql = table.get_query_str(query_obj)
+        assert "ORDER BY gender_cc ASC" in sql
+
+    def test_build_sqlalchemy_uri(self):
+        """
+        DB Eng Specs (datastore): Test building SQLAlchemy URI from parameters
+        """
+        parameters: dict[str, Any] = {"query": {}}
+        encrypted_extra = {
+            "credentials_info": {
+                "project_id": "my-project",
+                "private_key": "SECRET",
+            }
+        }
+        result = DatastoreEngineSpec.build_sqlalchemy_uri(parameters, encrypted_extra)
+        assert result == "datastore://my-project/?"
+
+        # Test with query parameters
+        parameters_with_query: dict[str, Any] = {"query": {"location": "US"}}
+        result = DatastoreEngineSpec.build_sqlalchemy_uri(
+            parameters_with_query, encrypted_extra
+        )
+        assert result == "datastore://my-project/?location=US"
+
+        # Test missing encrypted_extra raises ValidationError
+        with pytest.raises(ValidationError, match="Missing service 
credentials"):
+            DatastoreEngineSpec.build_sqlalchemy_uri(parameters, None)
+
+        # Test missing project_id raises ValidationError
+        bad_extra = {"credentials_info": {"private_key": "SECRET"}}
+        with pytest.raises(ValidationError, match="Invalid service 
credentials"):
+            DatastoreEngineSpec.build_sqlalchemy_uri(parameters, bad_extra)
+
+    def test_get_function_names(self):
+        """
+        DB Eng Specs (datastore): Test retrieving function names for autocomplete
+        """
+        database = mock.MagicMock()
+        result = DatastoreEngineSpec.get_function_names(database)
+        assert result == ["sum", "avg", "count"]
+
+    def test_get_view_names(self):
+        """
+        DB Eng Specs (datastore): Test that Datastore returns no view names
+        """
+        database = mock.MagicMock()
+        inspector = mock.MagicMock()
+        result = DatastoreEngineSpec.get_view_names(database, inspector, "some_schema")
+        assert result == set()
+
+    def test_validate_parameters(self):
+        """
+        DB Eng Specs (datastore): Test parameter validation returns no errors
+        """
+        result = DatastoreEngineSpec.validate_parameters(
+            {
+                "host": "localhost",
+                "port": 5432,
+                "username": "",
+                "password": "",
+                "database": "",
+                "query": {},
+            }
+        )
+        assert result == []
+
+    def test_get_allow_cost_estimate(self):
+        """
+        DB Eng Specs (datastore): Test that cost estimate is not supported
+        """
+        assert DatastoreEngineSpec.get_allow_cost_estimate({}) is False
+
+    def test_parse_error_exception(self):
+        """
+        DB Eng Specs (datastore): Test error message parsing extracts first line
+        """
+        multiline_msg = (
+            'datastore error: 400 Syntax error: Table "t" must be qualified.\n'
+            "\n"
+            "(job ID: abc-123)\n"
+            "\n"
+            "     -----Query Job SQL Follows-----\n"
+            "   1:select * from t\n"
+        )
+        result = DatastoreEngineSpec.parse_error_exception(Exception(multiline_msg))
+        assert str(result) == (
+            'datastore error: 400 Syntax error: Table "t" must be qualified.'
+        )
+
+        # Simple single-line messages pass through unchanged
+        simple_msg = "Some simple error"
+        result = DatastoreEngineSpec.parse_error_exception(Exception(simple_msg))
+        assert str(result) == simple_msg
+
+    def test_convert_dttm(self):
+        """
+        DB Eng Specs (datastore): Test datetime conversion for all supported types
+        """
+        dttm = datetime(2019, 1, 2, 3, 4, 5, 678900)
+
+        assert (
+            DatastoreEngineSpec.convert_dttm("DATE", dttm)
+            == "CAST('2019-01-02' AS DATE)"
+        )
+        assert (
+            DatastoreEngineSpec.convert_dttm("DATETIME", dttm)
+            == "CAST('2019-01-02T03:04:05.678900' AS DATETIME)"
+        )
+        assert (
+            DatastoreEngineSpec.convert_dttm("TIMESTAMP", dttm)
+            == "CAST('2019-01-02T03:04:05.678900' AS TIMESTAMP)"
+        )
+        assert (
+            DatastoreEngineSpec.convert_dttm("TIME", dttm)
+            == "CAST('03:04:05.678900' AS TIME)"
+        )
+        assert DatastoreEngineSpec.convert_dttm("UnknownType", dttm) is None
+
+    def test_get_parameters_from_uri(self):
+        """
+        DB Eng Specs (datastore): Test extracting parameters from URI
+        """
+        encrypted_extra = {
+            "credentials_info": {
+                "project_id": "my-project",
+                "private_key": "SECRET",
+            }
+        }
+        result = DatastoreEngineSpec.get_parameters_from_uri(
+            "datastore://my-project/", encrypted_extra
+        )
+        assert result == {
+            "credentials_info": {
+                "project_id": "my-project",
+                "private_key": "SECRET",
+            },
+            "query": {},
+        }
+
+        # URI with query parameters
+        result = DatastoreEngineSpec.get_parameters_from_uri(
+            "datastore://my-project/?location=US", encrypted_extra
+        )
+        assert result["query"] == {"location": "US"}
+
+        # Missing encrypted_extra raises ValidationError
+        with pytest.raises(ValidationError, match="Invalid service 
credentials"):
+            DatastoreEngineSpec.get_parameters_from_uri("datastore://my-project/", None)
+
+    def test_get_dbapi_exception_mapping(self):
+        """
+        DB Eng Specs (datastore): Test DBAPI exception mapping includes
+        DefaultCredentialsError
+        """
+        from superset.db_engine_specs.exceptions import SupersetDBAPIConnectionError

Review Comment:
   **Suggestion:** The `test_get_dbapi_exception_mapping` test assumes that the 
`google.auth` package is installed and will fail when it is not, even though 
the engine spec can legitimately return an empty mapping in that case; adding 
`pytest.importorskip("google.auth")` inside the test ensures it is skipped 
rather than failing when the optional dependency is missing. [possible bug]
   
   <details>
   <summary><b>Severity Level:</b> Major ⚠️</summary>
   
   ```mdx
   - ❌ test_get_dbapi_exception_mapping fails without google.auth.
   - ⚠️ Test suite unstable across developer environments.
   ```
   </details>
   
   ```suggestion
           pytest.importorskip("google.auth")
   ```
   <details>
   <summary><b>Steps of Reproduction ✅ </b></summary>
   
   ```mdx
   1. Remove google-auth from the test environment (pip uninstall google-auth) 
so the
   optional google.auth package is unavailable.
   
   2. Run just this test: pytest
   
tests/integration_tests/db_engine_specs/datastore_tests.py::TestDatastoreDbEngineSpec::test_get_dbapi_exception_mapping
   -q.
   
   3. The test calls DatastoreEngineSpec.get_dbapi_exception_mapping
   (superset/db_engine_specs/datastore.py) and asserts len(mapping) == 1; when 
google.auth is
   missing the engine spec can legitimately return an empty mapping and the 
assertion fails,
   causing the test to fail.
   
   4. Adding pytest.importorskip("google.auth") at the top of the test (as 
suggested) will
   skip the test in environments missing the optional dependency instead of 
failing.
   ```
   </details>
   <details>
   <summary><b>Prompt for AI Agent 🤖 </b></summary>
   
   ```mdx
   This is a comment left during a code review.
   
   **Path:** tests/integration_tests/db_engine_specs/datastore_tests.py
   **Line:** 413:413
   **Comment:**
        *Possible Bug: The `test_get_dbapi_exception_mapping` test assumes that 
the `google.auth` package is installed and will fail when it is not, even 
though the engine spec can legitimately return an empty mapping in that case; 
adding `pytest.importorskip("google.auth")` inside the test ensures it is 
skipped rather than failing when the optional dependency is missing.
   
   Validate the correctness of the flagged issue. If correct, How can I resolve 
this? If you propose a fix, implement it and please make it concise.
   ```
   </details>



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

