This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new a1e32dbfa6 feat: command to test DB engine specs (#24918)
a1e32dbfa6 is described below

commit a1e32dbfa695b22aa7f570b92f3d752d6bd3cbbd
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri Aug 11 10:10:49 2023 -0700

    feat: command to test DB engine specs (#24918)
---
 superset/cli/test_db.py            | 417 ++++++++++++++++++++
 superset/db_engine_specs/README.md | 763 +++++++++++++++++++++++++++++++++++++
 superset/db_engine_specs/base.py   |   2 +-
 superset/db_engine_specs/lib.py    | 314 +++++++++++++++
 superset/db_engine_specs/sqlite.py |   2 +-
 5 files changed, 1496 insertions(+), 2 deletions(-)

diff --git a/superset/cli/test_db.py b/superset/cli/test_db.py
new file mode 100644
index 0000000000..dc8787c723
--- /dev/null
+++ b/superset/cli/test_db.py
@@ -0,0 +1,417 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import sys
+from collections import defaultdict
+from datetime import datetime
+from typing import Any, Callable
+
+import click
+import yaml
+from rich.console import Console
+from sqlalchemy import (
+    Column,
+    create_engine,
+    DateTime,
+    ForeignKey,
+    insert,
+    Integer,
+    MetaData,
+    select,
+    String,
+    Table,
+)
+from sqlalchemy.engine import Engine
+from sqlalchemy.exc import NoSuchModuleError
+
+from superset.databases.utils import make_url_safe
+from superset.db_engine_specs import load_engine_specs
+from superset.db_engine_specs.base import BaseEngineSpec
+from superset.db_engine_specs.lib import (
+    ADVANCED_FEATURES,
+    BASIC_FEATURES,
+    DATABASE_DETAILS,
+    diagnose,
+    LIMIT_METHODS,
+    NICE_TO_HAVE_FEATURES,
+)
+
+metadata_obj = MetaData()
+
+user = Table(
+    "tmp_superset_test_table_user",
+    metadata_obj,
+    Column("user_id", Integer, primary_key=True),
+    Column("user_name", String(16), nullable=False),
+    Column("email_address", String(60), key="email"),
+    Column("nickname", String(50), nullable=False),
+)
+
+user_prefs = Table(
+    "tmp_superset_test_table_user_prefs",
+    metadata_obj,
+    Column("pref_id", Integer, primary_key=True),
+    Column("user_id", Integer, ForeignKey("user.user_id"), nullable=False),
+    Column("pref_name", String(40), nullable=False),
+    Column("pref_value", String(100)),
+)
+
+
+TestType = Callable[[Console, Engine], None]
+
+
+class TestRegistry:
+    def __init__(self) -> None:
+        self.tests: dict[str, list[TestType]] = defaultdict(list)
+
+    def add(self, *dialects: str) -> Callable[[TestType], TestType]:
+        def decorator(func: TestType) -> TestType:
+            for dialect in dialects:
+                self.tests[dialect].append(func)
+
+            return func
+
+        return decorator
+
+    def get_tests(self, dialect: str) -> list[TestType]:
+        return self.tests[dialect]
+
+
+registry = TestRegistry()
+
+
[email protected]("sqlite", "postgresql")
+def test_datetime(console: Console, engine: Engine) -> None:
+    """
+    Create a table with a timestamp column.
+    """
+    console.print("[bold]Testing datetime support...")
+
+    md = MetaData()
+    table = Table(
+        "test",
+        md,
+        Column("ts", DateTime),
+    )
+
+    try:
+        console.print("Creating a table with a timestamp column...")
+        md.create_all(engine)
+        console.print("[green]Table created!")
+
+        now = datetime.now()
+
+        console.print("Inserting timestamp value...")
+        stmt = insert(table).values(ts=now)
+        engine.execute(stmt)
+
+        console.print("Reading timestamp value...")
+        stmt = select(table)
+        row = engine.execute(stmt).fetchone()
+        assert row[0] == now
+        console.print(":thumbs_up: [green]Succcess!")
+    except Exception as ex:  # pylint: disable=broad-except
+        console.print(f"[red]Test failed: {ex}")
+        console.print("[bold]Exiting...")
+        sys.exit(1)
+
+
[email protected]()
[email protected]("sqlalchemy_uri")
[email protected](
+    "--connect-args",
+    "-c",
+    "raw_connect_args",
+    help="Connect args as JSON or YAML",
+)
+def test_db(sqlalchemy_uri: str, raw_connect_args: str | None = None) -> None:
+    """
+    Run a series of tests against an analytical database.
+
+    This command tests:
+
+      1. The Superset DB engine spec.
+      2. The SQLAlchemy dialect.
+      3. The database connectivity and performance.
+
+    It's useful for people developing DB engine specs and/or SQLAlchemy dialects, and
+    also to test new versions of DB API 2.0 drivers.
+
+    TODO:
+
+      - implement SSH tunneling
+      - implement server certificates
+
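+    Example (the exact CLI entry point name is assumed here):
+
+        superset test-db sqlite://
+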
+    """
+    console = Console()
+    console.clear()
+
+    console.print("[bold]Collecting additional connection information...")
+    connect_args = collect_connection_info(console, sqlalchemy_uri, raw_connect_args)
+
+    console.print("[bold]\nChecking for a DB engine spec...")
+    test_db_engine_spec(console, sqlalchemy_uri)
+
+    console.print("[bold]\nTesting the SQLAlchemy dialect...")
+    engine = test_sqlalchemy_dialect(console, sqlalchemy_uri, connect_args)
+
+    console.print("[bold]\nTesting the database connectivity...")
+    test_database_connectivity(console, engine)
+
+
+def collect_connection_info(
+    console: Console,
+    sqlalchemy_uri: str,
+    raw_connect_args: str | None = None,
+) -> dict[str, Any]:
+    """
+    Collect ``connect_args`` if needed.
+    """
+    console.print(f"[green]SQLAlchemy URI: [bold]{sqlalchemy_uri}")
+    if raw_connect_args is None:
+        configure_connect_args = input(
+            "> Do you want to configure connection arguments? [y/N] "
+        )
+        if configure_connect_args.strip().lower() == "y":
+            console.print(
+                "Please paste the connect_args as JSON or YAML and press 
CTRL-D when "
+                "finished"
+            )
+            raw_connect_args = sys.stdin.read()
+        else:
+            raw_connect_args = "{}"
+
+    return yaml.safe_load(raw_connect_args)
+
+
+def test_db_engine_spec(
+    console: Console,
+    sqlalchemy_uri: str,
+) -> type[BaseEngineSpec] | None:
+    """
+    Test the DB engine spec, if available.
+    """
+    for spec in load_engine_specs():
+        try:
+            supported = spec.supports_url(make_url_safe(sqlalchemy_uri))
+        except NoSuchModuleError:
+            console.print("[red]No SQLAlchemy dialect found for the URI!")
+            console.print("[bold]Exiting...")
+            sys.exit(1)
+
+        if supported:
+            if spec.__module__.startswith("superset.db_engine_specs"):
+                console.print(
+                    f":thumbs_up: [green]Found DB engine spec: 
[bold]{spec.engine_name}"
+                )
+            else:
+                console.print(
+                    ":warning: [yellow]Found 3rd party DB engine spec: "
+                    f"[bold]{spec.engine_name} ({spec.__module__})"
+                )
+            break
+    else:
+        console.print(
+            ":thumbs_down: [red]No DB engine spec found for the SQLAlchemy URI. The "
+            "database can still be used with Superset, but some functionality may be "
+            "limited."
+        )
+        return None
+
+    info = diagnose(spec)
+
+    console.print("About the database:")
+    console.print("  - Method used to apply LIMIT to queries:", 
info["limit_method"])
+    for k, v in LIMIT_METHODS.items():
+        console.print(f"    - {k}: {v}")
+    for key, feature in DATABASE_DETAILS.items():
+        console.print(f"  - {feature}:", info[key])
+
+    console.print("[bold]Checking for basic features...")
+    console.print("Supported time grains:")
+    for k, v in info["time_grains"].items():
+        score = " (+1)" if v else ""
+        console.print(f"  - {k}: {v}{score}")
+    for k, v in BASIC_FEATURES.items():
+        score = " (+10)" if info[k] else ""
+        console.print(f"{v}: {info[k]}{score}")
+
+    console.print("[bold]Checking for nice-to-have features...")
+    for k, v in NICE_TO_HAVE_FEATURES.items():
+        score = " (+10)" if info[k] else ""
+        console.print(f"{v}: {info[k]}{score}")
+
+    console.print("[bold]Checking for advanced features...")
+    for k, v in ADVANCED_FEATURES.items():
+        score = " (+10)" if info[k] else ""
+        console.print(f"{v}: {info[k]}{score}")
+
+    # pylint: disable=consider-using-f-string
+    console.print("[bold]Overall score: {score}/{max_score}".format(**info))
+
+    return spec
+
+
+def test_sqlalchemy_dialect(
+    console: Console,
+    sqlalchemy_uri: str,
+    connect_args: dict[str, Any],
+) -> Engine:
+    """
+    Test the SQLAlchemy dialect, making sure it supports everything Superset needs.
+    """
+    engine = create_engine(sqlalchemy_uri, connect_args=connect_args)
+    dialect = engine.dialect
+
+    console.print("[bold]Checking functions used by the inspector...")
+    keys = [
+        "get_schema_names",
+        "get_table_names",
+        "get_view_names",
+        "get_indexes",
+        "get_table_comment",
+        "get_columns",
+        "get_unique_constraints",
+        "get_check_constraints",
+        "get_pk_constraint",
+        "get_foreign_keys",
+    ]
+    for key in keys:
+        console.print(f"  - {key}:", hasattr(dialect, key))
+
+    console.print("[bold]Checking dialect attributes...")
+    if hasattr(dialect, "dbapi"):
+        console.print(f"  - dbapi: [bold]{dialect.dbapi.__name__}")
+    else:
+        console.print("  - dbapi:", None)
+
+    attrs = [
+        "name",
+        "driver",
+        "supports_multivalues_insert",
+    ]
+    for attr in attrs:
+        console.print(f"  - {attr}:", getattr(dialect, attr, None))
+
+    console.print("Supports do_ping:", hasattr(dialect, "do_ping"))
+    console.print(
+        "Can quote identifiers:",
+        hasattr(dialect, "identifier_preparer")
+        and hasattr(dialect.identifier_preparer, "quote"),
+    )
+
+    console.print(
+        "Doesn't require name normalization:",
+        not dialect.requires_name_normalize,
+    )
+    if dialect.requires_name_normalize:
+        console.print(
+            "  - Implements denormalize_name:", hasattr(dialect, 
"denormalize_name")
+        )
+
+    return engine
+
+
+# pylint: disable=too-many-statements
+def test_database_connectivity(console: Console, engine: Engine) -> None:
+    """
+    Tests the DB API 2.0 driver.
+    """
+    with console.status("[bold green]Connecting to database..."):
+        try:
+            conn = engine.raw_connection()
+            engine.dialect.do_ping(conn)
+            console.print(":thumbs_up: [green]Connected successfully!")
+        except Exception as ex:  # pylint: disable=broad-except
+            console.print(f":thumbs_down: [red]Failed to connect: {ex}")
+            console.print("[bold]Exiting...")
+            sys.exit(1)
+
+    cursor = conn.cursor()
+
+    console.print("[bold]Checking that we can run queries...")
+    console.print("sql> SELECT 1;")
+    cursor.execute("SELECT 1")
+    result = cursor.fetchone()[0]
+    color = "green" if result == 1 else "red"
+    console.print(f"[{color}]> {result}")
+
+    console.print("[bold]Checking that we can create tables...")
+    try:
+        metadata_obj.create_all(engine)
+        console.print("[green]Tables created!")
+    except Exception as ex:  # pylint: disable=broad-except
+        console.print(f"[red]Unable to create tables: {ex}")
+        console.print("[bold]Exiting...")
+        sys.exit(1)
+
+    console.print("[bold]Checking that we can insert data...")
+    stmt = insert(user).values(
+        user_name="beto",
+        email="[email protected]",
+        nickname="Beto",
+    )
+    try:
+        console.print(
+            "sql>",
+            stmt.compile(
+                dialect=engine.dialect,
+                compile_kwargs={"literal_binds": True},
+            ),
+        )
+        engine.execute(stmt)
+    except Exception as ex:  # pylint: disable=broad-except
+        console.print(f"[red]Unable to insert data: {ex}")
+        console.print("[bold]Exiting...")
+        sys.exit(1)
+
+    console.print("[bold]Checking that we can read data...")
+    stmt = select(user).where(user.c.user_name == "beto")
+    try:
+        console.print(
+            "sql>",
+            stmt.compile(
+                dialect=engine.dialect,
+                compile_kwargs={"literal_binds": True},
+            ),
+        )
+        result = engine.execute(stmt).fetchall()
+        console.print(f"[green]> {result}")
+    except Exception as ex:  # pylint: disable=broad-except
+        console.print(f"[red]Unable to read data: {ex}")
+        console.print("[bold]Exiting...")
+        sys.exit(1)
+
+    console.print("[bold]Checking that we can drop tables...")
+    try:
+        metadata_obj.drop_all(engine)
+        console.print("[green]Done!")
+    except Exception as ex:  # pylint: disable=broad-except
+        console.print(f"[red]Unable to drop tables: {ex}")
+        console.print("[bold]Exiting...")
+        sys.exit(1)
+
+    # run engine-specific tests
+    if tests := registry.get_tests(engine.dialect.name):
+        console.print("[bold]Running engine-specific tests...")
+        for test in tests:
+            test(console, engine)
diff --git a/superset/db_engine_specs/README.md b/superset/db_engine_specs/README.md
new file mode 100644
index 0000000000..1744160768
--- /dev/null
+++ b/superset/db_engine_specs/README.md
@@ -0,0 +1,763 @@
+# Database engine specifications
+
+Superset uses [SQLAlchemy](https://www.sqlalchemy.org/) as an abstraction layer for running queries and fetching metadata from tables (like column names and types). Unfortunately, while SQLAlchemy offers enough functionality to allow connecting Superset to dozens of databases, there are still implementation details that differ across them. Because of this, Superset has an additional abstraction on top of SQLAlchemy, called a "database engine specification" or, simply, "DB engine spec".
+
+DB engine specs were created initially because there's no SQL standard for computing aggregations at different time grains. For example, to compute a daily metric in Trino or Postgres we could run a query like this:
+
+```sql
+SELECT
+  date_trunc('day', CAST(time_column AS TIMESTAMP)) AS day,
+  COUNT(*) AS metric
+FROM
+  some_table
+GROUP BY
+  1
+```
+
+For MySQL, instead of using the `date_trunc` function, we would need to write:
+
+```sql
+SELECT
+  DATE(time_column) AS day,
+  COUNT(*) AS metric
+FROM
+  some_table
+GROUP BY
+  1
+```
+
+Over time, more and more functionality was added to DB engine specs, including validating SQL, estimating the cost of queries before they are run, and understanding the semantics of error messages. These are all described in detail in this document, and in the table below you can see a summary of what features are supported by each database.
+
+Note that DB engine specs are completely optional. Superset can connect to any database supported by SQLAlchemy (or 3rd party dialects) even if there's no DB engine spec associated with it. But DB engine specs greatly improve the experience of working with a database in Superset.
+
+## Features
+
+The table below (generated via `python superset/db_engine_specs/lib.py`) summarizes the information about the status of all DB engine specs in Superset (note that this excludes 3rd party DB engine specs):
+
+| Feature | Amazon Athena | Amazon DynamoDB | Amazon Redshift | Apache Drill | 
Apache Druid | Apache Hive | Apache Impala | Apache Kylin | Apache Pinot | 
Apache Solr | Apache Spark SQL | Ascend | Aurora MySQL (Data API) | Aurora 
PostgreSQL (Data API) | Azure Synapse | ClickHouse | ClickHouse Connect 
(Superset) | CockroachDB | CrateDB | Databricks | Databricks Interactive 
Cluster | Databricks SQL Endpoint | Dremio | DuckDB | ElasticSearch (OpenDistro 
SQL) | ElasticSearch (SQL API) | Exaso [...]
+|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  
---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  
---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  
---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  ---|  
---|  ---|
+| Module | superset.db_engine_specs.athena | superset.db_engine_specs.dynamodb 
| superset.db_engine_specs.redshift | superset.db_engine_specs.drill | 
superset.db_engine_specs.druid | superset.db_engine_specs.hive | 
superset.db_engine_specs.impala | superset.db_engine_specs.kylin | 
superset.db_engine_specs.pinot | superset.db_engine_specs.solr | 
superset.db_engine_specs.spark | superset.db_engine_specs.ascend | 
superset.db_engine_specs.aurora | superset.db_engine_specs.aurora | superset.d 
[...]
+| Method used to limit the rows in the subquery | FORCE_LIMIT | FORCE_LIMIT | 
FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | 
FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | 
FORCE_LIMIT | FORCE_LIMIT | WRAP_SQL | FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT 
| FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | 
FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | FETCH_MANY | 
FORCE_LIMIT | FORCE_LIMIT | FORCE_LIMIT | WRAP_SQL | F [...]
+| Supports JOINs | True | True | True | True | False | True | True | True | 
False | False | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | False | False | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True |
+| Supports subqueries | True | True | True | True | True | True | True | True 
| False | False | True | True | True | True | True | True | True | True | True 
| True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True |
+| Allows aliases in the SELECT statement | True | True | True | True | True | 
True | True | True | False | True | True | True | True | True | True | True | 
True | True | True | True | True | True | False | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True |
+| Allows referencing aliases in the ORDER BY statement | True | True | True | 
True | True | True | True | True | False | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True |
+| Supports secondary time columns | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
True | True | False | False | False | False | False | False | False | True | 
True | False | False | False | False | False | False | False | True | True | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False |
+| Allows ommiting time filters from inline GROUP BYs | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | True | True | False | False | False | False | False | False | 
False | True | True | False | False | False | False | False | False | False | 
True | True | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False |
+| Able to use source column when an alias overshadows it | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | True | False | 
False | False | False | False | False | False | False | True | False | False |
+| Allows aggregations in ORDER BY not present in the SELECT | True | True | 
True | True | True | False | True | True | True | True | False | True | True | 
True | True | True | True | True | True | True | False | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True |
+| Allows expressions in ORDER BY | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | True | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False |
+| Allows CTE as a subquery | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | False | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | False | True | False | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True |
+| Allows LIMIT clause (instead of TOP) | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | False | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | False | True | True | 
True | True | True | True | True | True | True | True | True | True | False | 
True | True | True |
+| Maximum column name | None | None | 127 | None | None | 767 | None | None | 
None | None | 767 | None | 64 | 63 | 128 | None | None | 63 | None | None | 767 
| None | None | None | None | None | 128 | None | None | 128 | None | 30 | None 
| None | None | 128 | 64 | 30 | 30 | None | None | 63 | None | 30 | None | None 
| 256 | 64 | 30 | None | None | None |
+| Allows comments | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | False | False | True | True | True | True | 
True | True | True | False | False | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True |
+| Colons must be escaped | False | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True |
+| Has time grain SECOND | True | True | True | True | True | True | False | 
True | True | False | True | True | True | True | True | False | False | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | False | True | True | 
True |
+| Has time grain FIVE_SECONDS | False | False | False | False | True | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | True | False | False | False | False | False | 
False | False | False | False | False | False | False | False | True | True | 
False | False | False | False | False | False |
+| Has time grain THIRTY_SECONDS | False | False | False | False | True | False 
| False | False | False | False | False | False | False | False | False | False 
| False | False | False | False | False | False | False | False | False | False 
| False | False | False | False | True | False | False | False | False | False 
| False | False | False | False | False | False | False | False | True | True | 
False | False | False | False | False | False |
+| Has time grain MINUTE | True | True | True | True | True | True | True | 
True | True | False | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True |
+| Has time grain FIVE_MINUTES | False | False | False | False | True | False | 
False | False | True | False | False | False | False | False | True | True | 
True | False | False | False | False | False | False | False | False | False | 
False | False | False | True | True | False | False | False | True | True | 
False | False | False | False | False | False | False | False | True | True | 
True | False | False | False | False | False |
+| Has time grain TEN_MINUTES | False | False | False | False | True | False | 
False | False | True | False | False | False | False | False | True | True | 
True | False | False | False | False | False | False | False | False | False | 
False | False | False | True | True | False | False | False | True | True | 
False | False | False | False | False | False | False | False | True | True | 
True | False | False | False | False | False |
+| Has time grain FIFTEEN_MINUTES | False | False | False | True | True | False 
| False | False | True | False | False | False | False | False | True | True | 
True | False | False | False | False | False | False | False | False | False | 
False | False | False | True | True | False | False | False | True | True | 
False | False | False | False | False | False | False | False | True | True | 
True | False | False | False | False | False |
+| Has time grain THIRTY_MINUTES | False | False | False | True | True | False 
| False | False | True | False | False | False | False | False | True | True | 
True | False | False | False | False | False | False | False | False | False | 
False | False | False | True | True | False | False | False | False | True | 
False | False | False | False | False | False | False | False | True | True | 
True | False | False | False | False | False |
+| Has time grain HALF_HOUR | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | True | False | False | False | True | False | 
False | False | False | False | False | False | False | False | True | True | 
False | False | False | False | False | False |
+| Has time grain HOUR | True | True | True | True | True | True | True | True 
| True | False | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True |
+| Has time grain SIX_HOURS | False | False | False | False | True | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | True | False | False | False | False | False | 
False | False | False | False | False | False | False | False | True | True | 
False | False | False | False | False | False |
+| Has time grain DAY | True | True | True | True | True | True | True | True | 
True | False | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True |
+| Has time grain WEEK | True | True | True | True | True | True | True | True 
| True | False | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | False | False | True | False | True | True | 
True | True | True | False | True | True | True | True | True | True | True | 
True | True | False | True | True | True | True | True | True | True | True |
+| Has time grain WEEK_STARTING_SUNDAY | True | True | False | False | True | 
True | False | False | False | False | True | False | False | False | True | 
False | False | False | False | True | True | True | False | False | False | 
False | False | False | False | False | True | False | False | False | True | 
True | False | False | False | False | True | False | False | False | True | 
True | False | False | False | True | False | True |
+| Has time grain WEEK_STARTING_MONDAY | False | True | False | False | False | 
False | False | False | False | False | False | False | True | False | True | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | True | True | False | False | False | True | 
True | True | False | False | False | True | False | False | False | True | 
True | False | True | False | True | False | True |
+| Has time grain WEEK_ENDING_SATURDAY | True | True | False | False | True | 
True | False | False | False | False | True | False | False | False | False | 
False | False | False | False | True | True | True | False | False | False | 
False | False | False | False | False | True | False | False | False | False | 
False | False | False | False | False | True | False | False | False | True | 
True | False | False | False | True | False | True |
+| Has time grain WEEK_ENDING_SUNDAY | False | True | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | True | False | False | False | False | 
False | False | False | False | False | True | False | False | False | True | 
True | False | False | False | True | False | True |
+| Has time grain MONTH | True | True | True | True | True | True | True | True 
| True | False | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True |
+| Has time grain QUARTER | True | True | True | True | True | True | True | 
True | True | False | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | False | False | True | False | True | 
True | True | True | True | False | True | True | True | False | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True |
+| Has time grain QUARTER_YEAR | False | False | False | False | False | False 
| False | False | False | False | False | False | False | False | False | False 
| False | False | False | False | False | False | False | False | False | False 
| False | False | False | False | True | False | False | False | False | False 
| False | True | False | False | False | False | False | False | True | True | 
False | False | False | False | False | False |
+| Has time grain YEAR | True | True | True | True | True | True | True | True 
| True | False | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True |
+| Masks/unmasks encrypted_extra | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | True | True | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False |
+| Has column type mappings | False | False | False | False | False | True | 
False | False | False | False | True | False | True | True | True | True | True 
| True | False | False | True | False | False | False | False | False | False | 
False | False | False | False | False | False | False | True | True | True | 
False | False | False | True | True | False | False | False | False | False | 
True | False | True | False | True |
+| Returns a list of function names | False | False | False | False | False | 
True | False | False | False | False | True | False | False | False | False | 
True | True | False | False | False | True | False | False | False | False | 
False | False | False | False | False | True | False | False | False | False | 
False | False | False | False | False | True | False | False | False | True | 
True | False | False | False | True | False | True |
+| Supports user impersonation | False | False | False | True | False | True | 
False | False | False | False | True | False | False | False | False | False | 
False | False | False | False | True | False | False | False | False | False | 
False | False | False | False | True | False | False | False | False | False | 
False | False | False | False | True | False | False | False | False | False | 
False | False | False | True | False | False |
+| Support file upload | True | True | True | True | True | True | True | True 
| True | True | True | True | True | True | True | False | False | True | True 
| True | True | True | True | True | True | True | True | True | True | True | 
False | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True |
+| Returns extra table metadata | False | False | False | False | False | True 
| False | False | False | False | True | False | False | False | False | False 
| False | False | False | False | True | False | False | False | False | False 
| False | False | False | True | True | False | False | False | False | False | 
False | False | False | False | True | False | False | False | False | False | 
False | False | False | True | False | False |
+| Maps driver exceptions to Superset exceptions | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False |
+| Parses error messages and returns Superset errors | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | True | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False |
+| Supports changing the schema per-query | False | False | False | True | 
False | True | False | False | False | False | True | False | True | True | 
False | False | False | True | False | False | True | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | True | False | False | False | True | True | False | False | 
False | False | True | True | False | True | False | True |
+| Supports catalogs | False | False | True | False | False | False | False | 
False | False | False | False | False | False | True | False | False | False | 
True | False | False | False | False | False | False | False | False | False | 
False | False | True | False | False | True | False | False | False | False | 
False | False | True | False | True | False | True | False | False | True | 
False | False | False | True | False |
+| Supports changing the catalog per-query | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False |
+| Can be connected thru an SSH tunnel | False | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | True | True | True | True | True | True | True | True | True | 
True | True | False | False | True | True | True | True | True | True | True | 
True | True | True | True | True | True | False | False | True | True | True | 
True | True | True |
+| Allows query to be canceled | False | False | True | False | False | True | 
True | False | False | False | True | True | True | True | False | False | 
False | True | False | False | True | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
True | True | False | False | True | True | False | False | False | False | 
True | True | False | True | False | False |
+| Returns additional metrics on dataset creation | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | False |
+| Supports querying the latest partition only | False | False | False | False 
| False | True | False | False | False | False | True | False | False | False | 
False | False | False | False | False | False | True | False | False | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | True | False | False | False | 
False | False | False | False | False | True | False | True |
+| Expands complex types (arrays, structs) into rows/columns | False | False | 
False | False | False | True | False | False | False | False | True | False | 
False | False | False | False | False | False | False | False | True | False | 
False | False | False | False | False | False | False | False | False | False | 
False | False | False | False | False | False | False | False | True | False | 
False | False | False | False | False | False | False | False | False | False |
+| Supports query cost estimation | False | False | False | False | False | 
True | False | False | False | False | True | False | False | True | False | 
False | False | True | False | False | True | False | False | False | False | 
False | False | False | False | True | False | False | False | False | False | 
False | False | False | False | False | True | True | False | False | False | 
False | False | False | False | True | False | True |
+| Supports validating SQL before running query | False | False | False | False 
| False | False | False | False | False | False | False | False | False | True 
| False | False | False | False | False | False | False | False | False | False 
| False | False | False | False | False | False | False | False | False | False 
| False | False | False | False | False | False | True | False | False | False 
| False | False | False | False | False | False | False | False |
+| Score | 20 | 32 | 48 | 50 | 37 | 120 | 37 | 28 | 32 | 20 | 120 | 38 | 59 | 
88 | 44 | 41 | 41 | 78 | 28 | 40 | 120 | 30 | 28 | 28 | 26 | 26 | 28 | 26 | 28 
| 63 | 61 | 28 | 38 | 26 | 44 | 44 | 59 | 38 | 28 | 38 | 132 | 78 | 28 | 37 | 
41 | 41 | 62 | 59 | 27 | 112 | 38 | 82 |
+
+## Database information
+
+A DB engine spec has attributes that describe the underlying database engine, so that Superset can know how to build and run queries. For example, some databases don't support subqueries, which are needed for some of the queries produced by Superset for certain charts. When a database doesn't support subqueries the query is run in two steps, using the results from the first query to build the second query.
+
+These attributes and their default values (set in the base class, `BaseEngineSpec`) are described below:
+
+### `limit_method = LimitMethod.FORCE_LIMIT`
+
+When running user queries in SQL Lab, Superset needs to limit the number of rows returned. The reason for that is cost and performance: there's no point in running a query that produces millions of rows when they can't be loaded into the browser.
+
+For most databases this is done by parsing the user-submitted query and applying a limit, if one is not present, or replacing the existing limit if it's larger. This is called the `FORCE_LIMIT` method, and is the most efficient, since the database will produce at most the number of rows that Superset will display.
+
+For some databases this method might not work, and they can use the `WRAP_SQL` method, which wraps the original query in a `SELECT *` and applies a limit via the SQLAlchemy dialect, which should get translated to the correct syntax. This method might be inefficient, since the database optimizer might not be able to push the limit to the inner query.
+
+Finally, as a last resort there is the `FETCH_MANY` method. When a DB engine spec uses this method the query runs unmodified, but Superset fetches only a certain number of rows from the cursor. It's possible that a database using this method can optimize the query execution and compute rows as they are being read by the cursor, but it's unlikely. This makes this method the least efficient of the three.
+
+Note that when Superset runs a query with a given limit, say 100, it always modifies the query to request one additional row (`LIMIT 101`, in this case). This extra row is dropped before the results are returned to the user, but it allows Superset to inform the users that the query was indeed limited. Otherwise a query with `LIMIT 100` that returns exactly 100 rows would seem like it was limited, when in fact it was not.
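+
+As a rough sketch (illustrative pseudo-SQL and Python, not the actual implementation), here is what the three methods do to a user query when the display limit is 100:
+
+```python
+# FORCE_LIMIT: parse the submitted SQL and rewrite the limit in place:
+#   SELECT * FROM t LIMIT 100000  =>  SELECT * FROM t LIMIT 101
+#
+# WRAP_SQL: wrap the query and let the SQLAlchemy dialect add the limit:
+#   SELECT * FROM (SELECT * FROM t LIMIT 100000) AS inner_qry LIMIT 101
+#
+# FETCH_MANY: run the query unmodified and read only limit + 1 rows from
+# the cursor (the extra row tells Superset whether the limit kicked in):
+def fetch_with_limit(cursor, sql: str, limit: int = 100) -> list:
+    cursor.execute(sql)
+    return cursor.fetchmany(limit + 1)
+```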
+
+### `allows_joins = True`
+
+Not all databases support `JOIN`s. When building complex charts, Superset will try to join the table to itself in order to compute `top_n` groups, for example. If the database doesn't support joins Superset will instead run a prequery, and use the results to build the final query.
+
+### `allows_subqueries = True`
+
+Similarly, not all databases support subqueries. For more complex charts Superset will build subqueries if possible, or run the query in two steps otherwise.
+
+### `allows_alias_in_select = True`
+
+Does the DB support aliases in the projection of a query, e.g.:
+
+```sql
+SELECT COUNT(*) AS cnt
+```
+
+Superset will try to use aliases whenever possible, in order to give friendly names to expressions.
+
+### `allows_alias_in_orderby = True`
+
+Does the DB support referencing aliases in the `ORDER BY`, e.g.:
+
+```sql
+SELECT
+  UPPER(country_of_origin) AS country,
+  COUNT(*) AS cnt
+FROM
+  some_table
+GROUP BY
+  UPPER(country_of_origin)
+ORDER BY
+  country
+```
+
+Otherwise the query is written as:
+
+```sql
+SELECT
+  UPPER(country_of_origin) AS country,
+  COUNT(*) AS cnt
+FROM
+  some_table
+GROUP BY
+  UPPER(country_of_origin)
+ORDER BY
+  UPPER(country_of_origin)
+```
+
+### `time_secondary_columns = False`
+
+Datasets can have a main datetime column (`main_dttm_col`), but can also have secondary time columns. When this attribute is true, whenever the secondary columns are filtered, the same filter is applied to the main datetime column.
+
+This might be useful if you have a table partitioned on a daily `ds` column in Hive (which doesn't support indexes), and a secondary column with the timestamp of the events, e.g.:
+
+|     ds     |        event        | ... |
+| ---------- | ------------------- | --- |
+| 2023-01-01 | 2023-01-01 23:58:41 | ... |
+| 2023-01-02 | 2023-01-02 00:03:17 | ... |
+| 2023-01-02 | 2023-01-02 00:14:02 | ... |
+
+With the table above, filtering only on `event` can be very inefficient. For example, this query:
+
+```sql
+SELECT
+  *
+FROM
+  some_table
+WHERE
+  event BETWEEN '2023-01-02 00:00:00' AND '2023-01-02 01:00:00'
+```
+
+Would scan all the `ds` partitions, even though only one is needed! By setting the attribute to true, if `ds` is set as the main datetime column then the query would be generated as:
+
+```sql
+SELECT
+  *
+FROM
+  some_table
+WHERE
+  event BETWEEN '2023-01-02 00:00:00' AND '2023-01-02 01:00:00' AND
+  ds BETWEEN '2023-01-02 00:00:00' AND '2023-01-02 01:00:00'
+```
+
+Which reads data from a single partition instead.
+
+### `time_groupby_inline = False`
+
+In theory this attribute should be used to omit time filters from the self-joins. When the attribute is false the time attribute will be present in the subquery used to compute limited series, e.g.:
+
+```sql
+SELECT DATE_TRUNC('day', ts) AS ts,
+       team AS team,
+       COUNT(*) AS count
+FROM public.threads
+JOIN
+  (SELECT team AS team__,
+          COUNT(*) AS mme_inner__
+   FROM public.threads
+   -- this is added when `time_groupby_inline = False`
+   WHERE ts >= TO_TIMESTAMP('2022-07-27 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
+     AND ts < TO_TIMESTAMP('2023-07-27 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
+   --
+   GROUP BY team
+   ORDER BY mme_inner__ DESC
+   LIMIT 5) AS anon_1 ON team = team__
+WHERE ts >= TO_TIMESTAMP('2022-07-27 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
+  AND ts < TO_TIMESTAMP('2023-07-27 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
+GROUP BY DATE_TRUNC('day', ts),
+         team
+ORDER BY count DESC
+LIMIT 10000;
+```
+
+In practice, the attribute doesn't seem to be working as of 2023-07-27.
+
+### `allows_alias_to_source_column = True`
+
+When this is true the database allows queries where an alias can overshadow existing column names. For example, in this query:
+
+```sql
+SELECT
+  foo + 1 AS foo
+FROM
+  some_table
+ORDER BY
+  foo  -- references the alias `foo + 1`, not the column `foo`
+```
+
+### `allows_hidden_orderby_agg = True`
+
+If set to true the database allows aggregation expressions in the `ORDER BY` that are not present in the projection (`SELECT`), e.g.:
+
+```sql
+SELECT
+  country,
+  COUNT(*)
+FROM
+  some_table
+GROUP BY
+  country
+ORDER BY
+  SUM(population)  -- not present in the `SELECT`
+```
+
+### `allows_hidden_cc_in_orderby = False`
+
+This is the opposite of `allows_alias_in_orderby`, for databases that require aliases in the `ORDER BY`. For example, BigQuery doesn't like this query:
+
+```sql
+SELECT
+  CASE
+    WHEN type = 'feature' THEN 'f'
+    WHEN type = 'bug' THEN 'b'
+    ELSE 'o'
+  END AS cc_type
+FROM
+  some_table
+GROUP BY
+  cc_type
+ORDER BY
+  CASE
+    WHEN type = 'feature' THEN 'f'
+    WHEN type = 'bug' THEN 'b'
+    ELSE 'o'
+  END
+```
+
+Instead, it must be written as:
+
+```sql
+SELECT
+  CASE
+    WHEN type = 'feature' THEN 'f'
+    WHEN type = 'bug' THEN 'b'
+    ELSE 'o'
+  END AS cc_type
+FROM
+  some_table
+GROUP BY
+  cc_type
+ORDER BY
+  cc_type
+```
+
+### `allows_cte_in_subquery = True`
+
+When a virtual dataset is used in a chart the original query is converted into a subquery, and is wrapped in an outer query that is generated based on the chart controls. The virtual dataset query might have a CTE, and some databases don't like subqueries with CTEs in them.
+
+When this attribute is false Superset will extract the CTE and move it outside of the subquery when generating SQL for charts. The name of the new CTE will be `cte_alias`, also defined in the DB engine spec.
+
+### `allow_limit_clause = True`
+
+Allows for the `LIMIT` clause. Otherwise, the database probably uses `TOP` to limit rows.
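+
+For example, a spec for a database that only supports `TOP` could disable it (a hypothetical sketch):
+
+```python
+from superset.db_engine_specs.base import BaseEngineSpec
+
+
+class TopOnlyEngineSpec(BaseEngineSpec):  # hypothetical
+    allow_limit_clause = False  # Superset will emit SELECT TOP n instead
+```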
+
+### `max_column_name_length: int | None = None`
+
+Most databases have a well-defined limit for the maximum length of a column name (SQLite is probably the one exception). While this attribute can be set to (and defaults to) `None`, it's highly recommended to set a value to prevent errors.
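+
+For example (a hypothetical spec; the right value depends on the database):
+
+```python
+from superset.db_engine_specs.base import BaseEngineSpec
+
+
+class MyEngineSpec(BaseEngineSpec):  # hypothetical
+    max_column_name_length = 63  # e.g. Postgres' NAMEDATALEN - 1
+```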
+
+### `allows_sql_comments = True`
+
+Are comments supported in the DB? In general, SQL comments are introduced by double dashes:
+
+```sql
+-- this is a comment
+SELECT *  -- we need everything
+FROM some_table
+```
+
+### `allows_escaped_colons = True`
+
+SQLAlchemy recommends escaping colons to prevent them from being interpreted as bindings to parameters. Because of this, when building queries from virtual datasets Superset will escape all colons with `\:`.
+
+This works for most databases except Athena. The `allows_escaped_colons` attribute specifies if the database supports escaped colons.
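+
+A small illustration of why the escaping matters, using SQLAlchemy's `text()` construct:
+
+```python
+from sqlalchemy import text
+
+# ":foo" would be parsed as a bind parameter and fail without a value:
+unescaped = text("SELECT ':foo'")
+
+# escaped with a backslash, it reaches the database as the literal ':foo':
+escaped = text("SELECT '\\:foo'")
+```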
+
+## Basic features
+
+These are features that all DB engine specs should support, as the name suggests. They provide a much better user experience.
+
+### Time grains
+
+The most basic feature that DB engine specs need to support is defining time grain expressions. These are dialect-specific SQL expressions that are used to compute metrics on a given time grain when building charts. For example, when computing the metric `COUNT(*)` on a daily basis, Superset will generate the following query:
+
+```sql
+SELECT
+  <DB engine spec expression for TimeGrain.DAY>,
+  COUNT(*)
+...
+GROUP BY
+  <DB engine spec expression for TimeGrain.DAY>
+```
+
+For some databases with support for `DATE_TRUNC` or `TIME_FLOOR` this is easy. Here's how Apache Druid computes 15 minute aggregations:
+
+```sql
+TIME_FLOOR(CAST({col} AS TIMESTAMP), 'PT15M')
+```
+
+Where `{col}` is the time column being aggregated (the expression is actually a Jinja2 template). Druid uses the ISO standard for durations, with `PT15M` representing 15 minutes.
+
+On the other hand, here's the same for SQLite:
+
+```sql
+DATETIME(
+  STRFTIME(
+    '%Y-%m-%dT%H:%M:00',
+    {col}
+  ),
+  printf(
+    '-%d minutes',
+    CAST(strftime('%M', {col}) AS INT) % 15
+  )
+)
+```
+
+The SQLite version has to truncate the column down to the minute, and then subtract a number of minutes equal to the minute value modulo 15.
+
+Time grain expressions are defined in the `_time_grain_expressions` class attribute, which maps from a `superset.constants.TimeGrain` to the SQL expression. The dictionary has a special key `None`, which should map to the column directly, for when no time grain is specified.
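+
+For example, a minimal spec could define (a hypothetical sketch; the right expressions depend on the dialect):
+
+```python
+from superset.constants import TimeGrain
+from superset.db_engine_specs.base import BaseEngineSpec
+
+
+class MyEngineSpec(BaseEngineSpec):  # hypothetical
+    _time_grain_expressions = {
+        None: "{col}",  # no time grain: use the column as-is
+        TimeGrain.DAY: "DATE_TRUNC('day', {col})",
+        TimeGrain.MONTH: "DATE_TRUNC('month', {col})",
+    }
+```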
+
+Note that it's possible to add new time grains via configuration. For example, if you want to add a "2 seconds" time grain to your installation you can add it to `TIME_GRAIN_ADDONS`, and implement it in `TIME_GRAIN_ADDON_EXPRESSIONS`:
+
+```python
+# superset_config.py
+TIME_GRAIN_ADDONS = {"PT2S": "2 second"}
+
+TIME_GRAIN_ADDON_EXPRESSIONS = {
+    "clickhouse": {
+        "PT2S": "toDateTime(intDiv(toUInt32(toDateTime({col})), 2)*2)",
+    }
+}
+```
+
+### Column type mapping
+
+Column type mapping, defined in the `column_type_mappings` class attribute, is just a way of mapping type names from the database to types Superset understands. The default values in `BaseEngineSpec` are sane:
+
+```python
+_default_column_type_mappings: tuple[ColumnTypeMapping, ...] = (
+    (
+        re.compile(r"^string", re.IGNORECASE),
+        types.String(),
+        GenericDataType.STRING,
+    ),
+    (
+        re.compile(r"^float", re.IGNORECASE),
+        types.Float(),
+        GenericDataType.NUMERIC,
+    ),
+    (
+        re.compile(r"^date", re.IGNORECASE),
+        types.Date(),
+        GenericDataType.TEMPORAL,
+    ),
+    (
+        re.compile(r"^bool(ean)?", re.IGNORECASE),
+        types.Boolean(),
+        GenericDataType.BOOLEAN,
+    ),
+    ...
+)
+```
+
+But you might want to implement more specific types in the DB engine spec, or complex types. For example, for MSSQL we have:
+
+```python
+from sqlalchemy.dialects.mssql.base import SMALLDATETIME
+
+class MssqlEngineSpec(BaseEngineSpec):
+    ...
+    column_type_mappings = (
+        (
+            re.compile(r"^smalldatetime.*", re.IGNORECASE),
+            SMALLDATETIME(),
+            GenericDataType.TEMPORAL,
+        ),
+    )
+```
+
+### Function names
+
+DB engine specs should implement a class method called `get_function_names` that returns a list of strings, representing all the function names that the database supports. This is used for autocomplete in SQL Lab.
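+
+A minimal sketch (a hypothetical spec returning a static list; some specs query the database instead):
+
+```python
+from superset.db_engine_specs.base import BaseEngineSpec
+
+
+class MyEngineSpec(BaseEngineSpec):  # hypothetical
+    @classmethod
+    def get_function_names(cls, database) -> list[str]:
+        # could also be fetched from the database, e.g. via SHOW FUNCTIONS
+        return ["AVG", "COUNT", "MAX", "MIN", "SUM"]
+```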
+
+### Masked encrypted extra
+
+Superset does a good job of keeping credentials secure. When you add a database with a password, for example:
+
+```
+postgresql://admin:[email protected]:5432/db
+```
+
+The password is sent over the network only when the database is created. When you edit the database later, Superset will return this as the SQLAlchemy URI:
+
+```
+postgresql://admin:[email protected]:5432/db
+```
+
+The password will be masked in the API response; it's not just masked in the browser UI. This is done in order to avoid sending the password unnecessarily over the network. Also, if a non-admin user has access to the API response, they won't be able to know the database password.
+
+When the database is edited, the Superset backend is smart enough to replace the masked password with the actual password, unless the password has changed. That is, if you change the database in the URI from `db` to `db2` the SQLAlchemy URI will be stored in the backend as:
+
+```
+postgresql://admin:[email protected]:5432/db2
+```
+
+The password is not the only piece of information where security is critical. For many databases (like BigQuery), sensitive information is stored in the credentials JSON payload. For example:
+
+```json
+{
+  "type": "service_account",
+  "project_id": "dbt-tutorial-347100",
+  "private_key_id": "4bc71f06990c864a590fad8b94be6a5904fc171f",
+  "private_key": "<SENSITIVE INFORMATION>",
+  "client_email": "[email protected]",
+  "client_id": "115666988796889519425",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth";,
+  "token_uri": "https://oauth2.googleapis.com/token";,
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs";,
+  "client_x509_cert_url": 
"https://www.googleapis.com/robot/v1/metadata/x509/dbt-user-278%40dbt-tutorial-347100.iam.gserviceaccount.com";
+}
+```
+
+Similarly to the password, we don't want to send `private_key` to the client when a database is edited; the Superset API should never return its actual contents. Instead, Superset should return a masked value, and users should be able to edit the JSON without having to type in the `private_key` on every edit.
+
+To do this, DB engine specs can implement two methods, `mask_encrypted_extra` and `unmask_encrypted_extra`. They have these names because the credentials are stored in an encrypted column called `encrypted_extra`. Here's what these methods look like for BigQuery:
+
+```python
+from superset.constants import PASSWORD_MASK
+
+
+class BigQueryEngineSpec(BaseEngineSpec):
+
+    @classmethod
+    def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None:
+        if encrypted_extra is None:
+            return encrypted_extra
+
+        try:
+            config = json.loads(encrypted_extra)
+        except (json.JSONDecodeError, TypeError):
+            return encrypted_extra
+
+        try:
+            config["credentials_info"]["private_key"] = PASSWORD_MASK
+        except KeyError:
+            pass
+
+        return json.dumps(config)
+
+    @classmethod
+    def unmask_encrypted_extra(
+        cls,
+        old: str | None,
+        new: str | None
+    ) -> str | None:
+        if old is None or new is None:
+            return new
+
+        try:
+            old_config = json.loads(old)
+            new_config = json.loads(new)
+        except (TypeError, json.JSONDecodeError):
+            return new
+
+        if "credentials_info" not in new_config:
+            return new
+
+        if "private_key" not in new_config["credentials_info"]:
+            return new
+
+        if new_config["credentials_info"]["private_key"] == PASSWORD_MASK:
+            new_config["credentials_info"]["private_key"] = old_config[
+                "credentials_info"
+            ]["private_key"]
+
+        return json.dumps(new_config)
+```
+
+This way, when a user edits an existing BigQuery connection, the `private_key` 
is shown as `XXXXXXXXXX`. Everything else in the JSON is still displayed, and 
the user can change any of the fields without having to provide the private key.
+
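+For illustration, a hypothetical round trip through the two methods:
+
+```python
+import json
+
+old = json.dumps({"credentials_info": {"private_key": "SECRET"}})
+
+# what the API returns when the database is edited
+masked = BigQueryEngineSpec.mask_encrypted_extra(old)
+
+# if the user saves without changing the key, the mask is swapped
+# back for the original value
+new = BigQueryEngineSpec.unmask_encrypted_extra(old, masked)
+assert json.loads(new)["credentials_info"]["private_key"] == "SECRET"
+```
+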
+Note that while this is a basic feature that should be implemented for 
security reasons, it only makes sense in DB engine specs that use 
`encrypted_extra` to store connection information.
+
+## Nice to have features
+
+The next set of features are nice to have. They don't apply to all databases, 
and are not strictly needed for security or usability.
+
+### User impersonation
+
+In general there's no user-level granularity when accessing a database in
+Superset. A single database connection is shared by all users who have access
+to that database. There are many use cases where this is not desirable, and
+some databases implement mechanisms by which they can **impersonate users**,
+potentially reducing the scope of permissions available to run the query.
+
+For example, the Google Sheets DB engine spec implements this via the 
`get_url_for_impersonation` class method:
+
+```python
+class GSheetsEngineSpec(ShillelaghEngineSpec):
+
+    @classmethod
+    def get_url_for_impersonation(
+        cls,
+        url: URL,
+        impersonate_user: bool,
+        username: str | None,
+    ) -> URL:
+        if impersonate_user and username is not None:
+            user = security_manager.find_user(username=username)
+            if user and user.email:
+                url = url.update_query_dict({"subject": user.email})
+
+        return url
+```
+
+The method `get_url_for_impersonation` updates the SQLAlchemy URI before
+every query. In this particular case, it will fetch the user's email and add
+it to the `subject` query argument. The driver will then lower the
+permissions to match that given user. This allows the connection to be
+configured with a service account that has access to all the spreadsheets,
+while giving users access to only the spreadsheets they own or that have been
+shared with them (or with their organization — Google w [...]
+
+Alternatively, it's also possible to impersonate users by implementing the
+`update_impersonation_config` class method, which modifies `connect_args` in
+place. You can use either method, and ideally they [should be consolidated in
+a single one](https://github.com/apache/superset/issues/24910).
+
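+A minimal sketch of the second approach (hypothetical; the proxy-user key
+shown follows the Hive pattern, and other databases will use different
+`connect_args` keys):
+
+```python
+from typing import Any
+
+
+class ExampleEngineSpec(BaseEngineSpec):
+
+    @classmethod
+    def update_impersonation_config(
+        cls,
+        connect_args: dict[str, Any],
+        uri: str,
+        username: str | None,
+    ) -> None:
+        # Modify ``connect_args`` in place so queries run as the user.
+        if username is not None:
+            configuration = connect_args.get("configuration", {})
+            configuration["hive.server2.proxy.user"] = username
+            connect_args["configuration"] = configuration
+```
+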
+### File upload
+
+When a DB engine spec supports file upload, it declares so via the
+`supports_file_upload` class attribute. The base class implementation is very
+generic and should work for any database that supports `CREATE TABLE`. It
+leverages Pandas and the
+[`df_to_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html)
+method.
+
+For some databases the `df_to_sql` classmethod needs to be implemented. For
+example, for BigQuery the DB engine spec implements a custom method that uses
+the
+[`to_gbq`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_gbq.html)
+method.
+
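+A hedged sketch of a custom implementation, loosely following the BigQuery
+approach (argument names match the base class; the `to_gbq` call is
+simplified):
+
+```python
+from typing import Any
+
+import pandas as pd
+
+from superset.models.core import Database
+from superset.sql_parse import Table
+
+
+class ExampleEngineSpec(BaseEngineSpec):
+    supports_file_upload = True
+
+    @classmethod
+    def df_to_sql(
+        cls,
+        database: Database,
+        table: Table,
+        df: pd.DataFrame,
+        to_sql_kwargs: dict[str, Any],
+    ) -> None:
+        # Hypothetical: delegate to a database-specific bulk loader
+        # instead of the generic ``DataFrame.to_sql``.
+        df.to_gbq(
+            f"{table.schema}.{table.table}",
+            if_exists=to_sql_kwargs.get("if_exists", "fail"),
+        )
+```
+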
+### Extra table metadata
+
+DB engine specs can return additional metadata associated with a table. This
+is done via the `extra_table_metadata` class method. Trino uses this to
+return information about the latest partition, for example, and BigQuery
+returns clustering information. This information is then surfaced in the SQL
+Lab UI, when browsing tables in the metadata explorer (on the left panel).
+
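+A minimal sketch (the payload shown is hypothetical; Superset simply
+surfaces whatever dictionary is returned):
+
+```python
+from typing import Any
+
+from superset.models.core import Database
+
+
+class ExampleEngineSpec(BaseEngineSpec):
+
+    @classmethod
+    def extra_table_metadata(
+        cls,
+        database: Database,
+        table_name: str,
+        schema_name: str | None,
+    ) -> dict[str, Any]:
+        # Hypothetical: expose the latest partition of the table.
+        return {
+            "partitions": {
+                "cols": ["ds"],
+                "latest": {"ds": "2023-08-11"},
+            },
+        }
+```
+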
+### DB API exception mapping
+
+Different DB API 2.0 drivers implement different exceptions, even if they have 
the same name. The `get_dbapi_exception_mapping` class method returns a 
dictionary mapping these custom exceptions to Superset exceptions, so that 
Superset can return more specific errors when an exception is raised by the 
underlying driver.
+
+For example, for ClickHouse we have:
+
+```python
+from urllib3.exceptions import NewConnectionError
+
+from superset.db_engine_specs.exceptions import SupersetDBAPIDatabaseError
+
+
+class ClickHouseEngineSpec(ClickHouseBaseEngineSpec):
+
+    @classmethod
+    def get_dbapi_exception_mapping(cls) -> dict[type[Exception], type[Exception]]:
+        return {NewConnectionError: SupersetDBAPIDatabaseError}
+```
+
+This way, if the ClickHouse driver raises a `NewConnectionError` it would get 
wrapped in a `SupersetDBAPIDatabaseError`.
+
+### Custom errors
+
+Queries can fail in many different ways. For example, in SQLite:
+
+```sql
+sqlite> CREATE TABLE a (b INT);
+sqlite> SELECT c FROM a;
+Error: no such column: c
+sqlite>
+```
+
+When a query fails, Superset will return the message, "Error: no such column: 
c", to the user as a generic error.
+
+Since ideally we want to return specific and actionable error messages, DB
+engine specs can implement methods that map error messages to more specific
+errors. For example, the SQLite DB engine spec defines:
+
+```python
+COLUMN_DOES_NOT_EXIST_REGEX = re.compile("no such column: (?P<column_name>.+)")
+
+
+class SqliteEngineSpec(BaseEngineSpec):
+
+    custom_errors: dict[
+        Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]
+    ] = {
+        COLUMN_DOES_NOT_EXIST_REGEX: (
+            __('We can\'t seem to resolve the column "%(column_name)s"'),
+            SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
+            {},
+        ),
+    }
+```
+
+This way, when a user selects a column that doesn't exist, Superset can
+return a more informative error.
+
+### Dynamic schema
+
+In SQL Lab it's possible to select a database, and then a schema in that
+database. Ideally, when running a query in SQL Lab, any unqualified table
+names (eg, `table`, instead of `schema.table`) should be in the selected
+schema. For example, if the user selects `dev` as the schema and then runs
+the following query:
+
+```sql
+SELECT * FROM my_table
+```
+
+The table `my_table` should live in the `dev` schema. In order to do that, 
it's necessary to modify the SQLAlchemy URI before running the query. Since 
different databases have different ways of doing that, this functionality is 
implemented via the `adjust_engine_params` class method. The method receives 
the SQLAlchemy URI and `connect_args`, as well as the schema in which the query 
should run. It then returns a potentially modified URI and `connect_args` to 
ensure that the query runs in  [...]
+
+When a DB engine spec implements `adjust_engine_params` it should have the
+class attribute `supports_dynamic_schema` set to true. This is critical for
+security, since **it allows Superset to know to which schema any unqualified
+table names belong**. For example, in the query above, if the database
+supports dynamic schema, Superset would check to see if the user running the
+query has access to `dev.my_table`. On the other hand, if the database
+doesn't support dynamic schema, Superset  [...]
+
+Implementing this method is also important for usability. When the method is
+not implemented, selecting the schema in SQL Lab has no effect on the schema
+in which the query runs, resulting in confusing results when using
+unqualified table names.
+
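+A hedged sketch, assuming a hypothetical database where the last component
+of the SQLAlchemy URI selects the schema:
+
+```python
+from typing import Any
+
+from sqlalchemy.engine.url import URL
+
+
+class ExampleEngineSpec(BaseEngineSpec):
+    supports_dynamic_schema = True
+
+    @classmethod
+    def adjust_engine_params(
+        cls,
+        uri: URL,
+        connect_args: dict[str, Any],
+        catalog: str | None = None,
+        schema: str | None = None,
+    ) -> tuple[URL, dict[str, Any]]:
+        # Point the URI at the requested schema before the query runs.
+        if schema:
+            uri = uri.set(database=schema)
+        return uri, connect_args
+```
+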
+### Catalog
+
+In general, databases support a hierarchy of one-to-many concepts:
+
+1. Database
+2. Catalog
+3. Namespace
+4. Table
+5. Column
+
+These concepts have different names depending on the database. For example, 
Postgres uses the following terminology:
+
+1. Cluster (database)
+2. Database (catalog)
+3. Schema (namespace)
+4. Table
+5. Column
+
+BigQuery, on the other hand:
+
+1. BigQuery (database)
+2. Project (catalog)
+3. Schema (namespace)
+4. Table
+5. Column
+
+Hive and Trino:
+
+1. Database
+2. Catalog
+3. Schema
+4. Table
+5. Column
+
+If the database supports catalogs, then the DB engine spec should have the 
`supports_catalog` class attribute set to true.
+
+### Dynamic catalog
+
+Superset has no support for multiple catalogs. A given SQLAlchemy URI connects 
to a single catalog, and it's impossible to browse other catalogs, or change 
the catalog. This means that datasets can only be added for the main catalog of 
the database. For example, with this Postgres SQLAlchemy URI:
+
+```
+postgresql://admin:[email protected]:5432/db
+```
+
+Here, datasets can only be added to the `db` catalog (which Postgres calls a 
"database").
+
+One confusing problem is that many databases allow querying across catalogs in 
SQL Lab. For example, with BigQuery one can write:
+
+```sql
+SELECT * FROM project.schema.table
+```
+
+This means that **even though the database is configured for a given catalog 
(project), users can query other projects**. This is a common workaround for 
creating datasets in catalogs other than the catalog configured in the 
database: just create a virtual dataset.
+
+Ideally we would want users to be able to choose the catalog when using SQL 
Lab and when creating datasets. In order to do that, DB engine specs need to 
implement a method that rewrites the SQLAlchemy URI depending on the desired 
catalog. This method already exists, and is the same method used for dynamic 
schemas, `adjust_engine_params`, but currently there are no UI affordances for 
choosing a catalog.
+
+Before the UI is built, Superset still needs to implement support for
+catalogs in its security manager. In the meantime, DB engine spec developers
+can already support dynamic catalogs by setting `supports_dynamic_catalog` to
+true and implementing `adjust_engine_params` to handle a catalog.
+
+### SSH tunneling
+
+Superset can connect to databases via an SSH tunnel. For databases where this 
doesn't make sense (eg, SQLite or BigQuery) the DB engine spec should have 
`disable_ssh_tunneling` set to true.
+
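+For example:
+
+```python
+class ExampleEngineSpec(BaseEngineSpec):
+    # hypothetical in-process database where a tunnel makes no sense
+    disable_ssh_tunneling = True
+```
+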
+### Query cancelation
+
+Superset will try to cancel running queries when the user requests it, but
+it's up to the DB engine spec to handle this.
+
+Some databases have implicit query cancelation: when a cursor stops being
+polled the database will cancel the query. For databases that behave like
+this, the class method `has_implicit_cancel` (which should really be a class
+attribute) should return true.
+
+For other databases, DB engine specs can implement query cancelation via the 
`prepare_cancel_query` and `cancel_query` methods. Implementation of query 
cancelation is usually heavily dependent on the database, but the DB engine 
specs that support it can serve as an example.
+
+### Get metrics on dataset creation
+
+When a physical dataset is first created, the `get_metrics` class method is
+called on the table. The base implementation returns the `COUNT(*)` metric,
+but DB engine specs can override `get_metrics` to return other metrics. This
+method is useful for semantic layers that contain their own metric
+definitions; when Superset connects to them it can automatically create those
+metrics when a dataset is added.
+
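+A hedged sketch (the signature follows the base class; the metric shown is
+just the default `COUNT(*)` definition):
+
+```python
+from sqlalchemy.engine.reflection import Inspector
+
+from superset.models.core import Database
+from superset.superset_typing import MetricType
+
+
+class ExampleEngineSpec(BaseEngineSpec):
+
+    @classmethod
+    def get_metrics(
+        cls,
+        database: Database,
+        inspector: Inspector,
+        table_name: str,
+        schema: str | None,
+    ) -> list[MetricType]:
+        # Hypothetical: read metric definitions from a semantic layer.
+        return [
+            {
+                "metric_name": "count",
+                "verbose_name": "COUNT(*)",
+                "metric_type": "count",
+                "expression": "COUNT(*)",
+            },
+        ]
+```
+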
+This feature is still experimental, and ideally there would be a mechanism for 
calling it periodically or when a dataset is explored, in order to sync new 
metric definitions to the dataset.
+
+### `WHERE` on latest partition
+
+In some databases, running `SELECT *` can be a **very expensive** operation, 
since the query might scan all partitions for a given table. Because of that, 
some DB engine specs implement the `where_latest_partition` method, which 
returns a modified SQLAlchemy query with an additional predicate that filters 
on the latest partition.
+
+## Advanced features
+
+### Expand complex types
+
+Some databases will visually expand complex types (arrays and structures)
+when displaying results from queries. For example, the BigQuery UI is able to
+expand objects into columns and arrays into rows, so that this:
+
+|   array   |      struct      |
+| --------- | ---------------- |
+| [1, 2, 3] | {a: one, b: two} |
+
+Is shown as:
+
+| array |      struct      | struct.a | struct.b |
+| ----- | ---------------- | -------- | -------- |
+| 1     | {a: one, b: two} | one      | two      |
+| 2     |                  |          |          |
+| 3     |                  |          |          |
+
+A similar behavior has been implemented in Superset for Presto, and can be 
enabled via the `PRESTO_EXPAND_DATA` feature flag. To implement this feature a 
DB engine spec should implement the `expand_data` method, which takes the 
columns and rows and returns modified columns and rows.
+
+Note that despite being implemented only for Presto, there is nothing
+Presto-specific about this behavior, and in theory it could be implemented in
+a generic way for all databases without requiring custom DB engine spec
+implementations (that is, the Presto `expand_data` method could be moved to
+the base class, after being cleaned up, and we could then enable the feature
+per database in the configuration).
+
+### Query cost estimation
+
+Some databases allow users to estimate the cost of running a query before
+running it. This is done via the `estimate_query_cost` method in DB engine
+specs, which receives the SQL and returns a list of "costs". The definition
+of what a "cost" is varies from database to database (in the few that support
+this functionality), and it can be formatted via the `query_cost_formatter`.
+
+The `query_cost_formatter` can be overridden with an arbitrary function via
+the config `QUERY_COST_FORMATTERS_BY_ENGINE`. This allows custom deployments
+of Superset to format the results in different ways. For example, at some
+point in Lyft the cost for running Presto queries would also show the carbon
+footprint (in trees).
+
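+For example, a hypothetical formatter registered in `superset_config.py`
+(the shape of the raw cost entries depends on the engine spec):
+
+```python
+from typing import Any
+
+
+def format_cost(raw_cost: list[dict[str, Any]]) -> list[dict[str, str]]:
+    # Render every estimated value as a human-readable string.
+    return [{k: str(v) for k, v in row.items()} for row in raw_cost]
+
+
+QUERY_COST_FORMATTERS_BY_ENGINE = {"presto": format_cost}
+```
+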
+### SQL validation
+
+A few databases support validating the syntax of the SQL as the user is
+typing it, indicating any errors in SQL Lab. This is usually done using an
+`EXPLAIN` query and, because it gets called every few seconds as the user
+types, it's important that the database returns the result quickly.
+
+This is currently implemented for Presto and Postgres, via custom classes in
+`superset/sql_validators` that should be enabled in the configuration.
+Implementing this as custom classes, instead of a `validate_sql` method in
+the DB engine spec, offers no advantages, and ideally in the future we should
+move the logic to DB engine specs.
+
+## Testing DB engine specs
+
+Superset has a command to test the connection to a given database, check
+whether the SQLAlchemy dialect implements all the methods used by Superset,
+and report which features are supported by the DB engine spec (if one
+exists). To run the tool, call the `test-db` command with the SQLAlchemy URI
+to be tested:
+
+```bash
+superset test-db sqlite://
+```
+
+If the connection needs additional arguments, they can be passed when the
+command runs.
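+
+For example, a hypothetical invocation passing extra connection arguments
+(check `superset test-db --help` for the exact option name):
+
+```bash
+superset test-db "trino://user@localhost:8080/catalog" \
+    --connect-args '{"verify": false}'
+```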
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index d3ffce018a..2572f779d3 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -364,7 +364,7 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
 
     force_column_alias_quotes = False
     arraysize = 0
-    max_column_name_length = 0
+    max_column_name_length: int | None = None
     try_remove_schema_from_table_name = True  # pylint: disable=invalid-name
     run_multiple_statements_as_one = False
     custom_errors: dict[
diff --git a/superset/db_engine_specs/lib.py b/superset/db_engine_specs/lib.py
new file mode 100644
index 0000000000..584cc4299c
--- /dev/null
+++ b/superset/db_engine_specs/lib.py
@@ -0,0 +1,314 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from typing import Any
+
+from superset.constants import TimeGrain
+from superset.db_engine_specs import load_engine_specs
+from superset.db_engine_specs.base import BaseEngineSpec
+
+LIMIT_METHODS = {
+    "FORCE_LIMIT": (
+        "modifies the query, replacing an existing LIMIT or adding a new one"
+    ),
+    "WRAP_SQL": "wraps the original query in a SELECT * with a LIMIT",
+    "FETCH_MANY": (
+        "runs the query unmodified but fetches only LIMIT rows from the cursor"
+    ),
+}
+
+DATABASE_DETAILS = {
+    "limit_method": "Method used to limit the rows in the subquery",
+    "joins": "Supports JOINs",
+    "subqueries": "Supports subqueries",
+    "alias_in_select": "Allows aliases in the SELECT statement",
+    "alias_in_orderby": "Allows referencing aliases in the ORDER BY statement",
+    "secondary_time_columns": "Supports secondary time columns",
+    "time_groupby_inline": (
+        "Allows ommiting time filters from inline GROUP BYs"
+    ),  # E: line too long (80 > 79 characters)
+    "alias_to_source_column": (
+        "Able to use source column when an alias overshadows it"
+    ),  # E: line too long (87 > 79 characters)
+    "order_by_not_in_select": (
+        "Allows aggregations in ORDER BY not present in the SELECT"
+    ),  # E: line too long (90 > 79 characters)
+    "expressions_in_orderby": "Allows expressions in ORDER BY",
+    "cte_in_subquery": "Allows CTE as a subquery",
+    "limit_clause": "Allows LIMIT clause (instead of TOP)",
+    "max_column_name": "Maximum column name",
+    "sql_comments": "Allows comments",
+    "escaped_colons": "Colons must be escaped",
+}
+BASIC_FEATURES = {
+    "masked_encrypted_extra": "Masks/unmasks encrypted_extra",
+    "column_type_mapping": "Has column type mappings",
+    "function_names": "Returns a list of function names",
+}
+NICE_TO_HAVE_FEATURES = {
+    "user_impersonation": "Supports user impersonation",
+    "file_upload": "Support file upload",
+    "extra_table_metadata": "Returns extra table metadata",
+    "dbapi_exception_mapping": "Maps driver exceptions to Superset exceptions",
+    "custom_errors": "Parses error messages and returns Superset errors",
+    "dynamic_schema": "Supports changing the schema per-query",
+    "catalog": "Supports catalogs",
+    "dynamic_catalog": "Supports changing the catalog per-query",
+    "ssh_tunneling": "Can be connected thru an SSH tunnel",
+    "query_cancelation": "Allows query to be canceled",
+    "get_metrics": "Returns additional metrics on dataset creation",
+    "where_latest_partition": "Supports querying the latest partition only",
+}
+ADVANCED_FEATURES = {
+    "expand_data": "Expands complex types (arrays, structs) into rows/columns",
+    "query_cost_estimation": "Supports query cost estimation",
+    "sql_validation": "Supports validating SQL before running query",
+}
+
+
+def has_custom_method(spec: type[BaseEngineSpec], method: str) -> bool:
+    """
+    Check if a class has a custom implementation of a method.
+
+    Since some classes don't inherit directly from ``BaseEngineSpec`` we need
+    to check the attributes of the spec and the base class.
+    """
+    return bool(
+        getattr(spec, method, False)
+        and getattr(BaseEngineSpec, method, False)
+        and getattr(spec, method).__qualname__
+        != getattr(BaseEngineSpec, method).__qualname__
+    )
+
+
+def diagnose(spec: type[BaseEngineSpec]) -> dict[str, Any]:
+    """
+    Run basic diagnostics on a given DB engine spec.
+    """
+    # pylint: disable=import-outside-toplevel
+    from superset.sql_validators.postgres import PostgreSQLValidator
+    from superset.sql_validators.presto_db import PrestoDBSQLValidator
+
+    sql_validators = {
+        "presto": PrestoDBSQLValidator,
+        "postgresql": PostgreSQLValidator,
+    }
+
+    output: dict[str, Any] = {}
+
+    output["time_grains"] = {}
+    supported_time_grains = spec.get_time_grain_expressions()
+    for time_grain in TimeGrain:
+        output["time_grains"][time_grain.name] = time_grain in 
supported_time_grains
+
+    output.update(
+        {
+            "module": spec.__module__,
+            "limit_method": spec.limit_method.upper(),
+            "joins": spec.allows_joins,
+            "subqueries": spec.allows_subqueries,
+            "alias_in_select": spec.allows_alias_in_select,
+            "alias_in_orderby": spec.allows_alias_in_orderby,
+            "secondary_time_columns": spec.time_secondary_columns,
+            "time_groupby_inline": spec.time_groupby_inline,
+            "alias_to_source_column": not spec.allows_alias_to_source_column,
+            "order_by_not_in_select": spec.allows_hidden_orderby_agg,
+            "expressions_in_orderby": spec.allows_hidden_cc_in_orderby,
+            "cte_in_subquery": spec.allows_cte_in_subquery,
+            "limit_clause": spec.allow_limit_clause,
+            "max_column_name": spec.max_column_name_length,
+            "sql_comments": spec.allows_sql_comments,
+            "escaped_colons": spec.allows_escaped_colons,
+            "masked_encrypted_extra": has_custom_method(spec, 
"mask_encrypted_extra"),
+            "column_type_mapping": bool(spec.column_type_mappings),
+            "function_names": has_custom_method(spec, "get_function_names"),
+            # there are multiple ways of implementing user impersonation
+            "user_impersonation": (
+                has_custom_method(spec, "update_impersonation_config")
+                or has_custom_method(spec, "get_url_for_impersonation")
+            ),
+            "file_upload": spec.supports_file_upload,
+            "extra_table_metadata": has_custom_method(spec, 
"extra_table_metadata"),
+            "dbapi_exception_mapping": has_custom_method(
+                spec, "get_dbapi_exception_mapping"
+            ),
+            "custom_errors": (
+                has_custom_method(spec, "extract_errors")
+                or has_custom_method(spec, "custom_errors")
+            ),
+            "dynamic_schema": spec.supports_dynamic_schema,
+            "catalog": spec.supports_catalog,
+            "dynamic_catalog": spec.supports_dynamic_catalog,
+            "ssh_tunneling": not spec.disable_ssh_tunneling,
+            "query_cancelation": (
+                has_custom_method(spec, "cancel_query") or 
spec.has_implicit_cancel()
+            ),
+            "get_metrics": has_custom_method(spec, "get_metrics"),
+            "where_latest_partition": has_custom_method(spec, 
"where_latest_partition"),
+            "expand_data": has_custom_method(spec, "expand_data"),
+            "query_cost_estimation": has_custom_method(spec, 
"estimate_query_cost")
+            or has_custom_method(spec, "estimate_statement_cost"),
+            # SQL validation is implemented in external classes
+            "sql_validation": spec.engine in sql_validators,
+        },
+    )
+
+    # compute score
+    score = 0
+
+    # each time grain is 1 point
+    score += sum(output["time_grains"][time_grain.name] for time_grain in TimeGrain)
+
+    basic = ["masked_encrypted_extra", "column_type_mapping", "function_names"]
+    nice_to_have = [
+        "user_impersonation",
+        "file_upload",
+        "extra_table_metadata",
+        "dbapi_exception_mapping",
+        "custom_errors",
+        "dynamic_schema",
+        "catalog",
+        "dynamic_catalog",
+        "ssh_tunneling",
+        "query_cancelation",
+        "get_metrics",
+        "where_latest_partition",
+    ]
+    advanced = ["expand_data", "query_cost_estimation", "sql_validation"]
+    score += sum(10 * int(output[key]) for key in basic)
+    score += sum(10 * int(output[key]) for key in nice_to_have)
+    score += sum(10 * int(output[key]) for key in advanced)
+    output["score"] = score
+    output["max_score"] = (
+        len(TimeGrain) + 10 * len(basic) + 10 * len(nice_to_have) + 10 * len(advanced)
+    )
+
+    return output
+
+
+def get_name(spec: type[BaseEngineSpec]) -> str:
+    """
+    Return a name for a given DB engine spec.
+    """
+    return spec.engine_name or spec.engine
+
+
+def generate_table() -> list[list[Any]]:
+    """
+    Generate a table showing info for all DB engine specs.
+    """
+    info = {}
+    for spec in sorted(load_engine_specs(), key=get_name):
+        info[get_name(spec)] = diagnose(spec)
+
+    # remove 3rd party DB engine specs
+    info = {k: v for k, v in info.items() if v["module"].startswith("superset")}
+
+    rows = []  # pylint: disable=redefined-outer-name
+    rows.append(["Feature"] + list(info))  # header row
+    rows.append(["Module"] + list(db_info["module"] for db_info in 
info.values()))
+
+    # descriptive
+    keys = [
+        "limit_method",
+        "joins",
+        "subqueries",
+        "alias_in_select",
+        "alias_in_orderby",
+        "secondary_time_columns",
+        "time_groupby_inline",
+        "alias_to_source_column",
+        "order_by_not_in_select",
+        "expressions_in_orderby",
+        "cte_in_subquery",
+        "limit_clause",
+        "max_column_name",
+        "sql_comments",
+        "escaped_colons",
+    ]
+    for key in keys:
+        rows.append(
+            [DATABASE_DETAILS[key]] + list(db_info[key] for db_info in info.values())
+        )
+
+    # basic
+    for time_grain in TimeGrain:
+        rows.append(
+            [f"Has time grain {time_grain.name}"]
+            + list(db_info["time_grains"][time_grain.name] for db_info in 
info.values())
+        )
+    keys = [
+        "masked_encrypted_extra",
+        "column_type_mapping",
+        "function_names",
+    ]
+    for key in keys:
+        rows.append(
+            [BASIC_FEATURES[key]] + list(db_info[key] for db_info in info.values())
+        )
+
+    # nice to have
+    keys = [
+        "user_impersonation",
+        "file_upload",
+        "extra_table_metadata",
+        "dbapi_exception_mapping",
+        "custom_errors",
+        "dynamic_schema",
+        "catalog",
+        "dynamic_catalog",
+        "ssh_tunneling",
+        "query_cancelation",
+        "get_metrics",
+        "where_latest_partition",
+    ]
+    for key in keys:
+        rows.append(
+            [NICE_TO_HAVE_FEATURES[key]]
+            + list(db_info[key] for db_info in info.values())
+        )
+
+    # advanced
+    keys = [
+        "expand_data",
+        "query_cost_estimation",
+        "sql_validation",
+    ]
+    for key in keys:
+        rows.append(
+            [ADVANCED_FEATURES[key]] + list(db_info[key] for db_info in info.values())
+        )
+
+    rows.append(["Score"] + list(db_info["score"] for db_info in 
info.values()))
+
+    return rows
+
+
+if __name__ == "__main__":
+    from superset.app import create_app
+
+    app = create_app()
+    with app.app_context():
+        rows = generate_table()
+
+    headers = rows.pop(0)
+    print("| " + " | ".join(headers) + " |")
+    print("| " + " ---| " * len(headers))
+    for row in rows:
+        print("| " + " | ".join(str(col) for col in row) + " |")
diff --git a/superset/db_engine_specs/sqlite.py b/superset/db_engine_specs/sqlite.py
index c06660abbd..dce21dc515 100644
--- a/superset/db_engine_specs/sqlite.py
+++ b/superset/db_engine_specs/sqlite.py
@@ -95,7 +95,7 @@ class SqliteEngineSpec(BaseEngineSpec):
             "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')"
         ),
     }
-    # not sure why these are diffenret
+    # not sure why these are different
     _time_grain_expressions.update(
         {
             TimeGrain.HALF_HOUR: _time_grain_expressions[TimeGrain.THIRTY_MINUTES],
