This is an automated email from the ASF dual-hosted git repository.

Vitor-Avila pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 4311a15eb2b feat(sqlglot): Vertica dialect (#39969)
4311a15eb2b is described below

commit 4311a15eb2bee9bc842396904e0bc3792c853449
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri May 8 13:34:34 2026 -0400

    feat(sqlglot): Vertica dialect (#39969)
---
 superset/sql/dialects/__init__.py              |  11 +-
 superset/sql/dialects/vertica.py               |  87 +++++++++++
 superset/sql/parse.py                          |   4 +-
 tests/unit_tests/sql/dialects/vertica_tests.py | 197 +++++++++++++++++++++++++
 4 files changed, 296 insertions(+), 3 deletions(-)

diff --git a/superset/sql/dialects/__init__.py 
b/superset/sql/dialects/__init__.py
index 0334efb5f11..f7b82538115 100644
--- a/superset/sql/dialects/__init__.py
+++ b/superset/sql/dialects/__init__.py
@@ -20,5 +20,14 @@ from .dremio import Dremio
 from .firebolt import Firebolt, FireboltOld
 from .opensearch import OpenSearch
 from .pinot import Pinot
+from .vertica import Vertica
 
-__all__ = ["DB2", "Dremio", "Firebolt", "FireboltOld", "OpenSearch", "Pinot"]
+__all__ = [
+    "DB2",
+    "Dremio",
+    "Firebolt",
+    "FireboltOld",
+    "OpenSearch",
+    "Pinot",
+    "Vertica",
+]
diff --git a/superset/sql/dialects/vertica.py b/superset/sql/dialects/vertica.py
new file mode 100644
index 00000000000..e26886f6ee8
--- /dev/null
+++ b/superset/sql/dialects/vertica.py
@@ -0,0 +1,87 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Vertica dialect.
+
+Vertica is wire-compatible with PostgreSQL but provides additional analytical
+functions natively. This dialect extends the Postgres dialect to preserve
+Vertica-native functions that the Postgres generator would otherwise rewrite.
+"""
+
+from __future__ import annotations
+
+from sqlglot import exp
+from sqlglot.dialects.dialect import date_delta_sql
+from sqlglot.dialects.postgres import Postgres
+from sqlglot.helper import seq_get
+
+
+def _build_datediff(args: list[exp.Expression]) -> exp.DateDiff:
+    # Vertica's signature is DATEDIFF(unit, start, end); the default sqlglot
+    # parser assumes (end, start, unit), so we remap the positional args.
+    return exp.DateDiff(
+        this=seq_get(args, 2),
+        expression=seq_get(args, 1),
+        unit=exp.var(seq_get(args, 0).name) if seq_get(args, 0) else None,
+    )
+
+
+class Vertica(Postgres):
+    """
+    Vertica dialect.
+
+    Extends PostgreSQL by keeping functions that Vertica supports natively but
+    Postgres does not (e.g. ``LAST_DAY``, ``DATEDIFF``, ``MEDIAN``, ``NVL2``).
+    """
+
+    class Parser(Postgres.Parser):
+        FUNCTIONS = {
+            **Postgres.Parser.FUNCTIONS,
+            "DATEDIFF": _build_datediff,
+            "TIMESTAMPDIFF": _build_datediff,
+        }
+
+    class Generator(Postgres.Generator):
+        # Vertica's LAST_DAY only accepts a date/timestamp; it does not take a
+        # date part argument like Snowflake's variant.
+        LAST_DAY_SUPPORTS_DATE_PART = False
+
+        # Vertica supports MEDIAN and NVL2 natively; Postgres does not, and the
+        # inherited generator rewrites them into PERCENTILE_CONT and CASE
+        # expressions respectively.
+        SUPPORTS_MEDIAN = True
+        NVL2_SUPPORTED = True
+
+        # Emit INTERVAL '<value>' <unit> (SQL-standard) instead of the
+        # Postgres-style INTERVAL '<value> <unit>'. Vertica miscomputes the
+        # combined-string form for MONTH/YEAR units (treats them as a fixed
+        # number of days). See https://forum.vertica.com/discussion/229329/.
+        SINGLE_STRING_INTERVAL = False
+
+        TRANSFORMS = {
+            **Postgres.Generator.TRANSFORMS,
+            # Postgres rewrites LAST_DAY into DATE_TRUNC + INTERVAL arithmetic
+            # because it lacks the function. Vertica supports it natively, so
+            # drop the rewrite and fall back to the base lastday_sql.
+            exp.LastDay: lambda self, e: self.function_fallback_sql(e),
+            # Postgres rewrites DATEDIFF into EXTRACT(epoch ...) / AGE() math.
+            # Vertica's native form is DATEDIFF(unit, start, end), matching
+            # Snowflake's signature.
+            exp.DateDiff: date_delta_sql("DATEDIFF"),
+            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
+        }
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index bb3ef5e1c4b..aab3a61c50e 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -45,7 +45,7 @@ from sqlglot.optimizer.scope import (
 )
 
 from superset.exceptions import QueryClauseValidationException, 
SupersetParseError
-from superset.sql.dialects import DB2, Dremio, Firebolt, OpenSearch, Pinot
+from superset.sql.dialects import DB2, Dremio, Firebolt, OpenSearch, Pinot, 
Vertica
 
 if TYPE_CHECKING:
     from superset.models.core import Database
@@ -113,7 +113,7 @@ SQLGLOT_DIALECTS = {
     # "taosws": ???
     "teradatasql": Dialects.TERADATA,
     "trino": Dialects.TRINO,
-    "vertica": Dialects.POSTGRES,
+    "vertica": Vertica,
     "yql": Dialects.CLICKHOUSE,
 }
 
diff --git a/tests/unit_tests/sql/dialects/vertica_tests.py 
b/tests/unit_tests/sql/dialects/vertica_tests.py
new file mode 100644
index 00000000000..bf39cf76cde
--- /dev/null
+++ b/tests/unit_tests/sql/dialects/vertica_tests.py
@@ -0,0 +1,197 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from sqlglot import parse_one
+
+from superset.sql.dialects.vertica import Vertica
+
+
+def test_last_day_round_trips_natively() -> None:
+    """
+    Vertica supports LAST_DAY natively, unlike Postgres which rewrites it into
+    DATE_TRUNC + INTERVAL arithmetic.
+    """
+    sql = "SELECT LAST_DAY(DATE('2026-01-15'))"
+
+    ast = parse_one(sql, dialect=Vertica)
+    regenerated = ast.sql(dialect=Vertica)
+
+    assert regenerated == "SELECT LAST_DAY(DATE('2026-01-15'))"
+
+
+def test_last_day_not_rewritten_like_postgres() -> None:
+    """
+    Postgres rewrites LAST_DAY because it lacks the function. Verify Vertica
+    does not inherit that rewrite.
+    """
+    sql = "SELECT LAST_DAY(DATE('2026-01-15'))"
+
+    postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
+    vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
+
+    assert "DATE_TRUNC" in postgres_sql
+    assert "DATE_TRUNC" not in vertica_sql
+    assert "LAST_DAY" in vertica_sql
+
+
+def test_translate_from_postgres_to_vertica() -> None:
+    """
+    A LAST_DAY expression parsed from Postgres should generate the native
+    Vertica form, not the Postgres rewrite.
+    """
+    sql = "SELECT LAST_DAY(DATE('2026-01-15'))"
+
+    ast = parse_one(sql, dialect="postgres")
+    regenerated = ast.sql(dialect=Vertica)
+
+    assert regenerated == "SELECT LAST_DAY(DATE('2026-01-15'))"
+
+
+def test_postgres_features_still_work() -> None:
+    """
+    Vertica inherits from Postgres, so unrelated Postgres syntax should still
+    parse and regenerate correctly.
+    """
+    sql = "SELECT a::INT, b || c FROM t WHERE d ~ '^foo'"
+
+    ast = parse_one(sql, dialect=Vertica)
+    regenerated = ast.sql(dialect=Vertica)
+
+    assert regenerated == "SELECT CAST(a AS INT), b || c FROM t WHERE d ~ 
'^foo'"
+
+
+def test_datediff_round_trips_natively() -> None:
+    """
+    Vertica's DATEDIFF is ``DATEDIFF(unit, start, end)``. Postgres rewrites it
+    into ``EXTRACT(epoch ...) / N`` arithmetic.
+    """
+    sql = "SELECT DATEDIFF('day', a, b) FROM t"
+
+    ast = parse_one(sql, dialect=Vertica)
+    regenerated = ast.sql(dialect=Vertica)
+
+    assert regenerated == "SELECT DATEDIFF(DAY, a, b) FROM t"
+
+
+def test_datediff_month_round_trips_natively() -> None:
+    """
+    The MONTH unit exercises Postgres's ``AGE()`` rewrite branch, which Vertica
+    does not need.
+    """
+    sql = "SELECT DATEDIFF('month', a, b) FROM t"
+
+    ast = parse_one(sql, dialect=Vertica)
+    regenerated = ast.sql(dialect=Vertica)
+
+    assert regenerated == "SELECT DATEDIFF(MONTH, a, b) FROM t"
+
+
+def test_datediff_not_rewritten_like_postgres() -> None:
+    """
+    Postgres rewrites DATEDIFF; Vertica should keep the native call.
+    """
+    sql = "SELECT DATEDIFF('day', a, b) FROM t"
+
+    postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
+    vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
+
+    assert "DATEDIFF" not in postgres_sql
+    assert "DATEDIFF(DAY" in vertica_sql
+
+
+def test_median_round_trips_natively() -> None:
+    """
+    Vertica supports MEDIAN as a native analytic/aggregate function. Postgres
+    rewrites it into PERCENTILE_CONT + WITHIN GROUP.
+    """
+    sql = "SELECT MEDIAN(x) FROM t"
+
+    ast = parse_one(sql, dialect=Vertica)
+    regenerated = ast.sql(dialect=Vertica)
+
+    assert regenerated == "SELECT MEDIAN(x) FROM t"
+
+
+def test_median_not_rewritten_like_postgres() -> None:
+    sql = "SELECT MEDIAN(x) FROM t"
+
+    postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
+    vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
+
+    assert "PERCENTILE_CONT" in postgres_sql
+    assert "PERCENTILE_CONT" not in vertica_sql
+    assert "MEDIAN" in vertica_sql
+
+
+def test_nvl2_round_trips_natively() -> None:
+    """
+    Vertica supports NVL2 natively; Postgres rewrites it into a CASE.
+    """
+    sql = "SELECT NVL2(a, b, c) FROM t"
+
+    ast = parse_one(sql, dialect=Vertica)
+    regenerated = ast.sql(dialect=Vertica)
+
+    assert regenerated == "SELECT NVL2(a, b, c) FROM t"
+
+
+def test_nvl2_not_rewritten_like_postgres() -> None:
+    sql = "SELECT NVL2(a, b, c) FROM t"
+
+    postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
+    vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
+
+    assert "CASE" in postgres_sql
+    assert "CASE" not in vertica_sql
+    assert "NVL2" in vertica_sql
+
+
+def test_interval_uses_sql_standard_form() -> None:
+    """
+    Vertica miscomputes month/year arithmetic when given the Postgres-style
+    single-string interval (``INTERVAL '2 MONTH'``). The SQL-standard form
+    ``INTERVAL '2' MONTH`` is correct on all Vertica versions.
+    See https://forum.vertica.com/discussion/229329/.
+    """
+    sql = "SELECT date_col + INTERVAL '2 MONTH' FROM t"
+
+    ast = parse_one(sql, dialect=Vertica)
+    regenerated = ast.sql(dialect=Vertica)
+
+    assert regenerated == "SELECT date_col + INTERVAL '2' MONTH FROM t"
+
+
+def test_interval_year_uses_sql_standard_form() -> None:
+    sql = "SELECT date_col + INTERVAL '1 YEAR' FROM t"
+
+    ast = parse_one(sql, dialect=Vertica)
+    regenerated = ast.sql(dialect=Vertica)
+
+    assert regenerated == "SELECT date_col + INTERVAL '1' YEAR FROM t"
+
+
+def test_interval_diverges_from_postgres() -> None:
+    """
+    Postgres emits the combined-string form; Vertica should not.
+    """
+    sql = "SELECT date_col + INTERVAL '2 MONTH' FROM t"
+
+    postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
+    vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
+
+    assert "INTERVAL '2 MONTH'" in postgres_sql
+    assert "INTERVAL '2' MONTH" in vertica_sql

Reply via email to