This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 05a138a191 feat: query datasets from SQL Lab (#15241)
05a138a191 is described below

commit 05a138a191cb0e22cf7b6b9cce6fa985843d1809
Author: Beto Dealmeida <[email protected]>
AuthorDate: Wed Jun 1 15:18:40 2022 -0700

    feat: query datasets from SQL Lab (#15241)
    
    * feat: Jinja2 macro for querying datasets
    
    * Add docs
    
    * Address comments
---
 docs/docs/installation/sql-templating.mdx |  24 +++++++
 superset/connectors/base/models.py        |   2 +-
 superset/jinja_context.py                 |  33 ++++++++++
 tests/unit_tests/jinja_context_test.py    | 101 +++++++++++++++++++++++++++++-
 4 files changed, 158 insertions(+), 2 deletions(-)

diff --git a/docs/docs/installation/sql-templating.mdx 
b/docs/docs/installation/sql-templating.mdx
index 8908d39f02..905f99cb9f 100644
--- a/docs/docs/installation/sql-templating.mdx
+++ b/docs/docs/installation/sql-templating.mdx
@@ -273,3 +273,27 @@ Here's a concrete example:
     superiors
     order by lineage, level
 ```
+
+**Datasets**
+
+It's possible to query physical and virtual datasets using the `dataset` 
macro. This is useful if you've defined computed columns and metrics on your 
datasets, and want to reuse the definition in adhoc SQL Lab queries.
+
+To use the macro, first you need to find the ID of the dataset. This can be 
done by going to the view showing all the datasets, hovering over the dataset 
you're interested in, and looking at its URL. For example, if the URL for a 
dataset is https://superset.example.org/superset/explore/table/42/ its ID is 42.
+
+Once you have the ID you can query it as if it were a table:
+
+```
+SELECT * FROM {{ dataset(42) }} LIMIT 10
+```
+
+If you want to select the metric definitions as well, in addition to the 
columns, you need to pass an additional keyword argument:
+
+```
+SELECT * FROM {{ dataset(42, include_metrics=True) }} LIMIT 10
+```
+
+Since metrics are aggregations, the resulting SQL expression will be grouped 
by all non-metric columns. You can specify a subset of columns to group by 
instead:
+
+```
+SELECT * FROM {{ dataset(42, include_metrics=True, columns=["ds", "category"]) 
}} LIMIT 10
+```
diff --git a/superset/connectors/base/models.py 
b/superset/connectors/base/models.py
index 73b841ac16..7809dab4ae 100644
--- a/superset/connectors/base/models.py
+++ b/superset/connectors/base/models.py
@@ -449,7 +449,7 @@ class BaseDatasource(
     def get_query_str(self, query_obj: QueryObjectDict) -> str:
         """Returns a query as a string
 
-        This is used to be displayed to the user so that she/he can
+        This is used to be displayed to the user so that they can
         understand what is taking place behind the scene"""
         raise NotImplementedError()
 
diff --git a/superset/jinja_context.py b/superset/jinja_context.py
index e365b9a708..42f6809c74 100644
--- a/superset/jinja_context.py
+++ b/superset/jinja_context.py
@@ -38,6 +38,7 @@ from sqlalchemy.engine.interfaces import Dialect
 from sqlalchemy.types import String
 from typing_extensions import TypedDict
 
+from superset.datasets.commands.exceptions import DatasetNotFoundError
 from superset.exceptions import SupersetTemplateException
 from superset.extensions import feature_flag_manager
 from superset.utils.core import convert_legacy_filters_into_adhoc, 
merge_extra_filters
@@ -490,6 +491,7 @@ class JinjaTemplateProcessor(BaseTemplateProcessor):
                 "cache_key_wrapper": partial(safe_proxy, 
extra_cache.cache_key_wrapper),
                 "filter_values": partial(safe_proxy, 
extra_cache.filter_values),
                 "get_filters": partial(safe_proxy, extra_cache.get_filters),
+                "dataset": partial(safe_proxy, dataset_macro),
             }
         )
 
@@ -602,3 +604,34 @@ def get_template_processor(
     else:
         template_processor = NoOpTemplateProcessor
     return template_processor(database=database, table=table, query=query, 
**kwargs)
+
+
+def dataset_macro(
+    dataset_id: int,
+    include_metrics: bool = False,
+    columns: Optional[List[str]] = None,
+) -> str:
+    """
+    Given a dataset ID, return the SQL that represents it.
+
+    The generated SQL includes all columns (including computed) by default. 
Optionally
+    the user can also request metrics to be included, and columns to group by.
+    """
+    # pylint: disable=import-outside-toplevel
+    from superset.datasets.dao import DatasetDAO
+
+    dataset = DatasetDAO.find_by_id(dataset_id)
+    if not dataset:
+        raise DatasetNotFoundError(f"Dataset {dataset_id} not found!")
+
+    columns = columns or [column.column_name for column in dataset.columns]
+    metrics = [metric.metric_name for metric in dataset.metrics]
+    query_obj = {
+        "is_timeseries": False,
+        "filter": [],
+        "metrics": metrics if include_metrics else None,
+        "columns": columns,
+    }
+    sqla_query = dataset.get_query_str_extended(query_obj)
+    sql = sqla_query.sql
+    return f"({sql}) AS dataset_{dataset_id}"
diff --git a/tests/unit_tests/jinja_context_test.py 
b/tests/unit_tests/jinja_context_test.py
index 1f88f4f1a9..75c49f0977 100644
--- a/tests/unit_tests/jinja_context_test.py
+++ b/tests/unit_tests/jinja_context_test.py
@@ -14,8 +14,15 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+# pylint: disable=invalid-name, unused-argument
 
-from superset.jinja_context import where_in
+import json
+
+import pytest
+from pytest_mock import MockFixture
+
+from superset.datasets.commands.exceptions import DatasetNotFoundError
+from superset.jinja_context import dataset_macro, where_in
 
 
 def test_where_in() -> None:
@@ -25,3 +32,95 @@ def test_where_in() -> None:
     assert where_in([1, "b", 3]) == "(1, 'b', 3)"
     assert where_in([1, "b", 3], '"') == '(1, "b", 3)'
     assert where_in(["O'Malley's"]) == "('O''Malley''s')"
+
+
+def test_dataset_macro(mocker: MockFixture, app_context: None) -> None:
+    """
+    Test the ``dataset_macro`` macro.
+    """
+    # pylint: disable=import-outside-toplevel
+    from superset.connectors.sqla.models import SqlaTable, SqlMetric, 
TableColumn
+    from superset.models.core import Database
+
+    columns = [
+        TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
+        TableColumn(column_name="num_boys", type="INTEGER"),
+        TableColumn(column_name="revenue", type="INTEGER"),
+        TableColumn(column_name="expenses", type="INTEGER"),
+        TableColumn(
+            column_name="profit", type="INTEGER", expression="revenue-expenses"
+        ),
+    ]
+    metrics = [
+        SqlMetric(metric_name="cnt", expression="COUNT(*)"),
+    ]
+
+    dataset = SqlaTable(
+        table_name="old_dataset",
+        columns=columns,
+        metrics=metrics,
+        main_dttm_col="ds",
+        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not 
used
+        database=Database(database_name="my_database", 
sqlalchemy_uri="sqlite://"),
+        offset=-8,
+        description="This is the description",
+        is_featured=1,
+        cache_timeout=3600,
+        schema="my_schema",
+        sql=None,
+        params=json.dumps(
+            {
+                "remote_id": 64,
+                "database_name": "examples",
+                "import_time": 1606677834,
+            }
+        ),
+        perm=None,
+        filter_select_enabled=1,
+        fetch_values_predicate="foo IN (1, 2)",
+        is_sqllab_view=0,  # no longer used?
+        template_params=json.dumps({"answer": "42"}),
+        schema_perm=None,
+        extra=json.dumps({"warning_markdown": "*WARNING*"}),
+    )
+    DatasetDAO = mocker.patch("superset.datasets.dao.DatasetDAO")
+    DatasetDAO.find_by_id.return_value = dataset
+
+    assert (
+        dataset_macro(1)
+        == """(SELECT ds AS ds,
+       num_boys AS num_boys,
+       revenue AS revenue,
+       expenses AS expenses,
+       revenue-expenses AS profit
+FROM my_schema.old_dataset) AS dataset_1"""
+    )
+
+    assert (
+        dataset_macro(1, include_metrics=True)
+        == """(SELECT ds AS ds,
+       num_boys AS num_boys,
+       revenue AS revenue,
+       expenses AS expenses,
+       revenue-expenses AS profit,
+       COUNT(*) AS cnt
+FROM my_schema.old_dataset
+GROUP BY ds,
+         num_boys,
+         revenue,
+         expenses,
+         revenue-expenses) AS dataset_1"""
+    )
+
+    assert (
+        dataset_macro(1, include_metrics=True, columns=["ds"])
+        == """(SELECT ds AS ds,
+       COUNT(*) AS cnt
+FROM my_schema.old_dataset
+GROUP BY ds) AS dataset_1"""
+    )
+
+    DatasetDAO.find_by_id.return_value = None
+    with pytest.raises(DatasetNotFoundError) as excinfo:
+        dataset_macro(1)
+    assert str(excinfo.value) == "Dataset 1 not found!"

Reply via email to