This is an automated email from the ASF dual-hosted git repository. erikrit pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
     new f80fadf  Reduce dashboard bootstrap payload (#9284)
f80fadf is described below

commit f80fadff0e0635742714f9de1aa9ae93ef2ef7a7
Author: Erik Ritter <erik.rit...@airbnb.com>
AuthorDate: Fri Mar 13 12:18:22 2020 -0700

    Reduce dashboard bootstrap payload (#9284)
---
 superset/config.py                 |  1 +
 superset/connectors/base/models.py | 94 ++++++++++++++++++++++++++++++++++++++
 superset/utils/core.py             | 11 +++++
 superset/views/core.py             | 16 +++++--
 tests/model_tests.py               | 16 ++++++-
 tests/utils_tests.py               |  6 +++
 6 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/superset/config.py b/superset/config.py
index 443a92d..a44a671 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -282,6 +282,7 @@ DEFAULT_FEATURE_FLAGS = {
     "ENABLE_EXPLORE_JSON_CSRF_PROTECTION": False,
     "KV_STORE": False,
     "PRESTO_EXPAND_DATA": False,
+    "REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD": False,
     "SHARE_QUERIES_VIA_KV_STORE": False,
     "TAGGING_SYSTEM": False,
 }
diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py
index eac6cbb..dfcafbf 100644
--- a/superset/connectors/base/models.py
+++ b/superset/connectors/base/models.py
@@ -27,6 +27,28 @@ from superset.models.helpers import AuditMixinNullable, ImportMixin, QueryResult
 from superset.models.slice import Slice
 from superset.utils import core as utils
 
+METRIC_FORM_DATA_PARAMS = [
+    "metric",
+    "metrics",
+    "metric_2",
+    "percent_metrics",
+    "secondary_metric",
+    "size",
+    "timeseries_limit_metric",
+    "x",
+    "y",
+]
+
+COLUMN_FORM_DATA_PARAMS = [
+    "all_columns",
+    "all_columns_x",
+    "columns",
+    "entity",
+    "groupby",
+    "order_by_cols",
+    "series",
+]
+
 
 class BaseDatasource(
     AuditMixinNullable, ImportMixin
@@ -213,6 +235,70 @@ class BaseDatasource(
             "select_star": self.select_star,
         }
 
+    def data_for_slices(self, slices: List[Slice]) -> Dict[str, Any]:
+        """
+        The representation of the datasource containing only the required data
+        to render the provided slices.
+
+        Used to reduce the payload when loading a dashboard.
+        """
+        data = self.data
+        metric_names = set()
+        column_names = set()
+        for slc in slices:
+            form_data = slc.form_data
+
+            # pull out all required metrics from the form_data
+            for param in METRIC_FORM_DATA_PARAMS:
+                for metric in utils.get_iterable(form_data.get(param) or []):
+                    metric_names.add(utils.get_metric_name(metric))
+
+                    if utils.is_adhoc_metric(metric):
+                        column_names.add(
+                            (metric.get("column") or {}).get("column_name")
+                        )
+
+            # pull out all required columns from the form_data
+            for filter_ in form_data.get("adhoc_filters") or []:
+                if filter_["clause"] == "WHERE" and filter_.get("subject"):
+                    column_names.add(filter_.get("subject"))
+
+            for param in COLUMN_FORM_DATA_PARAMS:
+                for column in utils.get_iterable(form_data.get(param) or []):
+                    column_names.add(column)
+
+        filtered_metrics = [
+            metric
+            for metric in data["metrics"]
+            if metric["metric_name"] in metric_names
+        ]
+
+        filtered_columns = [
+            column
+            for column in data["columns"]
+            if column["column_name"] in column_names
+        ]
+
+        del data["description"]
+        data.update({"metrics": filtered_metrics})
+        data.update({"columns": filtered_columns})
+        verbose_map = {"__timestamp": "Time"}
+        verbose_map.update(
+            {
+                metric["metric_name"]: metric["verbose_name"] or metric["metric_name"]
+                for metric in filtered_metrics
+            }
+        )
+        verbose_map.update(
+            {
+                column["column_name"]: column["verbose_name"] or column["column_name"]
+                for column in filtered_columns
+            }
+        )
+        data["verbose_map"] = verbose_map
+
+        return data
+
     @staticmethod
     def filter_values_handler(
         values, target_column_is_numeric=False, is_list_target=False
@@ -353,6 +439,14 @@ class BaseDatasource(
         """
         return []
 
+    def __hash__(self) -> int:
+        return hash(self.uid)
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, BaseDatasource):
+            return NotImplemented
+        return self.uid == other.uid
+
 
 class BaseColumn(AuditMixinNullable, ImportMixin):
     """Interface for column"""
diff --git a/superset/utils/core.py b/superset/utils/core.py
index 0e820e1..23d6d4e 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -1211,6 +1211,17 @@ def split(
     yield s[i:]
 
 
+def get_iterable(x: Any) -> List:
+    """
+    Get an iterable (list) representation of the object.
+
+    :param x: The object
+    :returns: An iterable representation
+    """
+
+    return x if isinstance(x, list) else [x]
+
+
 class TimeRangeEndpoint(str, Enum):
     """
     The time range endpoint types which represent inclusive, exclusive, or unknown.
diff --git a/superset/views/core.py b/superset/views/core.py
index d5ef4ce..4690009 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -17,6 +17,7 @@
 # pylint: disable=C,R,W
 import logging
 import re
+from collections import defaultdict
 from contextlib import closing
 from datetime import datetime, timedelta
 from typing import Any, Callable, cast, Dict, List, Optional, Union
@@ -1791,11 +1792,12 @@ class Superset(BaseSupersetView):
         dash = qry.one_or_none()
         if not dash:
             abort(404)
-        datasources = set()
+
+        datasources = defaultdict(list)
         for slc in dash.slices:
             datasource = slc.datasource
             if datasource:
-                datasources.add(datasource)
+                datasources[datasource].append(slc)
 
         if config["ENABLE_ACCESS_REQUEST"]:
             for datasource in datasources:
@@ -1810,6 +1812,14 @@ class Superset(BaseSupersetView):
                         "superset/request_access/?" f"dashboard_id={dash.id}&"
                     )
 
+        # Filter out unneeded fields from the datasource payload
+        datasources_payload = {
+            datasource.uid: datasource.data_for_slices(slices)
+            if is_feature_enabled("REDUCE_DASHBOARD_BOOTSTRAP_PAYLOAD")
+            else datasource.data
+            for datasource, slices in datasources.items()
+        }
+
         dash_edit_perm = check_ownership(
             dash, raise_if_false=False
         ) and security_manager.can_access("can_save_dash", "Superset")
@@ -1857,7 +1867,7 @@ class Superset(BaseSupersetView):
         bootstrap_data = {
             "user_id": g.user.get_id(),
             "dashboard_data": dashboard_data,
-            "datasources": {ds.uid: ds.data for ds in datasources},
+            "datasources": datasources_payload,
             "common": common_bootstrap_payload(),
             "editMode": edit_mode,
             "urlParams": url_params,
diff --git a/tests/model_tests.py b/tests/model_tests.py
index 6c93ed8..4a203a9 100644
--- a/tests/model_tests.py
+++ b/tests/model_tests.py
@@ -22,8 +22,9 @@ import pandas
 from sqlalchemy.engine.url import make_url
 
 import tests.test_app
-from superset import app
+from superset import app, db as metadata_db
 from superset.models.core import Database
+from superset.models.slice import Slice
 from superset.utils.core import get_example_database, QueryStatus
 
 from .base_tests import SupersetTestCase
@@ -318,3 +319,16 @@ class SqlaTableModelTestCase(SupersetTestCase):
             tbl.get_query_str(query_obj)
 
         self.assertTrue("Metric 'invalid' does not exist", context.exception)
+
+    def test_data_for_slices(self):
+        tbl = self.get_table_by_name("birth_names")
+        slc = (
+            metadata_db.session.query(Slice)
+            .filter_by(datasource_id=tbl.id, datasource_type=tbl.type)
+            .first()
+        )
+
+        data_for_slices = tbl.data_for_slices([slc])
+        self.assertEquals(len(data_for_slices["columns"]), 0)
+        self.assertEquals(len(data_for_slices["metrics"]), 1)
+        self.assertEquals(len(data_for_slices["verbose_map"].keys()), 2)
diff --git a/tests/utils_tests.py b/tests/utils_tests.py
index 81cc37c..cc19c48 100644
--- a/tests/utils_tests.py
+++ b/tests/utils_tests.py
@@ -36,6 +36,7 @@ from superset.utils.core import (
     convert_legacy_filters_into_adhoc,
     datetime_f,
     format_timedelta,
+    get_iterable,
     get_or_create_db,
     get_since_until,
     get_stacktrace,
@@ -950,3 +951,8 @@ class UtilsTestCase(SupersetTestCase):
             get_time_range_endpoints(form_data={"datasource": "1__table"}, slc=slc),
             (TimeRangeEndpoint.INCLUSIVE, TimeRangeEndpoint.EXCLUSIVE),
         )
+
+    def test_get_iterable(self):
+        self.assertListEqual(get_iterable(123), [123])
+        self.assertListEqual(get_iterable([123]), [123])
+        self.assertListEqual(get_iterable("foo"), ["foo"])