Copilot commented on code in PR #40473:
URL: https://github.com/apache/superset/pull/40473#discussion_r3327885004


##########
superset/mcp_service/dashboard/schemas.py:
##########
@@ -312,6 +339,28 @@ class GetDashboardInfoRequest(MetadataCacheControl):
             "from that permalink."
         ),
     )
+    select_columns: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: list(DEFAULT_GET_DASHBOARD_INFO_COLUMNS),
+            description=(
+                "Top-level fields to include in the response. Defaults to a 
lean "
+                "set that excludes 'css' (raw CSS, can be many KB) and 
'filter_state' "
+                "(only relevant when permalink_key is provided). Pass an 
explicit list "
+                "to override, e.g. ['id','dashboard_title','charts'] for 
minimal "
+                "output, or add 'css' to include raw dashboard CSS."
+            ),
+        ),
+    ]
+
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DASHBOARD_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")

Review Comment:
   _parse_select_columns treats an empty string / empty list as a valid value, 
which results in select_columns=[] and disables filtering (because the model 
serializer checks truthiness). That can unexpectedly return the full 
(potentially huge) payload when a caller passes "" or [] for select_columns; 
consider defaulting empty to DEFAULT_GET_DASHBOARD_INFO_COLUMNS (or raising).



##########
superset/mcp_service/dashboard/schemas.py:
##########
@@ -312,6 +339,28 @@ class GetDashboardInfoRequest(MetadataCacheControl):
             "from that permalink."
         ),
     )
+    select_columns: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: list(DEFAULT_GET_DASHBOARD_INFO_COLUMNS),
+            description=(
+                "Top-level fields to include in the response. Defaults to a 
lean "
+                "set that excludes 'css' (raw CSS, can be many KB) and 
'filter_state' "
+                "(only relevant when permalink_key is provided). Pass an 
explicit list "
+                "to override, e.g. ['id','dashboard_title','charts'] for 
minimal "
+                "output, or add 'css' to include raw dashboard CSS."
+            ),
+        ),
+    ]
+
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DASHBOARD_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")

Review Comment:
   _parse_select_columns treats an empty string / empty list as a valid value, 
which results in select_columns=[] and disables filtering (because the model 
serializer checks truthiness). That can unexpectedly return the full 
(potentially huge) payload when a caller passes "" or [] for select_columns; 
consider defaulting empty to DEFAULT_GET_DASHBOARD_INFO_COLUMNS (or raising).



##########
superset/mcp_service/chart/schemas.py:
##########
@@ -297,6 +331,15 @@ def validate_identifier_or_form_data_key(self) -> 
"GetChartInfoRequest":
             )
         return self
 
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_CHART_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")

Review Comment:
   _parse_select_columns treats "" / [] as select_columns=[], which disables 
filtering (because ChartInfo._filter_fields_by_context checks truthiness). This 
can accidentally re-enable full responses if a client sends an empty value; 
consider defaulting empty to the lean default (or raising).



##########
superset/mcp_service/dataset/schemas.py:
##########
@@ -315,13 +334,78 @@ def create(cls, error: str, error_type: str) -> 
"DatasetError":
         )
 
 
+DEFAULT_GET_DATASET_INFO_COLUMNS: List[str] = [
+    "id",
+    "table_name",
+    "schema",
+    "database_name",
+    "database_id",
+    "uuid",
+    "is_virtual",
+    "description",
+    "main_dttm_col",
+    "sql",
+    "url",
+    "columns",
+    "metrics",
+]
+
+DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS: List[str] = [
+    "column_name",
+    "type",
+    "is_dttm",
+]
+
+
 class GetDatasetInfoRequest(MetadataCacheControl):
     """Request schema for get_dataset_info with support for ID or UUID."""
 
     identifier: Annotated[
         int | str,
         Field(description="Dataset identifier - can be numeric ID or UUID 
string"),
     ]
+    select_columns: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: list(DEFAULT_GET_DATASET_INFO_COLUMNS),
+            description=(
+                "Top-level fields to include in the response. Defaults to a 
lean "
+                "set that excludes verbose fields like params, 
template_params, "
+                "extra, tags, certification_details. Pass an explicit list to "
+                "override (e.g. ['id','table_name','columns'] for minimal 
output)."
+            ),
+        ),
+    ]
+    column_fields: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: 
list(DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS),
+            description=(
+                "Per-column fields to include for entries in 'columns'. 
Defaults "
+                "to ['column_name','type','is_dttm']. Pass a wider list to "
+                "include 'verbose_name','groupby','filterable','description' "
+                "when needed."
+            ),
+        ),
+    ]
+
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DATASET_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")
+
+    @field_validator("column_fields", mode="before")
+    @classmethod
+    def _parse_column_fields(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS)
+        return parse_json_or_list(value, "column_fields")

Review Comment:
   GetDatasetInfoRequest._parse_column_fields treats "" / [] as 
column_fields=[], which disables the TableColumnInfo per-column filtering (the 
serializer checks truthiness) and can massively bloat datasets with many 
columns. Consider defaulting empty to DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS 
(or raising).



##########
superset/mcp_service/chart/schemas.py:
##########
@@ -297,6 +331,15 @@ def validate_identifier_or_form_data_key(self) -> 
"GetChartInfoRequest":
             )
         return self
 
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_CHART_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")

Review Comment:
   _parse_select_columns treats "" / [] as select_columns=[], which disables 
filtering (because ChartInfo._filter_fields_by_context checks truthiness). This 
can accidentally re-enable full responses if a client sends an empty value; 
consider defaulting empty to the lean default (or raising).



##########
superset/mcp_service/dashboard/schemas.py:
##########
@@ -288,6 +288,33 @@ def validate_search_and_filters(self) -> 
"ListDashboardsRequest":
         return self
 
 
+DEFAULT_GET_DASHBOARD_INFO_COLUMNS: List[str] = [
+    "id",
+    "dashboard_title",
+    "slug",
+    "description",
+    "certified_by",
+    "certification_details",
+    "published",
+    "is_managed_externally",
+    "external_url",
+    "created_on",
+    "changed_on",
+    "uuid",
+    "url",
+    "created_on_humanized",
+    "changed_on_humanized",
+    "chart_count",
+    "tags",
+    "charts",
+    "native_filters",
+    "cross_filters_enabled",
+    "is_permalink_state",
+    "permalink_key",
+    "filter_state",
+]

Review Comment:
   DEFAULT_GET_DASHBOARD_INFO_COLUMNS currently includes "filter_state", but 
the select_columns field description on GetDashboardInfoRequest says the 
default lean set excludes it (and the PR description indicates it should be 
excluded by default to avoid huge permalink payloads). Including it defeats 
the size reduction when permalink_key is provided.



##########
superset/mcp_service/chart/tool/get_chart_info.py:
##########
@@ -213,7 +214,7 @@ def _apply_unsaved_state_override(result: ChartInfo, 
form_data_key: str) -> None
 )
 async def get_chart_info(
     request: GetChartInfoRequest, ctx: Context
-) -> ChartInfo | ChartError:
+) -> dict[str, Any] | ChartError:

Review Comment:
   get_chart_info now returns a dict for the saved-chart path (model_dump with 
context), but the form_data_key-only (unsaved chart) path still returns a 
ChartInfo/ChartError directly and bypasses the select_columns filtering. This 
makes response shape inconsistent and can reintroduce large outputs for unsaved 
charts.



##########
superset/mcp_service/dashboard/schemas.py:
##########
@@ -288,6 +288,33 @@ def validate_search_and_filters(self) -> 
"ListDashboardsRequest":
         return self
 
 
+DEFAULT_GET_DASHBOARD_INFO_COLUMNS: List[str] = [
+    "id",
+    "dashboard_title",
+    "slug",
+    "description",
+    "certified_by",
+    "certification_details",
+    "published",
+    "is_managed_externally",
+    "external_url",
+    "created_on",
+    "changed_on",
+    "uuid",
+    "url",
+    "created_on_humanized",
+    "changed_on_humanized",
+    "chart_count",
+    "tags",
+    "charts",
+    "native_filters",
+    "cross_filters_enabled",
+    "is_permalink_state",
+    "permalink_key",
+    "filter_state",
+]

Review Comment:
   DEFAULT_GET_DASHBOARD_INFO_COLUMNS currently includes "filter_state", but 
the select_columns field description on GetDashboardInfoRequest says the 
default lean set excludes it (and the PR description indicates it should be 
excluded by default to avoid huge permalink payloads). Including it defeats 
the size reduction when permalink_key is provided.



##########
superset/mcp_service/dataset/schemas.py:
##########
@@ -315,13 +334,78 @@ def create(cls, error: str, error_type: str) -> 
"DatasetError":
         )
 
 
+DEFAULT_GET_DATASET_INFO_COLUMNS: List[str] = [
+    "id",
+    "table_name",
+    "schema",
+    "database_name",
+    "database_id",
+    "uuid",
+    "is_virtual",
+    "description",
+    "main_dttm_col",
+    "sql",
+    "url",
+    "columns",
+    "metrics",
+]
+
+DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS: List[str] = [
+    "column_name",
+    "type",
+    "is_dttm",
+]
+
+
 class GetDatasetInfoRequest(MetadataCacheControl):
     """Request schema for get_dataset_info with support for ID or UUID."""
 
     identifier: Annotated[
         int | str,
         Field(description="Dataset identifier - can be numeric ID or UUID 
string"),
     ]
+    select_columns: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: list(DEFAULT_GET_DATASET_INFO_COLUMNS),
+            description=(
+                "Top-level fields to include in the response. Defaults to a 
lean "
+                "set that excludes verbose fields like params, 
template_params, "
+                "extra, tags, certification_details. Pass an explicit list to "
+                "override (e.g. ['id','table_name','columns'] for minimal 
output)."
+            ),
+        ),
+    ]
+    column_fields: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: 
list(DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS),
+            description=(
+                "Per-column fields to include for entries in 'columns'. 
Defaults "
+                "to ['column_name','type','is_dttm']. Pass a wider list to "
+                "include 'verbose_name','groupby','filterable','description' "
+                "when needed."
+            ),
+        ),
+    ]
+
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DATASET_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")

Review Comment:
   GetDatasetInfoRequest._parse_select_columns treats "" / [] as 
select_columns=[], which disables DatasetInfo filtering (the serializer checks 
truthiness). That can unexpectedly return the full dataset payload; consider 
defaulting empty to DEFAULT_GET_DATASET_INFO_COLUMNS (or raising).



##########
superset/mcp_service/dashboard/schemas.py:
##########
@@ -288,6 +288,33 @@ def validate_search_and_filters(self) -> 
"ListDashboardsRequest":
         return self
 
 
+DEFAULT_GET_DASHBOARD_INFO_COLUMNS: List[str] = [
+    "id",
+    "dashboard_title",
+    "slug",
+    "description",
+    "certified_by",
+    "certification_details",
+    "published",
+    "is_managed_externally",
+    "external_url",
+    "created_on",
+    "changed_on",
+    "uuid",
+    "url",
+    "created_on_humanized",
+    "changed_on_humanized",
+    "chart_count",
+    "tags",
+    "charts",
+    "native_filters",
+    "cross_filters_enabled",
+    "is_permalink_state",
+    "permalink_key",
+    "filter_state",
+]

Review Comment:
   DEFAULT_GET_DASHBOARD_INFO_COLUMNS currently includes "filter_state", but 
the select_columns field description on GetDashboardInfoRequest says the 
default lean set excludes it (and the PR description indicates it should be 
excluded by default to avoid huge permalink payloads). Including it defeats 
the size reduction when permalink_key is provided.



##########
superset/mcp_service/dataset/schemas.py:
##########
@@ -315,13 +334,78 @@ def create(cls, error: str, error_type: str) -> 
"DatasetError":
         )
 
 
+DEFAULT_GET_DATASET_INFO_COLUMNS: List[str] = [
+    "id",
+    "table_name",
+    "schema",
+    "database_name",
+    "database_id",
+    "uuid",
+    "is_virtual",
+    "description",
+    "main_dttm_col",
+    "sql",
+    "url",
+    "columns",
+    "metrics",
+]
+
+DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS: List[str] = [
+    "column_name",
+    "type",
+    "is_dttm",
+]
+
+
 class GetDatasetInfoRequest(MetadataCacheControl):
     """Request schema for get_dataset_info with support for ID or UUID."""
 
     identifier: Annotated[
         int | str,
         Field(description="Dataset identifier - can be numeric ID or UUID 
string"),
     ]
+    select_columns: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: list(DEFAULT_GET_DATASET_INFO_COLUMNS),
+            description=(
+                "Top-level fields to include in the response. Defaults to a 
lean "
+                "set that excludes verbose fields like params, 
template_params, "
+                "extra, tags, certification_details. Pass an explicit list to "
+                "override (e.g. ['id','table_name','columns'] for minimal 
output)."
+            ),
+        ),
+    ]
+    column_fields: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: 
list(DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS),
+            description=(
+                "Per-column fields to include for entries in 'columns'. 
Defaults "
+                "to ['column_name','type','is_dttm']. Pass a wider list to "
+                "include 'verbose_name','groupby','filterable','description' "
+                "when needed."
+            ),
+        ),
+    ]
+
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DATASET_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")

Review Comment:
   GetDatasetInfoRequest._parse_select_columns treats "" / [] as 
select_columns=[], which disables DatasetInfo filtering (the serializer checks 
truthiness). That can unexpectedly return the full dataset payload; consider 
defaulting empty to DEFAULT_GET_DATASET_INFO_COLUMNS (or raising).



##########
superset/mcp_service/chart/tool/get_chart_info.py:
##########
@@ -213,7 +214,7 @@ def _apply_unsaved_state_override(result: ChartInfo, 
form_data_key: str) -> None
 )
 async def get_chart_info(
     request: GetChartInfoRequest, ctx: Context
-) -> ChartInfo | ChartError:
+) -> dict[str, Any] | ChartError:

Review Comment:
   get_chart_info now returns a dict for the saved-chart path (model_dump with 
context), but the form_data_key-only (unsaved chart) path still returns a 
ChartInfo/ChartError directly and bypasses the select_columns filtering. This 
makes response shape inconsistent and can reintroduce large outputs for unsaved 
charts.



##########
superset/mcp_service/dashboard/schemas.py:
##########
@@ -312,6 +339,28 @@ class GetDashboardInfoRequest(MetadataCacheControl):
             "from that permalink."
         ),
     )
+    select_columns: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: list(DEFAULT_GET_DASHBOARD_INFO_COLUMNS),
+            description=(
+                "Top-level fields to include in the response. Defaults to a 
lean "
+                "set that excludes 'css' (raw CSS, can be many KB) and 
'filter_state' "
+                "(only relevant when permalink_key is provided). Pass an 
explicit list "
+                "to override, e.g. ['id','dashboard_title','charts'] for 
minimal "
+                "output, or add 'css' to include raw dashboard CSS."
+            ),
+        ),
+    ]
+
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DASHBOARD_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")

Review Comment:
   _parse_select_columns treats an empty string / empty list as a valid value, 
which results in select_columns=[] and disables filtering (because the model 
serializer checks truthiness). That can unexpectedly return the full 
(potentially huge) payload when a caller passes "" or [] for select_columns; 
consider defaulting empty to DEFAULT_GET_DASHBOARD_INFO_COLUMNS (or raising).



##########
superset/mcp_service/dataset/schemas.py:
##########
@@ -315,13 +334,78 @@ def create(cls, error: str, error_type: str) -> 
"DatasetError":
         )
 
 
+DEFAULT_GET_DATASET_INFO_COLUMNS: List[str] = [
+    "id",
+    "table_name",
+    "schema",
+    "database_name",
+    "database_id",
+    "uuid",
+    "is_virtual",
+    "description",
+    "main_dttm_col",
+    "sql",
+    "url",
+    "columns",
+    "metrics",
+]
+
+DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS: List[str] = [
+    "column_name",
+    "type",
+    "is_dttm",
+]
+
+
 class GetDatasetInfoRequest(MetadataCacheControl):
     """Request schema for get_dataset_info with support for ID or UUID."""
 
     identifier: Annotated[
         int | str,
         Field(description="Dataset identifier - can be numeric ID or UUID 
string"),
     ]
+    select_columns: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: list(DEFAULT_GET_DATASET_INFO_COLUMNS),
+            description=(
+                "Top-level fields to include in the response. Defaults to a 
lean "
+                "set that excludes verbose fields like params, 
template_params, "
+                "extra, tags, certification_details. Pass an explicit list to "
+                "override (e.g. ['id','table_name','columns'] for minimal 
output)."
+            ),
+        ),
+    ]
+    column_fields: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: 
list(DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS),
+            description=(
+                "Per-column fields to include for entries in 'columns'. 
Defaults "
+                "to ['column_name','type','is_dttm']. Pass a wider list to "
+                "include 'verbose_name','groupby','filterable','description' "
+                "when needed."
+            ),
+        ),
+    ]
+
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DATASET_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")
+
+    @field_validator("column_fields", mode="before")
+    @classmethod
+    def _parse_column_fields(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS)
+        return parse_json_or_list(value, "column_fields")

Review Comment:
   GetDatasetInfoRequest._parse_column_fields treats "" / [] as 
column_fields=[], which disables the TableColumnInfo per-column filtering (the 
serializer checks truthiness) and can massively bloat datasets with many 
columns. Consider defaulting empty to DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS 
(or raising).



##########
superset/mcp_service/dataset/schemas.py:
##########
@@ -315,13 +334,78 @@ def create(cls, error: str, error_type: str) -> 
"DatasetError":
         )
 
 
+DEFAULT_GET_DATASET_INFO_COLUMNS: List[str] = [
+    "id",
+    "table_name",
+    "schema",
+    "database_name",
+    "database_id",
+    "uuid",
+    "is_virtual",
+    "description",
+    "main_dttm_col",
+    "sql",
+    "url",
+    "columns",
+    "metrics",
+]
+
+DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS: List[str] = [
+    "column_name",
+    "type",
+    "is_dttm",
+]
+
+
 class GetDatasetInfoRequest(MetadataCacheControl):
     """Request schema for get_dataset_info with support for ID or UUID."""
 
     identifier: Annotated[
         int | str,
         Field(description="Dataset identifier - can be numeric ID or UUID 
string"),
     ]
+    select_columns: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: list(DEFAULT_GET_DATASET_INFO_COLUMNS),
+            description=(
+                "Top-level fields to include in the response. Defaults to a 
lean "
+                "set that excludes verbose fields like params, 
template_params, "
+                "extra, tags, certification_details. Pass an explicit list to "
+                "override (e.g. ['id','table_name','columns'] for minimal 
output)."
+            ),
+        ),
+    ]
+    column_fields: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: 
list(DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS),
+            description=(
+                "Per-column fields to include for entries in 'columns'. 
Defaults "
+                "to ['column_name','type','is_dttm']. Pass a wider list to "
+                "include 'verbose_name','groupby','filterable','description' "
+                "when needed."
+            ),
+        ),
+    ]
+
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DATASET_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")

Review Comment:
   GetDatasetInfoRequest._parse_select_columns treats "" / [] as 
select_columns=[], which disables DatasetInfo filtering (the serializer checks 
truthiness). That can unexpectedly return the full dataset payload; consider 
defaulting empty to DEFAULT_GET_DATASET_INFO_COLUMNS (or raising).



##########
superset/mcp_service/chart/schemas.py:
##########
@@ -297,6 +331,15 @@ def validate_identifier_or_form_data_key(self) -> 
"GetChartInfoRequest":
             )
         return self
 
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_CHART_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")

Review Comment:
   _parse_select_columns treats "" / [] as select_columns=[], which disables 
filtering (because ChartInfo._filter_fields_by_context checks truthiness). This 
can accidentally re-enable full responses if a client sends an empty value; 
consider defaulting empty to the lean default (or raising).



##########
superset/mcp_service/chart/tool/get_chart_info.py:
##########
@@ -213,7 +214,7 @@ def _apply_unsaved_state_override(result: ChartInfo, 
form_data_key: str) -> None
 )
 async def get_chart_info(
     request: GetChartInfoRequest, ctx: Context
-) -> ChartInfo | ChartError:
+) -> dict[str, Any] | ChartError:

Review Comment:
   get_chart_info now returns a dict for the saved-chart path (model_dump with 
context), but the form_data_key-only (unsaved chart) path still returns a 
ChartInfo/ChartError directly and bypasses the select_columns filtering. This 
makes response shape inconsistent and can reintroduce large outputs for unsaved 
charts.



##########
superset/mcp_service/dataset/schemas.py:
##########
@@ -315,13 +334,78 @@ def create(cls, error: str, error_type: str) -> 
"DatasetError":
         )
 
 
+DEFAULT_GET_DATASET_INFO_COLUMNS: List[str] = [
+    "id",
+    "table_name",
+    "schema",
+    "database_name",
+    "database_id",
+    "uuid",
+    "is_virtual",
+    "description",
+    "main_dttm_col",
+    "sql",
+    "url",
+    "columns",
+    "metrics",
+]
+
+DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS: List[str] = [
+    "column_name",
+    "type",
+    "is_dttm",
+]
+
+
 class GetDatasetInfoRequest(MetadataCacheControl):
     """Request schema for get_dataset_info with support for ID or UUID."""
 
     identifier: Annotated[
         int | str,
         Field(description="Dataset identifier - can be numeric ID or UUID 
string"),
     ]
+    select_columns: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: list(DEFAULT_GET_DATASET_INFO_COLUMNS),
+            description=(
+                "Top-level fields to include in the response. Defaults to a 
lean "
+                "set that excludes verbose fields like params, 
template_params, "
+                "extra, tags, certification_details. Pass an explicit list to "
+                "override (e.g. ['id','table_name','columns'] for minimal 
output)."
+            ),
+        ),
+    ]
+    column_fields: Annotated[
+        List[str],
+        Field(
+            default_factory=lambda: 
list(DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS),
+            description=(
+                "Per-column fields to include for entries in 'columns'. 
Defaults "
+                "to ['column_name','type','is_dttm']. Pass a wider list to "
+                "include 'verbose_name','groupby','filterable','description' "
+                "when needed."
+            ),
+        ),
+    ]
+
+    @field_validator("select_columns", mode="before")
+    @classmethod
+    def _parse_select_columns(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DATASET_INFO_COLUMNS)
+        return parse_json_or_list(value, "select_columns")
+
+    @field_validator("column_fields", mode="before")
+    @classmethod
+    def _parse_column_fields(cls, value: Any) -> Any:
+        from superset.mcp_service.utils.schema_utils import parse_json_or_list
+
+        if value is None:
+            return list(DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS)
+        return parse_json_or_list(value, "column_fields")

Review Comment:
   GetDatasetInfoRequest._parse_column_fields treats "" / [] as 
column_fields=[], which disables the TableColumnInfo per-column filtering (the 
serializer checks truthiness) and can massively bloat datasets with many 
columns. Consider defaulting empty to DEFAULT_GET_DATASET_INFO_COLUMN_FIELDS 
(or raising).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to