Re: [PR] feat: Add BaseDAO improvements and test reorganization [superset]

via GitHub Tue, 09 Sep 2025 13:29:13 -0700


aminghadersohi commented on code in PR #35018:
URL: https://github.com/apache/superset/pull/35018#discussion_r2334689726



##########
superset/daos/base.py:
##########
@@ -251,3 +455,205 @@ def filter_by(cls, **filter_by: Any) -> list[T]:
                 cls.id_column_name, data_model
             ).apply(query, None)
         return query.filter_by(**filter_by).all()
+
+    @classmethod
+    def apply_column_operators(
+        cls, query: Any, column_operators: Optional[List[ColumnOperator]] = 
None
+    ) -> Any:
+        """
+        Apply column operators (list of ColumnOperator) to the query using
+        ColumnOperatorEnum logic. Raises ValueError if a filter references a
+        non-existent column.
+        """
+        if not column_operators:
+            return query
+        for c in column_operators:
+            if not isinstance(c, ColumnOperator):
+                continue
+            col = c.col
+            opr = c.opr
+            value = c.value
+            if not col or not hasattr(cls.model_cls, col):
+                model_name = cls.model_cls.__name__ if cls.model_cls else 
"Unknown"
+                logging.error(
+                    f"Invalid filter: column '{col}' does not exist on 
{model_name}"
+                )
+                raise ValueError(
+                    f"Invalid filter: column '{col}' does not exist on 
{model_name}"
+                )
+            column = getattr(cls.model_cls, col)
+            try:
+                # Always use ColumnOperatorEnum's apply method
+                operator_enum = ColumnOperatorEnum(opr)
+                query = query.filter(operator_enum.apply(column, value))
+            except Exception as e:
+                logging.error(f"Error applying filter on column '{col}': {e}")
+                raise
+        return query
+
+    @classmethod
+    def get_filterable_columns_and_operators(cls) -> Dict[str, List[str]]:
+        """
+        Returns a dict mapping filterable columns (including hybrid/computed 
fields if
+        present) to their supported operators. Used by MCP tools to 
dynamically expose
+        filter options. Custom fields supported by the DAO but not present on 
the model
+        should be documented here.
+        """
+
+        mapper = inspect(cls.model_cls)
+        columns = {c.key: c for c in mapper.columns}
+        # Add hybrid properties
+        hybrids = {
+            name: attr
+            for name, attr in vars(cls.model_cls).items()
+            if isinstance(attr, hybrid_property)
+        }
+        # You may add custom fields here, e.g.:
+        # custom_fields = {"tags": ["eq", "in_", "like"], ...}
+        custom_fields: Dict[str, List[str]] = {}
+
+        filterable = {}
+        for name, col in columns.items():
+            if isinstance(col.type, (sa.String, sa.Text)):
+                filterable[name] = TYPE_OPERATOR_MAP["string"]
+            elif isinstance(col.type, (sa.Boolean,)):
+                filterable[name] = TYPE_OPERATOR_MAP["boolean"]
+            elif isinstance(col.type, (sa.Integer, sa.Float, sa.Numeric)):
+                filterable[name] = TYPE_OPERATOR_MAP["number"]
+            elif isinstance(col.type, (sa.DateTime, sa.Date, sa.Time)):
+                filterable[name] = TYPE_OPERATOR_MAP["datetime"]
+            else:
+                # Fallback to eq/ne/null
+                filterable[name] = ["eq", "ne", "is_null", "is_not_null"]
+        # Add hybrid properties as string fields by default
+        for name in hybrids:
+            filterable[name] = TYPE_OPERATOR_MAP["string"]
+        # Add custom fields
+        filterable.update(custom_fields)
+        return filterable
+
+    @classmethod
+    def _build_query(
+        cls,
+        column_operators: Optional[List[ColumnOperator]] = None,
+        search: Optional[str] = None,
+        search_columns: Optional[List[str]] = None,
+        custom_filters: Optional[Dict[str, BaseFilter]] = None,
+        skip_base_filter: bool = False,
+        data_model: Optional[SQLAInterface] = None,
+    ) -> Any:
+        """
+        Build a SQLAlchemy query with base filter, column operators, search, 
and
+        custom filters.
+        """
+        if data_model is None:
+            data_model = SQLAInterface(cls.model_cls, db.session)
+        query = data_model.session.query(cls.model_cls)
+        query = cls._apply_base_filter(
+            query, skip_base_filter=skip_base_filter, data_model=data_model
+        )
+        if search and search_columns:
+            search_filters = []
+            for column_name in search_columns:
+                if hasattr(cls.model_cls, column_name):
+                    column = getattr(cls.model_cls, column_name)
+                    search_filters.append(cast(column, 
Text).ilike(f"%{search}%"))
+            if search_filters:
+                query = query.filter(or_(*search_filters))
+        if custom_filters:
+            for filter_class in custom_filters.values():
+                query = filter_class.apply(query, None)
+        if column_operators:
+            query = cls.apply_column_operators(query, column_operators)
+        return query
+
+    @classmethod
+    def list(  # noqa: C901
+        cls,
+        column_operators: Optional[List[ColumnOperator]] = None,
+        order_column: str = "changed_on",
+        order_direction: str = "desc",
+        page: int = 0,
+        page_size: int = 100,
+        search: Optional[str] = None,
+        search_columns: Optional[List[str]] = None,
+        custom_filters: Optional[Dict[str, BaseFilter]] = None,
+        columns: Optional[List[str]] = None,
+    ) -> Tuple[List[Any], int]:
+        """
+        Generic list method for filtered, sorted, and paginated results.
+        If columns is specified, returns a list of tuples (one per row),
+        otherwise returns model instances.
+        """
+        data_model = SQLAInterface(cls.model_cls, db.session)
+
+        column_attrs = []
+        relationship_loads = []
+        if columns is None:
+            columns = []
+        for name in columns:
+            attr = getattr(cls.model_cls, name, None)
+            if attr is None:
+                continue
+            prop = getattr(attr, "property", None)
+            if isinstance(prop, ColumnProperty):
+                column_attrs.append(attr)
+            elif isinstance(prop, RelationshipProperty):
+                relationship_loads.append(joinedload(attr))

Review Comment:
   Good catch! You're right - `joinedload` with many-to-many relationships
   would break pagination. I've fixed it by counting before adding the
   relationship joins. This ensures:
   - Accurate counts (not inflated by joins)
   - Correct pagination (LIMIT/OFFSET on entities, not joined rows)

   We don't have tests for relationship loading in the `list` method - they
   should be added to prevent regressions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org
For additional commands, e-mail: notifications-h...@superset.apache.org

Re: [PR] feat: Add BaseDAO improvements and test reorganization [superset]

Reply via email to