korbit-ai[bot] commented on code in PR #34375: URL: https://github.com/apache/superset/pull/34375#discussion_r2245500381
########## superset/common/query_object.py: ########## @@ -302,6 +303,37 @@ def _validate_no_have_duplicate_labels(self) -> None: ) ) + def _validate_time_offsets(self) -> None: + """Validate time_offsets configuration""" + if not self.time_offsets: + return + + for offset in self.time_offsets: + # Check if this is a date range offset (YYYY-MM-DD : YYYY-MM-DD format) + if self._is_valid_date_range(offset): + if not feature_flag_manager.is_feature_enabled( + "DATE_RANGE_TIMESHIFTS_ENABLED" + ): + raise QueryObjectValidationError( Review Comment: ### Incomplete Time Offsets Validation Docstring <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? The docstring doesn't explain why the validation is needed or what the time_offsets configuration should look like. ###### Why this matters Without understanding the purpose and requirements of time_offsets validation, developers may misuse the feature or miss important constraints. ###### Suggested change ∙ *Feature Preview* """Validate time_offsets configuration Validates that date range time shifts (in YYYY-MM-DD:YYYY-MM-DD format) are only used when the DATE_RANGE_TIMESHIFTS_ENABLED feature flag is enabled. This prevents usage of date range time shifts when the feature is not properly configured. """ ###### Provide feedback to improve future suggestions [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/3d3a44d0-b215-472e-b10c-51741b2492f7/upvote) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/3d3a44d0-b215-472e-b10c-51741b2492f7?what_not_true=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/3d3a44d0-b215-472e-b10c-51741b2492f7?what_out_of_scope=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/3d3a44d0-b215-472e-b10c-51741b2492f7?what_not_in_standard=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/3d3a44d0-b215-472e-b10c-51741b2492f7) </details> <sub> 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:ff1928f3-fb0a-400b-8be2-9e90ed2026aa --> [](ff1928f3-fb0a-400b-8be2-9e90ed2026aa) ########## superset/common/query_context_processor.py: ########## @@ -642,15 +782,164 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme offset_dfs[offset] = offset_metrics_df if offset_dfs: + logger.info("Before joining offset DataFrames:") + logger.info(f"Main DataFrame shape: {df.shape}") + logger.info(f"Main DataFrame:\n{df.to_string()}") + for offset_key, offset_df in offset_dfs.items(): + logger.info(f"Offset '{offset_key}' DataFrame shape: {offset_df.shape}") + logger.info( + f"Offset '{offset_key}' DataFrame:\n{offset_df.to_string()}" + ) + df = self.join_offset_dfs( df, offset_dfs, time_grain, join_keys, ) + logger.info("After joining offset DataFrames:") + logger.info(f"Final DataFrame shape: {df.shape}") + logger.info(f"Final DataFrame:\n{df.to_string()}") + return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys) + def _process_date_range_offset( + self, offset_df: pd.DataFrame, join_keys: list[str] + ) -> tuple[pd.DataFrame, list[str]]: + """Process date range offset data and return modified DataFrame and keys.""" + temporal_cols = ["ds", "__timestamp", "dttm"] + non_temporal_join_keys = [key for key in join_keys if key not in temporal_cols] + + logger.info( + f"Non-temporal join keys for date range offset: {non_temporal_join_keys}" + ) + + if non_temporal_join_keys: + return offset_df, non_temporal_join_keys + + # Aggregate offset data for cross join + logger.info("No non-temporal join keys found, aggregating offset data") + + metric_columns = [col for col in offset_df.columns if col not in temporal_cols] + logger.info(f"Aggregating metric columns: {metric_columns}") + + if metric_columns: + aggregated_values = {} + for col in metric_columns: + if pd.api.types.is_numeric_dtype(offset_df[col]): + aggregated_values[col] = offset_df[col].sum() + else: + aggregated_values[col] = ( + offset_df[col].iloc[0] if not offset_df.empty else None + ) + + offset_df = pd.DataFrame([aggregated_values]) + logger.info(f"Aggregated offset DataFrame:\n{offset_df.to_string()}") + + return offset_df, [] + + def _apply_cleanup_logic( + self, + df: pd.DataFrame, + offset: str, + time_grain: str | None, + join_keys: list[str], + is_date_range_offset: bool, + ) -> pd.DataFrame: Review Comment: ### Vague Cleanup Logic Documentation <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? The existing docstring doesn't explain what cleanup is being performed or the significance of is_date_range_offset parameter. ###### Why this matters The current docstring is too vague to understand why different cleanup logic is needed for different offset types. ###### Suggested change ∙ *Feature Preview* """Apply appropriate cleanup logic based on offset type. Different cleanup is performed depending on whether the offset is a date range or relative offset: - For relative offsets with time_grain: Reorders columns and drops temporary join columns - For date range offsets: Only drops temporary right suffix columns - For other cases: Only drops right suffix columns """ ###### Provide feedback to improve future suggestions [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/3c043d8c-73e6-4eee-bb6f-10321c6f0bef/upvote) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/3c043d8c-73e6-4eee-bb6f-10321c6f0bef?what_not_true=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/3c043d8c-73e6-4eee-bb6f-10321c6f0bef?what_out_of_scope=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/3c043d8c-73e6-4eee-bb6f-10321c6f0bef?what_not_in_standard=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/3c043d8c-73e6-4eee-bb6f-10321c6f0bef) </details> <sub> 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:2de8d635-b1cc-405e-95e6-4dfc20c0daf5 --> [](2de8d635-b1cc-405e-95e6-4dfc20c0daf5) ########## superset/common/query_object.py: ########## @@ -302,6 +303,37 @@ ) ) + def _validate_time_offsets(self) -> None: + """Validate time_offsets configuration""" + if not self.time_offsets: + return + + for offset in self.time_offsets: + # Check if this is a date range offset (YYYY-MM-DD : YYYY-MM-DD format) + if self._is_valid_date_range(offset): + if not feature_flag_manager.is_feature_enabled( + "DATE_RANGE_TIMESHIFTS_ENABLED" + ): + raise QueryObjectValidationError( + "Date range timeshifts are not enabled. " + "Please contact your administrator to enable the " + "DATE_RANGE_TIMESHIFTS_ENABLED feature flag." + ) + + def _is_valid_date_range(self, date_range: str) -> bool: + """Check if string is a valid date range in YYYY-MM-DD : YYYY-MM-DD format""" + try: + # Attempt to parse the string as a date range in the format + # YYYY-MM-DD:YYYY-MM-DD + start_date, end_date = date_range.split(":") + datetime.strptime(start_date.strip(), "%Y-%m-%d") + datetime.strptime(end_date.strip(), "%Y-%m-%d") Review Comment: ### Missing Date Range Order Validation <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? The date validation in _is_valid_date_range doesn't check if the start date is before the end date. ###### Why this matters Without this validation, users could input invalid date ranges where the end date precedes the start date, potentially causing unexpected behavior in time series analysis. ###### Suggested change ∙ *Feature Preview* ```python def _is_valid_date_range(self, date_range: str) -> bool: try: start_date_str, end_date_str = date_range.split(":") start_date = datetime.strptime(start_date_str.strip(), "%Y-%m-%d") end_date = datetime.strptime(end_date_str.strip(), "%Y-%m-%d") return start_date <= end_date except ValueError: return False ``` ###### Provide feedback to improve future suggestions [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/9ce2b6d0-dd50-427a-b126-2cbb01869527/upvote) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/9ce2b6d0-dd50-427a-b126-2cbb01869527?what_not_true=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/9ce2b6d0-dd50-427a-b126-2cbb01869527?what_out_of_scope=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/9ce2b6d0-dd50-427a-b126-2cbb01869527?what_not_in_standard=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/9ce2b6d0-dd50-427a-b126-2cbb01869527) </details> <sub> 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:0f8b8908-ed3d-4683-b925-7f3afa6aa4ee --> [](0f8b8908-ed3d-4683-b925-7f3afa6aa4ee) ########## superset/common/query_context_processor.py: ########## @@ -522,34 +545,92 @@ offset, outer_to_dttm ) + # For relative offsets, keep the original behavior + # BUT: Don't set inner bounds that conflict with the main query + # range. The inner bounds should match the shifted range, not the + # original range + query_object_clone.inner_from_dttm = query_object_clone.from_dttm + query_object_clone.inner_to_dttm = query_object_clone.to_dttm Review Comment: ### Conflicting Time Range Bounds <sub></sub> <details> <summary>Tell me more</summary> ###### What is the issue? For relative offsets, the inner bounds are set to match the shifted range, but this could cause conflicts with the main query range in certain scenarios. ###### Why this matters Setting inner bounds to match the shifted range may create incorrect results when the shifted range overlaps with the main query range, leading to unexpected data filtering. ###### Suggested change ∙ *Feature Preview* Remove the inner bounds setting for relative offsets to avoid conflicts: ```python # For relative offsets, don't set inner bounds at all query_object_clone.inner_from_dttm = None query_object_clone.inner_to_dttm = None ``` ###### Provide feedback to improve future suggestions [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/63239539-8dcf-4f72-a825-3567c01fbbf4/upvote) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/63239539-8dcf-4f72-a825-3567c01fbbf4?what_not_true=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/63239539-8dcf-4f72-a825-3567c01fbbf4?what_out_of_scope=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/63239539-8dcf-4f72-a825-3567c01fbbf4?what_not_in_standard=true) [](https://app.korbit.ai/feedback/aa91ff46-6083-4491-9416-b83dd1994b51/63239539-8dcf-4f72-a825-3567c01fbbf4) </details> <sub> 💬 Looking for more details? Reply to this comment to chat with Korbit. </sub> <!--- korbi internal id:8ef2208e-6d24-4754-9e81-75bc5887a06f --> [](8ef2208e-6d24-4754-9e81-75bc5887a06f) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For additional commands, e-mail: notifications-h...@superset.apache.org