github-advanced-security[bot] commented on code in PR #35478:
URL: https://github.com/apache/superset/pull/35478#discussion_r2405054205


##########
superset/charts/data/api.py:
##########
@@ -462,3 +485,108 @@
             return ChartDataQueryContextSchema().load(form_data)
         except KeyError as ex:
             raise ValidationError("Request is incorrect") from ex
+
+    def _should_use_streaming(
+        self, result: dict[Any, Any], form_data: dict[str, Any] | None = None
+    ) -> bool:
+        """Determine if streaming should be used based on actual row count 
threshold."""
+        from flask import current_app as app
+
+        query_context = result["query_context"]
+        result_format = query_context.result_format
+
+        # Only support CSV streaming currently
+        if result_format.lower() != "csv":
+            return False
+
+        # Get streaming threshold from config
+        threshold = app.config.get("CSV_STREAMING_ROW_THRESHOLD", 100000)
+
+        # Extract actual row count (same logic as frontend)
+        actual_row_count = None
+        viz_type = form_data.get("viz_type") if form_data else None
+
+        # For table viz, try to get actual row count from query results
+        if viz_type == "table" and result.get("queries"):
+            # Check if we have rowcount in the second query result (like 
frontend does)
+            queries = result.get("queries", [])
+            if len(queries) > 1 and queries[1].get("data"):
+                data = queries[1]["data"]
+                if isinstance(data, list) and len(data) > 0:
+                    actual_row_count = data[0].get("rowcount")
+
+        # Fallback to row_limit if actual count not available
+        if actual_row_count is None:
+            if form_data and "row_limit" in form_data:
+                actual_row_count = form_data.get("row_limit", 0)
+            elif query_context.form_data and "row_limit" in 
query_context.form_data:
+                actual_row_count = query_context.form_data.get("row_limit", 0)
+
+        # Use streaming if row count meets or exceeds threshold
+        if actual_row_count is not None and actual_row_count >= threshold:
+            return True
+
+        return False
+
+    def _create_streaming_csv_response(
+        self, result: dict[Any, Any], form_data: dict[str, Any] | None = None, 
filename: str | None = None, expected_rows: int | None = None
+    ) -> Response:
+        """Create a streaming CSV response for large datasets."""
+        from datetime import datetime
+
+        from flask import Response
+
+        from superset.commands.chart.data.streaming_export_command import (
+            StreamingCSVExportCommand,
+        )
+
+        query_context = result["query_context"]
+
+        # Use filename from frontend if provided, otherwise generate one
+        if not filename:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            chart_name = "export"
+
+            if form_data and form_data.get("slice_name"):
+                chart_name = form_data["slice_name"]
+            elif form_data and form_data.get("viz_type"):
+                chart_name = form_data["viz_type"]
+
+            # Sanitize chart name for filename
+            safe_chart_name = "".join(
+                c for c in chart_name if c.isalnum() or c in ("-", "_")
+            )
+            filename = f"superset_{safe_chart_name}_{timestamp}.csv"
+
+        logger.info("Creating streaming CSV response: %s (from frontend: %s)", 
filename, filename is not None)
+        if expected_rows:
+            logger.info("📊 Using expected_rows from frontend: %d", 
expected_rows)
+
+        # Execute streaming command
+        chunk_size = 1000
+        command = StreamingCSVExportCommand(query_context, chunk_size)
+        command.validate()
+
+        # Get the callable that returns the generator
+        csv_generator_callable = command.run()
+
+        # Get encoding from config
+        encoding = app.config.get("CSV_EXPORT", {}).get("encoding", "utf-8")
+
+        # Create response with streaming headers
+        response = Response(
+            csv_generator_callable(),  # Call the callable to get generator

Review Comment:
   ## Information exposure through an exception
   
   [Stack trace information](1) flows to this location and may be exposed to an 
external user.
   
   [Show more details](https://github.com/apache/superset/security/code-scanning/2061)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to