Re: [PR] feat(streaming): Streaming CSV uploads for over 100k records for constant memory usage [superset]

via GitHub Mon, 17 Nov 2025 13:47:45 -0800


betodealmeida commented on code in PR #35478:
URL: https://github.com/apache/superset/pull/35478#discussion_r2535575478



##########
superset/charts/data/api.py:
##########
@@ -507,3 +549,105 @@ def _create_query_context_from_form(
             return ChartDataQueryContextSchema().load(form_data)
         except KeyError as ex:
             raise ValidationError("Request is incorrect") from ex
+
+    def _should_use_streaming(
+        self, result: dict[Any, Any], form_data: dict[str, Any] | None = None
+    ) -> bool:
+        """Determine if streaming should be used based on actual row count 
threshold."""
+        query_context = result["query_context"]
+        result_format = query_context.result_format
+
+        # Only support CSV streaming currently
+        if result_format.lower() != "csv":
+            return False
+
+        # Get streaming threshold from config
+        threshold = app.config.get("CSV_STREAMING_ROW_THRESHOLD", 100000)
+
+        # Extract actual row count (same logic as frontend)
+        actual_row_count: int | None = None
+        viz_type = form_data.get("viz_type") if form_data else None
+
+        # For table viz, try to get actual row count from query results
+        if viz_type == "table" and result.get("queries"):
+            # Check if we have rowcount in the second query result (like 
frontend does)
+            queries = result.get("queries", [])
+            if len(queries) > 1 and queries[1].get("data"):
+                data = queries[1]["data"]
+                if isinstance(data, list) and len(data) > 0:
+                    rowcount = data[0].get("rowcount")
+                    actual_row_count = int(rowcount) if rowcount else None
+
+        # Fallback to row_limit if actual count not available
+        if actual_row_count is None:
+            if form_data and "row_limit" in form_data:
+                row_limit = form_data.get("row_limit", 0)
+                actual_row_count = int(row_limit) if row_limit else 0
+            elif query_context.form_data and "row_limit" in 
query_context.form_data:
+                row_limit = query_context.form_data.get("row_limit", 0)
+                actual_row_count = int(row_limit) if row_limit else 0
+
+        # Use streaming if row count meets or exceeds threshold
+        if actual_row_count is not None and actual_row_count >= threshold:
+            return True
+
+        return False
+
+    def _create_streaming_csv_response(
+        self,
+        result: dict[Any, Any],
+        form_data: dict[str, Any] | None = None,
+        filename: str | None = None,
+        expected_rows: int | None = None,
+    ) -> Response:
+        """Create a streaming CSV response for large datasets."""
+        query_context = result["query_context"]
+
+        # Use filename from frontend if provided, otherwise generate one
+        if not filename:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            chart_name = "export"
+
+            if form_data and form_data.get("slice_name"):
+                chart_name = form_data["slice_name"]
+            elif form_data and form_data.get("viz_type"):
+                chart_name = form_data["viz_type"]
+
+            # Sanitize chart name for filename
+            filename = 
secure_filename(f"superset_{chart_name}_{timestamp}.csv")
+
+        logger.info(
+            "Creating streaming CSV response: %s (from frontend: %s)",
+            filename,
+            filename is not None,

Review Comment:
   `filename` can never be `None` at this point, since it's either a truthy 
string or the result from `secure_filename`, so `filename is not None` will 
always log `True`.



##########
superset-frontend/src/components/StreamingExportModal/StreamingExportModal.tsx:
##########
@@ -0,0 +1,381 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import { t } from '@superset-ui/core';
+import { styled, useTheme } from '@apache-superset/core/ui';
+import {
+  Modal,
+  Button,
+  Typography,
+  Progress,
+} from '@superset-ui/core/components';
+import { Icons } from '@superset-ui/core/components/Icons';
+
+const { Text } = Typography;
+
+export enum ExportStatus {
+  STREAMING = 'streaming',
+  COMPLETED = 'completed',
+  ERROR = 'error',
+  CANCELLED = 'cancelled',
+}
+
+const MAX_PROGRESS_PERCENT = 99;
+const COMPLETED_PERCENT = 100;
+
+export interface StreamingProgress {
+  totalRows?: number;
+  rowsProcessed: number;
+  totalSize: number;
+  status: ExportStatus;
+  downloadUrl?: string;
+  error?: string;
+  filename?: string;
+  speed?: number;
+  mbPerSecond?: number;
+  elapsedTime?: number;
+  retryCount?: number;
+}
+
+interface StreamingExportModalProps {
+  visible: boolean;
+  onCancel: () => void;
+  onRetry?: () => void;
+  onDownload?: () => void;
+  progress: StreamingProgress;
+}
+
+const ModalContent = styled.div`
+  ${({ theme }) => `
+    padding: ${theme.sizeUnit * 4}px 0 ${theme.sizeUnit * 2}px;
+  `}
+`;
+
+const ProgressSection = styled.div`
+  ${({ theme }) => `
+    margin: ${theme.sizeUnit * 6}px 0;
+    position: relative;
+  `}
+`;
+
+const ProgressWrapper = styled.div`
+  ${({ theme }) => `
+    display: flex;
+    align-items: center;
+    gap: ${theme.sizeUnit * 3}px;
+  `}
+`;
+
+const StyledProgress = styled(Progress)`
+  flex: 1;
+`;
+
+const SuccessIcon = styled(Icons.CheckCircleFilled)`
+  ${({ theme }) => `
+    color: ${theme.colorSuccess};
+    font-size: ${theme.sizeUnit * 6}px;
+    flex-shrink: 0;
+  `}
+`;
+
+const ErrorIconWrapper = styled.div`
+  ${({ theme }) => `
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    width: ${theme.sizeUnit * 4}px;
+    height: ${theme.sizeUnit * 4}px;
+    background-color: ${theme.colorError};
+    border-radius: 50%;
+    flex-shrink: 0;
+  `}
+`;
+
+const ErrorIconStyled = styled(Icons.CloseOutlined)`
+  ${({ theme }) => `
+    color: ${theme.colorWhite};
+    font-size: ${theme.sizeUnit * 2.5}px;
+  `}
+`;
+
+const ActionButtons = styled.div`
+  ${({ theme }) => `
+    display: flex;
+    gap: ${theme.sizeUnit * 2}px;
+    justify-content: flex-end;
+  `}
+`;
+
+const CenteredText = styled(Text)`
+  ${({ theme }) => `
+    display: block;
+    text-align: center;
+    margin-top: ${theme.sizeUnit * 4}px;
+  `}
+`;
+
+const ErrorText = styled(CenteredText)`
+  ${({ theme }) => `
+    color: ${theme.colorError};
+  `}
+`;
+
+const CancelButton = styled(Button)`
+  ${({ theme }) => `
+    background-color: ${theme.colorSuccessBg};
+    color: ${theme.colorSuccess};
+    border-color: ${theme.colorSuccessBg};
+
+    &:hover {
+      background-color: ${theme.colorSuccessBg};
+      color: ${theme.colorSuccess};
+      border-color: ${theme.colorSuccess};
+    }
+
+    &:focus {
+      background-color: ${theme.colorSuccessBg};
+      color: ${theme.colorSuccess};
+      border-color: ${theme.colorSuccess};
+    }
+  `}
+`;
+
+const DownloadButton = styled(Button)`
+  ${({ theme }) => `
+    background-color: ${theme.colorSuccess};
+    border-color: ${theme.colorSuccess};
+    color: ${theme.colorWhite};
+
+    &:hover:not(:disabled) {
+      background-color: ${theme.colorSuccessActive};
+      border-color: ${theme.colorSuccessActive};
+      color: ${theme.colorWhite};
+    }
+
+    &:focus:not(:disabled) {
+      background-color: ${theme.colorSuccess};
+      border-color: ${theme.colorSuccess};
+      color: ${theme.colorWhite};
+    }
+
+    &:disabled {
+      background-color: ${theme.colorBgContainerDisabled};
+      border-color: ${theme.colorBgContainerDisabled};
+      color: ${theme.colorTextDisabled};
+    }
+  `}
+`;
+
+const triggerFileDownload = (url: string, filename: string) => {
+  const link = document.createElement('a');
+  link.href = url;
+  link.download = filename;
+  document.body.appendChild(link);
+  link.click();
+  document.body.removeChild(link);
+};
+
+const calculateProgressPercentage = (
+  status: ExportStatus,
+  totalRows?: number,
+  rowsProcessed?: number,
+): number => {
+  if (status === ExportStatus.COMPLETED) return COMPLETED_PERCENT;
+
+  if (!totalRows || totalRows <= 0 || !rowsProcessed) return 0;
+
+  const percentage = (rowsProcessed / totalRows) * 100;
+  return Math.round(Math.min(MAX_PROGRESS_PERCENT, percentage));

Review Comment:
   Seems like you don't want to show 100% when the progress is, say, 99.7%. 
There's an easier way to do this that doesn't rely on `MAX_PROGRESS_PERCENT`:
   
   ```suggestion
     return Math.floor(percentage);
   ```



##########
superset/charts/data/api.py:
##########
@@ -467,10 +486,33 @@ def _get_data_response(
         except ChartDataQueryFailedError as exc:
             return self.response_400(message=exc.message)
 
-        # Log is_cached if extra payload callback is provided
-        self._log_is_cached(result, add_extra_log_payload)
+            # Log is_cached if extra payload callback is provided
+        if add_extra_log_payload and result and "queries" in result:
+            is_cached_values = [query.get("is_cached") for query in 
result["queries"]]
+            if len(is_cached_values) == 1:
+                add_extra_log_payload(is_cached=is_cached_values[0])
+            elif is_cached_values:
+                add_extra_log_payload(is_cached=is_cached_values)

Review Comment:
   I would just always log a list here:
   
   ```suggestion
               add_extra_log_payload(is_cached=is_cached_values)
   ```



##########
superset/commands/streaming_export/base.py:
##########
@@ -0,0 +1,219 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Base command for streaming CSV exports."""
+
+from __future__ import annotations
+
+import csv
+import io
+import logging
+import time
+from abc import abstractmethod
+from typing import Any, Callable, Generator
+
+from flask import current_app as app
+from sqlalchemy import text
+
+from superset import db
+from superset.commands.base import BaseCommand
+
+logger = logging.getLogger(__name__)
+
+
+class BaseStreamingCSVExportCommand(BaseCommand):
+    """
+    Base class for streaming CSV export commands.
+
+    Provides shared functionality for:
+    - Generating CSV data in chunks
+    - Managing database connections
+    - Buffering data for efficient streaming
+    - Error handling with user-friendly messages
+
+    Subclasses must implement:
+    - _get_sql_and_database(): Return SQL query string and database object
+    - _get_row_limit(): Return optional row limit for the export
+    """
+
+    def __init__(self, chunk_size: int = 1000):
+        """
+        Initialize the streaming export command.
+
+        Args:
+            chunk_size: Number of rows to fetch per database query (default: 
1000)
+        """
+        self._chunk_size = chunk_size
+        self._current_app = app._get_current_object()
+
+    @abstractmethod
+    def _get_sql_and_database(self) -> tuple[str, Any]:
+        """
+        Get the SQL query and database for execution.
+
+        Returns:
+            Tuple of (sql_query, database_object)
+        """
+
+    @abstractmethod
+    def _get_row_limit(self) -> int | None:
+        """
+        Get the row limit for the export.
+
+        Returns:
+            Row limit or None for unlimited
+        """
+
+    def _write_csv_header(
+        self, columns: list[str], csv_writer: Any, buffer: io.StringIO
+    ) -> tuple[str, int]:
+        """Write CSV header and return header data with byte count."""
+        csv_writer.writerow(columns)
+        header_data = buffer.getvalue()
+        total_bytes = len(header_data.encode("utf-8"))
+        buffer.seek(0)
+        buffer.truncate()
+        return header_data, total_bytes
+
+    def _process_rows(
+        self,
+        result_proxy: Any,
+        csv_writer: Any,
+        buffer: io.StringIO,
+        limit: int | None,
+    ) -> Generator[tuple[str, int, int], None, None]:
+        """
+        Process database rows and yield CSV data chunks.
+
+        Yields tuples of (data_chunk, row_count, byte_count).
+        """
+        row_count = 0
+        flush_threshold = 65536  # 64KB
+
+        while rows := result_proxy.fetchmany(self._chunk_size):
+            for row in rows:
+                # Apply limit if specified
+                if limit is not None and row_count >= limit:
+                    break
+
+                csv_writer.writerow(row)
+                row_count += 1
+
+                # Check buffer size and flush if needed
+                current_size = buffer.tell()
+                if current_size >= flush_threshold:
+                    data = buffer.getvalue()
+                    data_bytes = len(data.encode("utf-8"))
+                    yield data, row_count, data_bytes
+                    buffer.seek(0)
+                    buffer.truncate()
+
+            # Break outer loop if limit reached
+            if limit is not None and row_count >= limit:
+                break
+
+        # Flush remaining buffer
+        if remaining_data := buffer.getvalue():
+            data_bytes = len(remaining_data.encode("utf-8"))
+            yield remaining_data, row_count, data_bytes
+
+    def _execute_query_and_stream(
+        self, sql: str, database: Any, limit: int | None
+    ) -> Generator[str, None, None]:
+        """Execute query with streaming and yield CSV chunks."""
+        start_time = time.time()
+        total_bytes = 0
+
+        with db.session() as session:
+            # Merge database to prevent DetachedInstanceError
+            merged_database = session.merge(database)
+
+            # Execute query with streaming
+            with merged_database.get_sqla_engine() as engine:
+                connection = engine.connect()

Review Comment:
   You might be able to use the connection as a context manager (`with 
engine.connect() as connection:`), so you don't need the try/finally block to 
close it.



##########
superset/sqllab/api.py:
##########
@@ -294,6 +299,119 @@ def export_csv(self, client_id: str) -> CsvResponse:
         )
         return response
 
+    @expose("/export_streaming/", methods=("POST",))
+    @protect()
+    @permission_name("read")
+    @statsd_metrics
+    @event_logger.log_this_with_context(
+        action=lambda self,
+        *args,
+        **kwargs: f"{self.__class__.__name__}.export_streaming_csv",
+        log_to_statsd=False,
+    )
+    def export_streaming_csv(self) -> Response:
+        """Export SQL query results using streaming for large datasets.
+        ---
+        post:
+          summary: Export SQL query results to CSV with streaming
+          requestBody:
+            description: Export parameters
+            required: true
+            content:
+              application/x-www-form-urlencoded:
+                schema:
+                  type: object
+                  properties:
+                    client_id:
+                      type: string
+                      description: The SQL query result identifier
+                    filename:
+                      type: string
+                      description: Optional filename for the export
+                    expected_rows:
+                      type: integer
+                      description: Optional expected row count for progress 
tracking
+          responses:
+            200:
+              description: Streaming CSV export
+              content:
+                text/csv:
+                  schema:
+                    type: string
+            400:
+              $ref: '#/components/responses/400'
+            401:
+              $ref: '#/components/responses/401'
+            403:
+              $ref: '#/components/responses/403'
+            404:
+              $ref: '#/components/responses/404'
+            500:
+              $ref: '#/components/responses/500'
+        """
+        # Extract parameters from form data
+        client_id = request.form.get("client_id")
+        filename = request.form.get("filename")
+
+        if not client_id:
+            return self.response_400(message="client_id is required")
+
+        expected_rows = None
+        if expected_rows_str := request.form.get("expected_rows"):
+            try:
+                expected_rows = int(expected_rows_str)
+            except (ValueError, TypeError):
+                logger.warning("Invalid expected_rows value: %s", 
expected_rows_str)
+
+        return self._create_streaming_csv_response(client_id, filename, 
expected_rows)
+
+    def _create_streaming_csv_response(
+        self,
+        client_id: str,
+        filename: str | None = None,
+        expected_rows: int | None = None,
+    ) -> Response:
+        """Create a streaming CSV response for large SQL Lab result sets."""
+        # Execute streaming command
+        chunk_size = 1000
+        command = StreamingSqlResultExportCommand(client_id, chunk_size)
+        command.validate()
+
+        if not filename:
+            query = command._query
+            assert query is not None

Review Comment:
   This is not used?
   
   ```suggestion
   ```



##########
superset/sqllab/api.py:
##########
@@ -294,6 +299,119 @@ def export_csv(self, client_id: str) -> CsvResponse:
         )
         return response
 
+    @expose("/export_streaming/", methods=("POST",))
+    @protect()
+    @permission_name("read")
+    @statsd_metrics
+    @event_logger.log_this_with_context(
+        action=lambda self,
+        *args,
+        **kwargs: f"{self.__class__.__name__}.export_streaming_csv",
+        log_to_statsd=False,
+    )
+    def export_streaming_csv(self) -> Response:
+        """Export SQL query results using streaming for large datasets.
+        ---
+        post:
+          summary: Export SQL query results to CSV with streaming
+          requestBody:
+            description: Export parameters
+            required: true
+            content:
+              application/x-www-form-urlencoded:
+                schema:
+                  type: object
+                  properties:
+                    client_id:
+                      type: string
+                      description: The SQL query result identifier
+                    filename:
+                      type: string
+                      description: Optional filename for the export
+                    expected_rows:
+                      type: integer
+                      description: Optional expected row count for progress 
tracking
+          responses:
+            200:
+              description: Streaming CSV export
+              content:
+                text/csv:
+                  schema:
+                    type: string
+            400:
+              $ref: '#/components/responses/400'
+            401:
+              $ref: '#/components/responses/401'
+            403:
+              $ref: '#/components/responses/403'
+            404:
+              $ref: '#/components/responses/404'
+            500:
+              $ref: '#/components/responses/500'
+        """
+        # Extract parameters from form data
+        client_id = request.form.get("client_id")
+        filename = request.form.get("filename")
+
+        if not client_id:
+            return self.response_400(message="client_id is required")
+
+        expected_rows = None
+        if expected_rows_str := request.form.get("expected_rows"):
+            try:
+                expected_rows = int(expected_rows_str)
+            except (ValueError, TypeError):
+                logger.warning("Invalid expected_rows value: %s", 
expected_rows_str)
+
+        return self._create_streaming_csv_response(client_id, filename, 
expected_rows)
+
+    def _create_streaming_csv_response(
+        self,
+        client_id: str,
+        filename: str | None = None,
+        expected_rows: int | None = None,
+    ) -> Response:
+        """Create a streaming CSV response for large SQL Lab result sets."""
+        # Execute streaming command
+        chunk_size = 1000

Review Comment:
   I see this hardcoded as 1000 everywhere; we might want to make it 
configurable (we can do that in the future). I wonder if 1024 would be a better 
default, e.g. for page-size alignment.



##########
superset/commands/sql_lab/streaming_export_command.py:
##########
@@ -0,0 +1,145 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Command for streaming CSV exports of SQL Lab query results."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from flask_babel import gettext as __
+
+from superset import db
+from superset.commands.streaming_export.base import 
BaseStreamingCSVExportCommand
+from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
+from superset.exceptions import SupersetErrorException, 
SupersetSecurityException
+from superset.models.sql_lab import Query
+from superset.sql.parse import SQLScript
+from superset.sqllab.limiting_factor import LimitingFactor
+
+
+class StreamingSqlResultExportCommand(BaseStreamingCSVExportCommand):
+    """
+    Command to execute a streaming CSV export of SQL Lab query results.
+
+    This command handles SQL Lab-specific logic:
+    - Query validation and access control
+    - SQL parsing and limit extraction
+    - LimitingFactor-based row limit adjustment
+    """
+
+    def __init__(
+        self,
+        client_id: str,
+        chunk_size: int = 1000,
+    ):
+        """
+        Initialize the SQL Lab streaming export command.
+
+        Args:
+            client_id: The SQL Lab query client ID
+            chunk_size: Number of rows to fetch per database query (default: 
1000)
+        """
+        super().__init__(chunk_size)
+        self._client_id = client_id
+        self._query: Query | None = None
+
+    def validate(self) -> None:
+        """Validate permissions and query existence."""
+        self._query = (
+            
db.session.query(Query).filter_by(client_id=self._client_id).one_or_none()
+        )
+        if self._query is None:
+            raise SupersetErrorException(
+                SupersetError(
+                    message=__(
+                        "The query associated with these results could not be 
found. "
+                        "You need to re-run the original query."
+                    ),
+                    error_type=SupersetErrorType.RESULTS_BACKEND_ERROR,
+                    level=ErrorLevel.ERROR,
+                ),
+                status=404,
+            )
+
+        try:
+            self._query.raise_for_access()
+        except SupersetSecurityException as ex:
+            raise SupersetErrorException(
+                SupersetError(
+                    message=__("Cannot access the query"),
+                    error_type=SupersetErrorType.QUERY_SECURITY_ACCESS_ERROR,
+                    level=ErrorLevel.ERROR,
+                ),
+                status=403,
+            ) from ex
+
+    def _get_sql_and_database(self) -> tuple[str, Any]:
+        """
+        Get the SQL query and database for SQL Lab export.
+
+        Returns:
+            Tuple of (sql_query, database_object)
+        """
+        assert self._query is not None
+
+        select_sql = self._query.select_sql
+        executed_sql = self._query.executed_sql
+        database = self._query.database
+
+        # Get the SQL query
+        if select_sql:
+            sql = select_sql
+        else:
+            sql = executed_sql

Review Comment:
   More idiomatic:
   
   
   ```suggestion
           sql = select_sql or executed_sql
   ```



##########
superset/charts/data/api.py:
##########
@@ -507,3 +549,105 @@ def _create_query_context_from_form(
             return ChartDataQueryContextSchema().load(form_data)
         except KeyError as ex:
             raise ValidationError("Request is incorrect") from ex
+
+    def _should_use_streaming(
+        self, result: dict[Any, Any], form_data: dict[str, Any] | None = None
+    ) -> bool:
+        """Determine if streaming should be used based on actual row count 
threshold."""
+        query_context = result["query_context"]
+        result_format = query_context.result_format
+
+        # Only support CSV streaming currently
+        if result_format.lower() != "csv":
+            return False
+
+        # Get streaming threshold from config
+        threshold = app.config.get("CSV_STREAMING_ROW_THRESHOLD", 100000)
+
+        # Extract actual row count (same logic as frontend)
+        actual_row_count: int | None = None
+        viz_type = form_data.get("viz_type") if form_data else None
+
+        # For table viz, try to get actual row count from query results
+        if viz_type == "table" and result.get("queries"):
+            # Check if we have rowcount in the second query result (like 
frontend does)
+            queries = result.get("queries", [])
+            if len(queries) > 1 and queries[1].get("data"):
+                data = queries[1]["data"]
+                if isinstance(data, list) and len(data) > 0:
+                    rowcount = data[0].get("rowcount")
+                    actual_row_count = int(rowcount) if rowcount else None
+
+        # Fallback to row_limit if actual count not available
+        if actual_row_count is None:
+            if form_data and "row_limit" in form_data:
+                row_limit = form_data.get("row_limit", 0)
+                actual_row_count = int(row_limit) if row_limit else 0
+            elif query_context.form_data and "row_limit" in 
query_context.form_data:
+                row_limit = query_context.form_data.get("row_limit", 0)
+                actual_row_count = int(row_limit) if row_limit else 0
+
+        # Use streaming if row count meets or exceeds threshold
+        if actual_row_count is not None and actual_row_count >= threshold:
+            return True
+
+        return False

Review Comment:
   ```suggestion
           # Use streaming if row count meets or exceeds threshold
           return actual_row_count is not None and actual_row_count >= threshold
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] feat(streaming): Streaming CSV uploads for over 100k records for constant memory usage [superset]

Reply via email to