codeant-ai-for-open-source[bot] commented on code in PR #37791:
URL: https://github.com/apache/superset/pull/37791#discussion_r2779328870


##########
superset-frontend/src/features/databases/UploadDataModel/DataPreviewModal.tsx:
##########
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import { FunctionComponent, useEffect, useState } from 'react';
+import { t } from '@apache-superset/core';
+import { Modal, Table, TableSize } from '@superset-ui/core/components';
+import type { ColumnsType } from 'antd/es/table';
+import type { UploadType } from './uploadDataModalTour';
+
+const PREVIEW_ROW_LIMIT = 100;
+
+interface DataPreviewModalProps {
+  show: boolean;
+  onHide: () => void;
+  file: File | null;
+  type: UploadType;
+  delimiter?: string;
+  sheetName?: string;
+}
+
+async function parseCSV(
+  file: File,
+  delimiter: string,
+): Promise<{ columns: string[]; data: Record<string, string>[] }> {
+  const text = await file.text();
+  const lines = text.split(/\r?\n/).filter(line => line.trim().length > 0);
+  if (lines.length === 0) {
+    return { columns: [], data: [] };
+  }
+  const headerLine = lines[0];
+  const columns = headerLine
+    .split(delimiter)
+    .map((c, i) => c.trim() || `col_${i}`);
+  const dataRows = lines.slice(1, PREVIEW_ROW_LIMIT + 1);
+  const data = dataRows.map(row => {
+    const values = row.split(delimiter);
+    const obj: Record<string, string> = {};
+    columns.forEach((col, i) => {
+      obj[col] = values[i]?.trim() ?? '';
+    });
+    return obj;
+  });
+  return { columns, data };
+}
+
+async function parseExcel(
+  file: File,
+  sheetName?: string,
+): Promise<{ columns: string[]; data: Record<string, string>[] }> {
+  const XLSX = (await import(/* webpackChunkName: "xlsx" */ 'xlsx')).default;
+  const arrayBuffer = await file.arrayBuffer();
+  const workbook = XLSX.read(arrayBuffer, { type: 'array' });
+  const sheet = sheetName
+    ? workbook.Sheets[sheetName]
+    : workbook.Sheets[workbook.SheetNames[0]];
+  if (!sheet) {
+    return { columns: [], data: [] };
+  }
+  const jsonData = XLSX.utils.sheet_to_json<unknown[]>(sheet, {
+    header: 1,
+    defval: '',
+  }) as unknown[][];
+  if (jsonData.length === 0) {
+    return { columns: [], data: [] };
+  }
+  const headerRow = jsonData[0] as (string | number)[];
+  const columns = headerRow.map((c, i) => String(c ?? '').trim() || 
`col_${i}`);
+  const dataRows = jsonData.slice(1, PREVIEW_ROW_LIMIT + 1) as (
+    | string
+    | number
+  )[][];
+  const data = dataRows.map(row => {
+    const obj: Record<string, string> = {};
+    columns.forEach((col, i) => {
+      obj[col] = String(row[i] ?? '');
+    });
+    return obj;
+  });
+  return { columns, data };
+}
+
+export const DataPreviewModal: FunctionComponent<DataPreviewModalProps> = ({
+  show,
+  onHide,
+  file,
+  type,
+  delimiter = ',',
+  sheetName,
+}) => {
+  const [loading, setLoading] = useState(false);
+  const [columns, setColumns] = useState<string[]>([]);
+  const [data, setData] = useState<Record<string, string>[]>([]);
+  const [error, setError] = useState<string | null>(null);
+
+  useEffect(() => {
+    if (!show || !file) {
+      setColumns([]);
+      setData([]);
+      setError(null);
+      return;
+    }
+    if (type === 'columnar') {
+      setError(t('Data preview is not available for Parquet files.'));
+      setColumns([]);
+      setData([]);
+      return;
+    }
+    setLoading(true);
+    setError(null);
+    const parse =
+      type === 'csv' ? parseCSV(file, delimiter) : parseExcel(file, sheetName);
+    parse
+      .then(({ columns: cols, data: parsedData }) => {
+        setColumns(cols);
+        setData(parsedData);
+      })
+      .catch(err => {
+        setError(err?.message || t('Failed to parse file'));
+        setColumns([]);
+        setData([]);
+      })
+      .finally(() => setLoading(false));

Review Comment:
   **Suggestion:** The async parsing in the effect has no cancellation/guard, 
so if the user rapidly changes the file, closes the modal, or switches type 
while a previous parse is still in flight, the older promise can resolve later 
and overwrite state for the newer file or update state after unmount, causing 
incorrect previews and React warnings about state updates on unmounted 
components. Add a cancellation flag in the effect and check it in the 
then/catch/finally handlers, returning a cleanup function that flips this flag 
so outdated parses don't update state. [race condition]
   
   <details>
   <summary><b>Severity Level:</b> Major ⚠️</summary>
   
   ```mdx
   - ⚠️ Data preview may briefly show stale file contents.
   - ⚠️ Potential React warnings on rapid open/close of preview.
   - ⚠️ Affects CSV/Excel preview in upload-to-database flow.
   ```
   </details>
   
   ```suggestion
       let cancelled = false;
   
       if (!show || !file) {
         setColumns([]);
         setData([]);
         setError(null);
         return;
       }
       if (type === 'columnar') {
         setError(t('Data preview is not available for Parquet files.'));
         setColumns([]);
         setData([]);
         return;
       }
       setLoading(true);
       setError(null);
       const parse =
         type === 'csv' ? parseCSV(file, delimiter) : parseExcel(file, 
sheetName);
       parse
         .then(({ columns: cols, data: parsedData }) => {
           if (cancelled) {
             return;
           }
           setColumns(cols);
           setData(parsedData);
         })
         .catch(err => {
           if (cancelled) {
             return;
           }
           setError(err?.message || t('Failed to parse file'));
           setColumns([]);
           setData([]);
         })
         .finally(() => {
           if (!cancelled) {
             setLoading(false);
           }
         });
   
       return () => {
         cancelled = true;
       };
   ```
   <details>
   <summary><b>Steps of Reproduction ✅ </b></summary>
   
   ```mdx
   1. In the UI follow the PR test steps: click "+" → "Upload CSV or Excel to 
database"
   (feature entry point described in PR), then choose a CSV file and click the 
"Preview data"
   button so that `DataPreviewModal` from
   
`superset-frontend/src/features/databases/UploadDataModel/DataPreviewModal.tsx:97`
 is
   shown with `show=true` and `file` set.
   
   2. While the preview is loading (the `Loading...` state rendered at lines 
160–163 is
   visible), quickly close the modal (parent sets `show=false` and unmounts
   `DataPreviewModal`) or select a different file and re-open preview, which 
remounts the
   component with a new `file` prop.
   
   3. The original effect at lines 110–138 has already started an async parse 
via
   `parseCSV`/`parseExcel` and stored the resulting `Promise` in `parse`, but 
it does not
   register any cleanup or cancellation; when that promise resolves after 
unmount/remount,
   the `.then`/`.catch` handlers still invoke `setColumns`, `setData`, and 
`setError`.
   
   4. Because there is no `cancelled` guard or effect cleanup, the stale async 
handler can
   (a) update state on an unmounted component (leading to React dev warnings 
about state
   updates on unmounted components) or (b) overwrite the state for the newly 
mounted modal
   instance with results from the previous file/type, producing an incorrect 
data preview for
   the current upload session.
   ```
   </details>
   <details>
   <summary><b>Prompt for AI Agent 🤖 </b></summary>
   
   ```mdx
   This is a comment left during a code review.
   
   **Path:** 
superset-frontend/src/features/databases/UploadDataModel/DataPreviewModal.tsx
   **Line:** 111:137
   **Comment:**
        *Race Condition: The async parsing in the effect has no 
cancellation/guard, so if the user rapidly changes the file, closes the modal, 
or switches type while a previous parse is still in flight, the older promise 
can resolve later and overwrite state for the newer file or update state after 
unmount, causing incorrect previews and React warnings about state updates on 
unmounted components. Add a cancellation flag in the effect and check it in the 
then/catch/finally handlers, returning a cleanup function that flips this flag 
so outdated parses don't update state.
   
   Validate the correctness of the flagged issue. If correct, How can I resolve 
this? If you propose a fix, implement it and please make it concise.
   ```
   </details>
   <a 
href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37791&comment_hash=b986c473b339cda77ffcd1ce4283e4d1f53c82c813239f477e96bf19189ffe3f&reaction=like'>👍</a>
 | <a 
href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37791&comment_hash=b986c473b339cda77ffcd1ce4283e4d1f53c82c813239f477e96bf19189ffe3f&reaction=dislike'>👎</a>



##########
superset-frontend/src/features/databases/UploadDataModel/DataPreviewModal.tsx:
##########
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import { FunctionComponent, useEffect, useState } from 'react';
+import { t } from '@apache-superset/core';
+import { Modal, Table, TableSize } from '@superset-ui/core/components';
+import type { ColumnsType } from 'antd/es/table';
+import type { UploadType } from './uploadDataModalTour';
+
+const PREVIEW_ROW_LIMIT = 100;
+
+interface DataPreviewModalProps {
+  show: boolean;
+  onHide: () => void;
+  file: File | null;
+  type: UploadType;
+  delimiter?: string;
+  sheetName?: string;
+}
+
+async function parseCSV(
+  file: File,
+  delimiter: string,
+): Promise<{ columns: string[]; data: Record<string, string>[] }> {
+  const text = await file.text();
+  const lines = text.split(/\r?\n/).filter(line => line.trim().length > 0);
+  if (lines.length === 0) {
+    return { columns: [], data: [] };
+  }
+  const headerLine = lines[0];

Review Comment:
   **Suggestion:** CSV files saved with a UTF-8 BOM (e.g., from Excel) will 
include the BOM character at the start of the first header cell, so the first 
column name in the preview will contain an invisible character and not match 
the actual column name used elsewhere; stripping a leading BOM from the header 
line before splitting avoids this mismatch. Update the header line extraction 
to remove a potential BOM character. [logic error]
   
   <details>
   <summary><b>Severity Level:</b> Major ⚠️</summary>
   
   ```mdx
   - ⚠️ First CSV header in preview contains hidden BOM character.
   - ⚠️ Affects CSV preview in upload-to-database modal.
   - ⚠️ Can break string-based header comparisons or copies.
   ```
   </details>
   
   ```suggestion
     const headerLine = lines[0].replace(/^\uFEFF/, '');
   ```
   <details>
   <summary><b>Steps of Reproduction ✅ </b></summary>
   
   ```mdx
   1. Create or export a CSV file from a common tool (for example, Excel) that 
saves CSVs
   with a UTF-8 BOM, and ensure the file has at least one header row; this file 
will be used
   as input to the CSV preview path in `parseCSV`
   
(`superset-frontend/src/features/databases/UploadDataModel/DataPreviewModal.tsx:36–59`).
   
   2. In the UI, follow the PR steps: click "+" → "Upload CSV or Excel to 
database", upload
   the BOM-encoded CSV file, and click "Preview data" to open 
`DataPreviewModal` with
   `type='csv'` and `file` set (modal component defined at line 97).
   
   3. The `useEffect` at lines 110–138 detects `type === 'csv'` and calls 
`parseCSV(file,
   delimiter)`, which reads the file via `file.text()` and splits it into 
`lines` (lines
   40–41); the first element, `lines[0]`, still contains the leading BOM 
character.
   
   4. At line 45, `const headerLine = lines[0];` assigns the BOM-prefixed 
string directly,
   and the subsequent `headerLine.split(delimiter)` at lines 46–48 produces a 
first column
   name whose underlying value is `"\uFEFF<actualHeader>"`; this value is used 
as the column
   title/dataIndex in `tableColumns` (lines 140–148), so the first header in 
the preview
   table carries an invisible BOM character, which can cause subtle issues 
(e.g., copying the
   header text or matching by string) compared with a BOM-free header.
   ```
   </details>
   <details>
   <summary><b>Prompt for AI Agent 🤖 </b></summary>
   
   ```mdx
   This is a comment left during a code review.
   
   **Path:** 
superset-frontend/src/features/databases/UploadDataModel/DataPreviewModal.tsx
   **Line:** 45:45
   **Comment:**
        *Logic Error: CSV files saved with a UTF-8 BOM (e.g., from Excel) will 
include the BOM character at the start of the first header cell, so the first 
column name in the preview will contain an invisible character and not match 
the actual column name used elsewhere; stripping a leading BOM from the header 
line before splitting avoids this mismatch. Update the header line extraction 
to remove a potential BOM character.
   
   Validate the correctness of the flagged issue. If correct, How can I resolve 
this? If you propose a fix, implement it and please make it concise.
   ```
   </details>
   <a 
href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37791&comment_hash=57c2a067078688c4e1ad8c8452295a30a64332ccee40515bfc806469b14dc942&reaction=like'>👍</a>
 | <a 
href='https://app.codeant.ai/feedback?pr_url=https%3A%2F%2Fgithub.com%2Fapache%2Fsuperset%2Fpull%2F37791&comment_hash=57c2a067078688c4e1ad8c8452295a30a64332ccee40515bfc806469b14dc942&reaction=dislike'>👎</a>



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to