Copilot commented on code in PR #5260:
URL: https://github.com/apache/texera/pull/5260#discussion_r3455042342


##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.ts:
##########
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { createOpenAI } from "@ai-sdk/openai";
+import { generateText, type ModelMessage } from "ai";
+import { AppSettings } from "../../../common/app-setting";
+import { v4 as uuidv4 } from "uuid";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { OperatorPredicate } from "../../types/workflow-common.interface";

Review Comment:
   The LiteLLM proxy under `/api/chat/completions` is protected by Texera's JWT 
(Authorization: Bearer <access-token>). Using `apiKey` here will send the 
user's model key (or the default `dummy`) as the Bearer token, which the 
backend will reject with 401 Unauthorized.



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.ts:
##########
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { createOpenAI } from "@ai-sdk/openai";
+import { generateText, type ModelMessage } from "ai";
+import { AppSettings } from "../../../common/app-setting";
+import { v4 as uuidv4 } from "uuid";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { OperatorPredicate } from "../../types/workflow-common.interface";
+import {
+  TEXERA_OVERVIEW,
+  TUPLE_DOCUMENTATION,
+  TABLE_DOCUMENTATION,
+  OPERATOR_DOCUMENTATION,
+  UDF_INPUT_PORT_DOCUMENTATION,
+  EXAMPLE_OF_GOOD_CONVERSION,
+  VISUALIZER_DOCUMENTATION,
+  EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  WORKFLOW_PROMPT,
+  MAPPING_PROMPT,
+} from "./migration-prompts";
+
+interface Cell {
+  cell_type: string;
+  metadata: { [key: string]: any };
+  source: string;
+}
+
+export interface Notebook {
+  cells: Cell[];
+}
+
+interface WorkflowJSON {
+  operators: OperatorPredicate[];
+  operatorPositions: Record<string, { x: number; y: number }>;
+  links: any[];
+  commentBoxes: any[];
+  settings: {
+    dataTransferBatchSize: number;
+  };
+}
+
+interface CombinedMapping {
+  operator_to_cell: Record<string, string[]>;
+  cell_to_operator: Record<string, string[]>;
+}
+
+@Injectable()
+export class NotebookMigrationLLM {
+  private model: any;
+  private messages: ModelMessage[] = [];
+  private initialized = false;
+
+  private static readonly DOCUMENTATION: string[] = [
+    TEXERA_OVERVIEW,
+    TUPLE_DOCUMENTATION,
+    TABLE_DOCUMENTATION,
+    OPERATOR_DOCUMENTATION,
+    EXAMPLE_OF_GOOD_CONVERSION,
+    VISUALIZER_DOCUMENTATION,
+    UDF_INPUT_PORT_DOCUMENTATION,
+    EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  ];
+
+  constructor(
+    private config: GuiConfigService,
+    private workflowUtilService: WorkflowUtilService
+  ) {}
+
+  private get enabled(): boolean {
+    return this.config.env.pythonNotebookMigrationEnabled;
+  }
+
+  private assertEnabled(): void {
+    if (!this.enabled) {
+      throw new Error("Notebook migration feature is disabled");
+    }
+  }
+
+  private parseJsonResponse(raw: string, context: string): any {
+    // Trim first, then strip optional markdown code fences (```json ... ``` 
or ``` ... ```)
+    const cleaned = raw
+      .trim()
+      .replace(/^```[a-zA-Z]*\s*/, "")
+      .replace(/\s*```$/, "")
+      .trim();
+    try {
+      return JSON.parse(cleaned);
+    } catch (err) {
+      throw new Error(`Failed to parse LLM ${context} response as JSON: ${(err 
as Error).message}`);
+    }
+  }
+
+  /**
+   * Initialize a new LLM session with Texera documentation
+   */
+  public initialize(modelType: string = "gpt-5-mini", apiKey: string = 
"dummy"): void {
+    this.assertEnabled();
+    this.model = createOpenAI({
+      baseURL: new URL(`${AppSettings.getApiEndpoint()}`, 
document.baseURI).toString(),
+      // apiKey is required by the library for creating the OpenAI compatible 
client;
+      // For security reason, we store the apiKey at the backend, thus the 
value is dummy here.
+      apiKey: apiKey,
+    }).chat(modelType);
+

Review Comment:
   `initialize()` currently treats the caller-provided `apiKey` as the OpenAI 
Authorization token. Against Texera's `/api/chat/completions` proxy, this needs 
to be the Texera access token (JWT), otherwise all requests will be 
unauthorized.



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.ts:
##########
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { createOpenAI } from "@ai-sdk/openai";
+import { generateText, type ModelMessage } from "ai";
+import { AppSettings } from "../../../common/app-setting";
+import { v4 as uuidv4 } from "uuid";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { OperatorPredicate } from "../../types/workflow-common.interface";
+import {
+  TEXERA_OVERVIEW,
+  TUPLE_DOCUMENTATION,
+  TABLE_DOCUMENTATION,
+  OPERATOR_DOCUMENTATION,
+  UDF_INPUT_PORT_DOCUMENTATION,
+  EXAMPLE_OF_GOOD_CONVERSION,
+  VISUALIZER_DOCUMENTATION,
+  EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  WORKFLOW_PROMPT,
+  MAPPING_PROMPT,
+} from "./migration-prompts";
+
+interface Cell {
+  cell_type: string;
+  metadata: { [key: string]: any };
+  source: string;
+}
+
+export interface Notebook {
+  cells: Cell[];
+}
+
+interface WorkflowJSON {
+  operators: OperatorPredicate[];
+  operatorPositions: Record<string, { x: number; y: number }>;
+  links: any[];
+  commentBoxes: any[];
+  settings: {
+    dataTransferBatchSize: number;
+  };
+}

Review Comment:
   `WorkflowJSON.settings` omits `executionMode`, but workflow settings in the 
frontend are defined as `{ dataTransferBatchSize, executionMode }`. Returning a 
workflow JSON without `executionMode` can break downstream consumers that 
assume the field exists.



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.ts:
##########
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { createOpenAI } from "@ai-sdk/openai";
+import { generateText, type ModelMessage } from "ai";
+import { AppSettings } from "../../../common/app-setting";
+import { v4 as uuidv4 } from "uuid";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { OperatorPredicate } from "../../types/workflow-common.interface";
+import {
+  TEXERA_OVERVIEW,
+  TUPLE_DOCUMENTATION,
+  TABLE_DOCUMENTATION,
+  OPERATOR_DOCUMENTATION,
+  UDF_INPUT_PORT_DOCUMENTATION,
+  EXAMPLE_OF_GOOD_CONVERSION,
+  VISUALIZER_DOCUMENTATION,
+  EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  WORKFLOW_PROMPT,
+  MAPPING_PROMPT,
+} from "./migration-prompts";
+
+interface Cell {
+  cell_type: string;
+  metadata: { [key: string]: any };
+  source: string;
+}
+
+export interface Notebook {
+  cells: Cell[];
+}
+
+interface WorkflowJSON {
+  operators: OperatorPredicate[];
+  operatorPositions: Record<string, { x: number; y: number }>;
+  links: any[];
+  commentBoxes: any[];
+  settings: {
+    dataTransferBatchSize: number;
+  };
+}
+
+interface CombinedMapping {
+  operator_to_cell: Record<string, string[]>;
+  cell_to_operator: Record<string, string[]>;
+}
+
+@Injectable()
+export class NotebookMigrationLLM {
+  private model: any;
+  private messages: ModelMessage[] = [];
+  private initialized = false;
+
+  private static readonly DOCUMENTATION: string[] = [
+    TEXERA_OVERVIEW,
+    TUPLE_DOCUMENTATION,
+    TABLE_DOCUMENTATION,
+    OPERATOR_DOCUMENTATION,
+    EXAMPLE_OF_GOOD_CONVERSION,
+    VISUALIZER_DOCUMENTATION,
+    UDF_INPUT_PORT_DOCUMENTATION,
+    EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  ];
+
+  constructor(
+    private config: GuiConfigService,
+    private workflowUtilService: WorkflowUtilService
+  ) {}
+
+  private get enabled(): boolean {
+    return this.config.env.pythonNotebookMigrationEnabled;
+  }
+
+  private assertEnabled(): void {
+    if (!this.enabled) {
+      throw new Error("Notebook migration feature is disabled");
+    }
+  }
+
+  private parseJsonResponse(raw: string, context: string): any {
+    // Trim first, then strip optional markdown code fences (```json ... ``` 
or ``` ... ```)
+    const cleaned = raw
+      .trim()
+      .replace(/^```[a-zA-Z]*\s*/, "")
+      .replace(/\s*```$/, "")
+      .trim();
+    try {
+      return JSON.parse(cleaned);
+    } catch (err) {
+      throw new Error(`Failed to parse LLM ${context} response as JSON: ${(err 
as Error).message}`);
+    }
+  }
+
+  /**
+   * Initialize a new LLM session with Texera documentation
+   */
+  public initialize(modelType: string = "gpt-5-mini", apiKey: string = 
"dummy"): void {
+    this.assertEnabled();
+    this.model = createOpenAI({
+      baseURL: new URL(`${AppSettings.getApiEndpoint()}`, 
document.baseURI).toString(),
+      // apiKey is required by the library for creating the OpenAI compatible 
client;
+      // For security reason, we store the apiKey at the backend, thus the 
value is dummy here.
+      apiKey: apiKey,
+    }).chat(modelType);
+
+    this.messages = [
+      ...NotebookMigrationLLM.DOCUMENTATION.map(
+        (doc): ModelMessage => ({
+          role: "system",
+          content: doc,
+        })
+      ),
+    ];
+
+    this.initialized = true;
+  }
+
+  /**
+   * Verify the connection to the LLM using the given API key
+   */
+  public async verifyConnection(): Promise<boolean> {
+    if (!this.enabled) return false;
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    try {
+      await generateText({
+        model: this.model,
+        messages: [
+          {
+            role: "user",
+            content: "ping",
+          },
+        ],
+        maxOutputTokens: 10,
+      });
+
+      return true;
+    } catch (err) {
+      console.error("API key verification failed:", err);
+      return false;
+    }
+  }
+
+  /**
+   * Send a prompt and receive a response.
+   * All prior documentation and conversation is preserved.
+   */
+  private async sendPrompt(prompt: string): Promise<string> {
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    this.messages.push({
+      role: "user",
+      content: prompt,
+    });
+
+    const result = await generateText({
+      model: this.model,
+      messages: this.messages,
+    });
+
+    this.messages.push({
+      role: "assistant",
+      content: result.text,
+    });
+
+    return result.text;
+  }
+
+  /**
+   * Send a Jupyter Notebook to be converted into a workflow and mapping.
+   */
+  public async convertNotebookToWorkflow(notebook: Notebook): Promise<string> {
+    this.assertEnabled();
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    const codeCells = notebook.cells.filter(cell => cell.cell_type === "code");
+    const notebookString = codeCells
+      .map(cell => {
+        const uuid = String(cell.metadata.uuid);
+        return `# START ${uuid}\n${cell.source}\n# END ${uuid}`;
+      })
+      .join("\n\n");

Review Comment:
   `String(cell.metadata.uuid)` turns missing UUIDs into the literal marker 
`undefined`. If multiple cells lack UUIDs, they will collide in the prompt and 
make the cell↔operator mapping ambiguous. This should be validated (or 
synthesized) before prompting.



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.ts:
##########
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { createOpenAI } from "@ai-sdk/openai";
+import { generateText, type ModelMessage } from "ai";
+import { AppSettings } from "../../../common/app-setting";
+import { v4 as uuidv4 } from "uuid";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { OperatorPredicate } from "../../types/workflow-common.interface";
+import {
+  TEXERA_OVERVIEW,
+  TUPLE_DOCUMENTATION,
+  TABLE_DOCUMENTATION,
+  OPERATOR_DOCUMENTATION,
+  UDF_INPUT_PORT_DOCUMENTATION,
+  EXAMPLE_OF_GOOD_CONVERSION,
+  VISUALIZER_DOCUMENTATION,
+  EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  WORKFLOW_PROMPT,
+  MAPPING_PROMPT,
+} from "./migration-prompts";
+
+interface Cell {
+  cell_type: string;
+  metadata: { [key: string]: any };
+  source: string;
+}
+
+export interface Notebook {
+  cells: Cell[];
+}
+
+interface WorkflowJSON {
+  operators: OperatorPredicate[];
+  operatorPositions: Record<string, { x: number; y: number }>;
+  links: any[];
+  commentBoxes: any[];
+  settings: {
+    dataTransferBatchSize: number;
+  };
+}
+
+interface CombinedMapping {
+  operator_to_cell: Record<string, string[]>;
+  cell_to_operator: Record<string, string[]>;
+}
+
+@Injectable()
+export class NotebookMigrationLLM {
+  private model: any;
+  private messages: ModelMessage[] = [];
+  private initialized = false;
+
+  private static readonly DOCUMENTATION: string[] = [
+    TEXERA_OVERVIEW,
+    TUPLE_DOCUMENTATION,
+    TABLE_DOCUMENTATION,
+    OPERATOR_DOCUMENTATION,
+    EXAMPLE_OF_GOOD_CONVERSION,
+    VISUALIZER_DOCUMENTATION,
+    UDF_INPUT_PORT_DOCUMENTATION,
+    EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  ];
+
+  constructor(
+    private config: GuiConfigService,
+    private workflowUtilService: WorkflowUtilService
+  ) {}
+
+  private get enabled(): boolean {
+    return this.config.env.pythonNotebookMigrationEnabled;
+  }
+
+  private assertEnabled(): void {
+    if (!this.enabled) {
+      throw new Error("Notebook migration feature is disabled");
+    }
+  }
+
+  private parseJsonResponse(raw: string, context: string): any {
+    // Trim first, then strip optional markdown code fences (```json ... ``` 
or ``` ... ```)
+    const cleaned = raw
+      .trim()
+      .replace(/^```[a-zA-Z]*\s*/, "")
+      .replace(/\s*```$/, "")
+      .trim();
+    try {
+      return JSON.parse(cleaned);
+    } catch (err) {
+      throw new Error(`Failed to parse LLM ${context} response as JSON: ${(err 
as Error).message}`);
+    }
+  }

Review Comment:
   `parseJsonResponse` only strips markdown fences when they sit at the very 
start/end of the trimmed response. In practice, LLMs often wrap JSON in prose 
(e.g. "Here is the JSON: ```json…```"). Since this is a frontend feature, it's 
worth being more forgiving to avoid hard failures on minor formatting drift.



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.spec.ts:
##########
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { NotebookMigrationLLM, Notebook } from "./migration-llm";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { generateText } from "ai";
+import type { Mock } from "vitest";
+
+// The LLM transport and OpenAI client are mocked so the tests exercise only 
the
+// deterministic transformation (parsing, operator/edge construction, 
cell<->operator mapping).
+vi.mock("ai", () => ({ generateText: vi.fn() }));
+vi.mock("@ai-sdk/openai", () => ({
+  createOpenAI: vi.fn(() => ({ chat: vi.fn(() => ({})) })),
+}));
+
+const mockGenerateText = generateText as unknown as Mock;
+
+describe("NotebookMigrationLLM", () => {
+  let opIdCounter = 0;
+  let stubUtil: WorkflowUtilService;
+
+  // Build a fresh, initialized session with stubbed dependencies. The stubbed
+  // getNewOperatorPredicate hands out deterministic ids (PythonUDFV2-0, -1, 
...).
+  function makeLLM(): NotebookMigrationLLM {
+    const stubConfig = {
+      env: { pythonNotebookMigrationEnabled: true },
+    } as unknown as GuiConfigService;
+
+    stubUtil = {
+      getNewOperatorPredicate: vi.fn((operatorType: string, 
customDisplayName?: string) => ({
+        operatorID: `${operatorType}-${opIdCounter++}`,
+        operatorType,
+        operatorVersion: "test-version",
+        operatorProperties: { workers: 1, defaultEnv: true, envName: "" },
+        inputPorts: [{ portID: "input-0", disallowMultiInputs: false }],
+        outputPorts: [{ portID: "output-0" }],
+        showAdvanced: false,
+        isDisabled: false,
+        customDisplayName,
+        dynamicInputPorts: true,
+        dynamicOutputPorts: true,
+      })),
+    } as unknown as WorkflowUtilService;
+
+    const llm = new NotebookMigrationLLM(stubConfig, stubUtil);
+    llm.initialize();
+    return llm;
+  }
+
+  function codeCell(uuid: string | undefined, source: string) {
+    return { cell_type: "code", metadata: uuid === undefined ? {} : { uuid }, 
source };
+  }
+
+  // Queue the two responses convertNotebookToWorkflow consumes, in order.
+  function mockResponses(workflowResponse: string, mappingResponse: string) {
+    mockGenerateText.mockResolvedValueOnce({ text: workflowResponse 
}).mockResolvedValueOnce({ text: mappingResponse });
+  }
+
+  beforeEach(() => {
+    opIdCounter = 0;
+    mockGenerateText.mockReset();
+  });
+
+  describe("convertNotebookToWorkflow", () => {
+    it("builds operators, links, positions, and a bidirectional mapping", 
async () => {
+      const notebook: Notebook = {
+        cells: [codeCell("CELL1", "print(1)"), codeCell("CELL2", "print(2)")],
+      };
+      mockResponses(
+        JSON.stringify({
+          code: { UDF1: "code1", UDF2: "code2" },
+          edges: [["UDF1", "UDF2"]],
+          outputs: { UDF1: ["a", "b"], UDF2: ["c"] },
+        }),
+        JSON.stringify({ UDF1: ["CELL1"], UDF2: ["CELL2"] })
+      );
+
+      const { workflowJSON, workflowNotebookMapping } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      expect(workflowJSON.operators.map((op: any) => 
op.operatorID)).toEqual(["PythonUDFV2-0", "PythonUDFV2-1"]);
+      expect(workflowJSON.operators[0].operatorProperties).toMatchObject({
+        code: "code1",
+        retainInputColumns: false,
+      });
+      expect(workflowJSON.operatorPositions).toEqual({
+        "PythonUDFV2-0": { x: 140, y: 0 },
+        "PythonUDFV2-1": { x: 280, y: 0 },
+      });
+      expect(workflowJSON.links).toHaveLength(1);
+      expect(workflowJSON.links[0].source).toEqual({ operatorID: 
"PythonUDFV2-0", portID: "output-0" });
+      expect(workflowJSON.links[0].target).toEqual({ operatorID: 
"PythonUDFV2-1", portID: "input-0" });
+      expect(workflowNotebookMapping.operator_to_cell).toEqual({
+        "PythonUDFV2-0": ["CELL1"],
+        "PythonUDFV2-1": ["CELL2"],
+      });
+      expect(workflowNotebookMapping.cell_to_operator).toEqual({
+        CELL1: ["PythonUDFV2-0"],
+        CELL2: ["PythonUDFV2-1"],
+      });
+    });
+
+    // NOTE (C2): the attributeType is currently hardcoded to "binary". If C2 
lands as
+    // "LLM returns per-column types", update the expected attributeType 
values here.
+    it("declares output columns with attributeType binary", async () => {
+      const notebook: Notebook = { cells: [codeCell("CELL1", "x = 1")] };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "code1" }, edges: [], outputs: { UDF1: 
["a", "b"] } }),
+        JSON.stringify({ UDF1: ["CELL1"] })
+      );
+
+      const { workflowJSON } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      
expect(workflowJSON.operators[0].operatorProperties.outputColumns).toEqual([
+        { attributeName: "a", attributeType: "binary" },
+        { attributeName: "b", attributeType: "binary" },
+      ]);
+    });
+
+    it("maps multiple cells onto the same UDF, and one cell onto multiple 
UDFs", async () => {
+      const notebook: Notebook = {
+        cells: [codeCell("CELL1", "a"), codeCell("CELL2", "b")],
+      };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "c1", UDF2: "c2" }, edges: [], outputs: 
{} }),
+        JSON.stringify({ UDF1: ["CELL1", "CELL2"], UDF2: ["CELL1"] })
+      );
+
+      const { workflowNotebookMapping } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      expect(workflowNotebookMapping.operator_to_cell).toEqual({
+        "PythonUDFV2-0": ["CELL1", "CELL2"],
+        "PythonUDFV2-1": ["CELL1"],
+      });
+      expect(workflowNotebookMapping.cell_to_operator).toEqual({
+        CELL1: ["PythonUDFV2-0", "PythonUDFV2-1"],
+        CELL2: ["PythonUDFV2-0"],
+      });
+    });
+
+    it("produces a link with an undefined endpoint when an edge references an 
unknown UDF id", async () => {
+      const notebook: Notebook = { cells: [codeCell("CELL1", "a")] };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "c1" }, edges: [["UDF1", "UDFX"]], 
outputs: {} }),
+        JSON.stringify({ UDF1: ["CELL1"] })
+      );
+
+      const { workflowJSON } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      // Documents current behavior: udfMappingToUUID["UDFX"] is undefined.
+      expect(workflowJSON.links[0].source.operatorID).toBe("PythonUDFV2-0");
+      expect(workflowJSON.links[0].target.operatorID).toBeUndefined();
+    });

Review Comment:
   If `convertNotebookToWorkflow` is updated to validate edges and throw on 
unknown UDF ids, this test should assert the error rather than documenting a 
workflow link with `operatorID: undefined` (which is not a usable workflow).



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.spec.ts:
##########
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { NotebookMigrationLLM, Notebook } from "./migration-llm";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { generateText } from "ai";
+import type { Mock } from "vitest";
+
+// The LLM transport and OpenAI client are mocked so the tests exercise only 
the
+// deterministic transformation (parsing, operator/edge construction, 
cell<->operator mapping).
+vi.mock("ai", () => ({ generateText: vi.fn() }));
+vi.mock("@ai-sdk/openai", () => ({
+  createOpenAI: vi.fn(() => ({ chat: vi.fn(() => ({})) })),
+}));
+
+const mockGenerateText = generateText as unknown as Mock;
+
+describe("NotebookMigrationLLM", () => {
+  let opIdCounter = 0;
+  let stubUtil: WorkflowUtilService;
+
+  // Build a fresh, initialized session with stubbed dependencies. The stubbed
+  // getNewOperatorPredicate hands out deterministic ids (PythonUDFV2-0, -1, 
...).
+  function makeLLM(): NotebookMigrationLLM {
+    const stubConfig = {
+      env: { pythonNotebookMigrationEnabled: true },
+    } as unknown as GuiConfigService;
+
+    stubUtil = {
+      getNewOperatorPredicate: vi.fn((operatorType: string, 
customDisplayName?: string) => ({
+        operatorID: `${operatorType}-${opIdCounter++}`,
+        operatorType,
+        operatorVersion: "test-version",
+        operatorProperties: { workers: 1, defaultEnv: true, envName: "" },
+        inputPorts: [{ portID: "input-0", disallowMultiInputs: false }],
+        outputPorts: [{ portID: "output-0" }],
+        showAdvanced: false,
+        isDisabled: false,
+        customDisplayName,
+        dynamicInputPorts: true,
+        dynamicOutputPorts: true,
+      })),
+    } as unknown as WorkflowUtilService;
+
+    const llm = new NotebookMigrationLLM(stubConfig, stubUtil);
+    llm.initialize();
+    return llm;
+  }
+
+  function codeCell(uuid: string | undefined, source: string) {
+    return { cell_type: "code", metadata: uuid === undefined ? {} : { uuid }, 
source };
+  }
+
+  // Queue the two responses convertNotebookToWorkflow consumes, in order.
+  function mockResponses(workflowResponse: string, mappingResponse: string) {
+    mockGenerateText.mockResolvedValueOnce({ text: workflowResponse 
}).mockResolvedValueOnce({ text: mappingResponse });
+  }
+
+  beforeEach(() => {
+    opIdCounter = 0;
+    mockGenerateText.mockReset();
+  });
+
+  describe("convertNotebookToWorkflow", () => {
+    it("builds operators, links, positions, and a bidirectional mapping", 
async () => {
+      const notebook: Notebook = {
+        cells: [codeCell("CELL1", "print(1)"), codeCell("CELL2", "print(2)")],
+      };
+      mockResponses(
+        JSON.stringify({
+          code: { UDF1: "code1", UDF2: "code2" },
+          edges: [["UDF1", "UDF2"]],
+          outputs: { UDF1: ["a", "b"], UDF2: ["c"] },
+        }),
+        JSON.stringify({ UDF1: ["CELL1"], UDF2: ["CELL2"] })
+      );
+
+      const { workflowJSON, workflowNotebookMapping } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      expect(workflowJSON.operators.map((op: any) => 
op.operatorID)).toEqual(["PythonUDFV2-0", "PythonUDFV2-1"]);
+      expect(workflowJSON.operators[0].operatorProperties).toMatchObject({
+        code: "code1",
+        retainInputColumns: false,
+      });
+      expect(workflowJSON.operatorPositions).toEqual({
+        "PythonUDFV2-0": { x: 140, y: 0 },
+        "PythonUDFV2-1": { x: 280, y: 0 },
+      });
+      expect(workflowJSON.links).toHaveLength(1);
+      expect(workflowJSON.links[0].source).toEqual({ operatorID: 
"PythonUDFV2-0", portID: "output-0" });
+      expect(workflowJSON.links[0].target).toEqual({ operatorID: 
"PythonUDFV2-1", portID: "input-0" });
+      expect(workflowNotebookMapping.operator_to_cell).toEqual({
+        "PythonUDFV2-0": ["CELL1"],
+        "PythonUDFV2-1": ["CELL2"],
+      });
+      expect(workflowNotebookMapping.cell_to_operator).toEqual({
+        CELL1: ["PythonUDFV2-0"],
+        CELL2: ["PythonUDFV2-1"],
+      });
+    });
+
+    // NOTE (C2): the attributeType is currently hardcoded to "binary". If C2 
lands as
+    // "LLM returns per-column types", update the expected attributeType 
values here.
+    it("declares output columns with attributeType binary", async () => {
+      const notebook: Notebook = { cells: [codeCell("CELL1", "x = 1")] };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "code1" }, edges: [], outputs: { UDF1: 
["a", "b"] } }),
+        JSON.stringify({ UDF1: ["CELL1"] })
+      );
+
+      const { workflowJSON } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      
expect(workflowJSON.operators[0].operatorProperties.outputColumns).toEqual([
+        { attributeName: "a", attributeType: "binary" },
+        { attributeName: "b", attributeType: "binary" },
+      ]);
+    });
+
+    it("maps multiple cells onto the same UDF, and one cell onto multiple 
UDFs", async () => {
+      const notebook: Notebook = {
+        cells: [codeCell("CELL1", "a"), codeCell("CELL2", "b")],
+      };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "c1", UDF2: "c2" }, edges: [], outputs: 
{} }),
+        JSON.stringify({ UDF1: ["CELL1", "CELL2"], UDF2: ["CELL1"] })
+      );
+
+      const { workflowNotebookMapping } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      expect(workflowNotebookMapping.operator_to_cell).toEqual({
+        "PythonUDFV2-0": ["CELL1", "CELL2"],
+        "PythonUDFV2-1": ["CELL1"],
+      });
+      expect(workflowNotebookMapping.cell_to_operator).toEqual({
+        CELL1: ["PythonUDFV2-0", "PythonUDFV2-1"],
+        CELL2: ["PythonUDFV2-0"],
+      });
+    });
+
+    it("produces a link with an undefined endpoint when an edge references an 
unknown UDF id", async () => {
+      const notebook: Notebook = { cells: [codeCell("CELL1", "a")] };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "c1" }, edges: [["UDF1", "UDFX"]], 
outputs: {} }),
+        JSON.stringify({ UDF1: ["CELL1"] })
+      );
+
+      const { workflowJSON } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      // Documents current behavior: udfMappingToUUID["UDFX"] is undefined.
+      expect(workflowJSON.links[0].source.operatorID).toBe("PythonUDFV2-0");
+      expect(workflowJSON.links[0].target.operatorID).toBeUndefined();
+    });
+
+    it("handles empty code, edges, and outputs", async () => {
+      const notebook: Notebook = { cells: [] };
+      mockResponses(JSON.stringify({ code: {}, edges: [], outputs: {} }), 
JSON.stringify({}));
+
+      const { workflowJSON, workflowNotebookMapping } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      expect(workflowJSON.operators).toEqual([]);
+      expect(workflowJSON.links).toEqual([]);
+      expect(workflowNotebookMapping.operator_to_cell).toEqual({});
+      expect(workflowNotebookMapping.cell_to_operator).toEqual({});
+    });
+
+    it("emits the 'undefined' cell marker in the prompt when a code cell lacks 
metadata.uuid", async () => {
+      const notebook: Notebook = { cells: [codeCell(undefined, "print(1)")] };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "c1" }, edges: [], outputs: {} }),
+        JSON.stringify({ UDF1: ["CELL1"] })
+      );
+
+      await makeLLM().convertNotebookToWorkflow(notebook);
+
+      // The notebook string (embedded in the workflow prompt) is sent to 
generateText.
+      // messages is a shared, mutated array, so search every message content 
rather than
+      // assuming a fixed index.
+      const allPromptContent = mockGenerateText.mock.calls
+        .flatMap(call => call[0].messages.map((m: any) => m.content))
+        .join("\n");
+      expect(allPromptContent).toContain("# START undefined");
+    });
+  });
+
+  describe("parseJsonResponse", () => {
+    // parseJsonResponse is private; cast to access it directly for focused 
coverage.
+    const parse = (raw: string) => (makeLLM() as any).parseJsonResponse(raw, 
"workflow");
+
+    it("parses bare JSON", () => {
+      expect(parse('{"a":1}')).toEqual({ a: 1 });
+    });
+
+    it("strips a ```json fence", () => {
+      expect(parse('```json\n{"a":1}\n```')).toEqual({ a: 1 });
+    });
+
+    it("strips a plain ``` fence", () => {
+      expect(parse('```\n{"a":1}\n```')).toEqual({ a: 1 });
+    });
+
+    it("tolerates surrounding whitespace and newlines around the fence", () => 
{
+      expect(parse('\n\n  ```json\n{"a":1}\n```  \n\n')).toEqual({ a: 1 });
+    });
+
+    it("throws a contextual error on malformed JSON", () => {
+      expect(() => parse("not json")).toThrow("Failed to parse LLM workflow 
response as JSON");
+    });
+
+    it("does not strip a fence preceded by prose (documents current 
limitation)", () => {
+      expect(() => parse('Here is the JSON: 
```json\n{"a":1}\n```\nThanks!')).toThrow(
+        "Failed to parse LLM workflow response as JSON"
+      );
+    });

Review Comment:
   If `parseJsonResponse` is made more forgiving (extracting fenced JSON even 
when surrounded by prose), update this test to assert successful parsing rather 
than expecting an error.



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.ts:
##########
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { createOpenAI } from "@ai-sdk/openai";
+import { generateText, type ModelMessage } from "ai";
+import { AppSettings } from "../../../common/app-setting";
+import { v4 as uuidv4 } from "uuid";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { OperatorPredicate } from "../../types/workflow-common.interface";
+import {
+  TEXERA_OVERVIEW,
+  TUPLE_DOCUMENTATION,
+  TABLE_DOCUMENTATION,
+  OPERATOR_DOCUMENTATION,
+  UDF_INPUT_PORT_DOCUMENTATION,
+  EXAMPLE_OF_GOOD_CONVERSION,
+  VISUALIZER_DOCUMENTATION,
+  EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  WORKFLOW_PROMPT,
+  MAPPING_PROMPT,
+} from "./migration-prompts";
+
+interface Cell {
+  cell_type: string;
+  metadata: { [key: string]: any };
+  source: string;
+}

Review Comment:
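   `Cell.source` in `.ipynb` files is commonly an array of strings (one entry per 
   line, each usually ending in `\n`). Treating it as a single string means the 
   `${cell.source}` interpolation stringifies the array with commas between 
   entries, which can produce prompts with comma-separated text and change 
   semantics. Consider supporting both `string` and `string[]`, joining arrays 
   with `""` before building the prompt.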



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.ts:
##########
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { createOpenAI } from "@ai-sdk/openai";
+import { generateText, type ModelMessage } from "ai";
+import { AppSettings } from "../../../common/app-setting";
+import { v4 as uuidv4 } from "uuid";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { OperatorPredicate } from "../../types/workflow-common.interface";
+import {
+  TEXERA_OVERVIEW,
+  TUPLE_DOCUMENTATION,
+  TABLE_DOCUMENTATION,
+  OPERATOR_DOCUMENTATION,
+  UDF_INPUT_PORT_DOCUMENTATION,
+  EXAMPLE_OF_GOOD_CONVERSION,
+  VISUALIZER_DOCUMENTATION,
+  EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  WORKFLOW_PROMPT,
+  MAPPING_PROMPT,
+} from "./migration-prompts";
+
+interface Cell {
+  cell_type: string;
+  metadata: { [key: string]: any };
+  source: string;
+}
+
+export interface Notebook {
+  cells: Cell[];
+}
+
+interface WorkflowJSON {
+  operators: OperatorPredicate[];
+  operatorPositions: Record<string, { x: number; y: number }>;
+  links: any[];
+  commentBoxes: any[];
+  settings: {
+    dataTransferBatchSize: number;
+  };
+}
+
+interface CombinedMapping {
+  operator_to_cell: Record<string, string[]>;
+  cell_to_operator: Record<string, string[]>;
+}
+
+@Injectable()
+export class NotebookMigrationLLM {
+  private model: any;
+  private messages: ModelMessage[] = [];
+  private initialized = false;
+
+  private static readonly DOCUMENTATION: string[] = [
+    TEXERA_OVERVIEW,
+    TUPLE_DOCUMENTATION,
+    TABLE_DOCUMENTATION,
+    OPERATOR_DOCUMENTATION,
+    EXAMPLE_OF_GOOD_CONVERSION,
+    VISUALIZER_DOCUMENTATION,
+    UDF_INPUT_PORT_DOCUMENTATION,
+    EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  ];
+
+  constructor(
+    private config: GuiConfigService,
+    private workflowUtilService: WorkflowUtilService
+  ) {}
+
+  private get enabled(): boolean {
+    return this.config.env.pythonNotebookMigrationEnabled;
+  }
+
+  private assertEnabled(): void {
+    if (!this.enabled) {
+      throw new Error("Notebook migration feature is disabled");
+    }
+  }
+
+  private parseJsonResponse(raw: string, context: string): any {
+    // Trim first, then strip optional markdown code fences (```json ... ``` 
or ``` ... ```)
+    const cleaned = raw
+      .trim()
+      .replace(/^```[a-zA-Z]*\s*/, "")
+      .replace(/\s*```$/, "")
+      .trim();
+    try {
+      return JSON.parse(cleaned);
+    } catch (err) {
+      throw new Error(`Failed to parse LLM ${context} response as JSON: ${(err 
as Error).message}`);
+    }
+  }
+
+  /**
+   * Initialize a new LLM session with Texera documentation
+   */
+  public initialize(modelType: string = "gpt-5-mini", apiKey: string = 
"dummy"): void {
+    this.assertEnabled();
+    this.model = createOpenAI({
+      baseURL: new URL(`${AppSettings.getApiEndpoint()}`, 
document.baseURI).toString(),
+      // apiKey is required by the library for creating the OpenAI compatible 
client;
+      // For security reason, we store the apiKey at the backend, thus the 
value is dummy here.
+      apiKey: apiKey,
+    }).chat(modelType);
+
+    this.messages = [
+      ...NotebookMigrationLLM.DOCUMENTATION.map(
+        (doc): ModelMessage => ({
+          role: "system",
+          content: doc,
+        })
+      ),
+    ];
+
+    this.initialized = true;
+  }
+
+  /**
+   * Verify the connection to the LLM using the given API key
+   */
+  public async verifyConnection(): Promise<boolean> {
+    if (!this.enabled) return false;
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    try {
+      await generateText({
+        model: this.model,
+        messages: [
+          {
+            role: "user",
+            content: "ping",
+          },
+        ],
+        maxOutputTokens: 10,
+      });
+
+      return true;
+    } catch (err) {
+      console.error("API key verification failed:", err);
+      return false;
+    }
+  }
+
+  /**
+   * Send a prompt and receive a response.
+   * All prior documentation and conversation is preserved.
+   */
+  private async sendPrompt(prompt: string): Promise<string> {
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    this.messages.push({
+      role: "user",
+      content: prompt,
+    });
+
+    const result = await generateText({
+      model: this.model,
+      messages: this.messages,
+    });
+
+    this.messages.push({
+      role: "assistant",
+      content: result.text,
+    });
+
+    return result.text;
+  }
+
+  /**
+   * Send a Jupyter Notebook to be converted into a workflow and mapping.
+   */
+  public async convertNotebookToWorkflow(notebook: Notebook): Promise<string> {
+    this.assertEnabled();
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    const codeCells = notebook.cells.filter(cell => cell.cell_type === "code");
+    const notebookString = codeCells
+      .map(cell => {
+        const uuid = String(cell.metadata.uuid);
+        return `# START ${uuid}\n${cell.source}\n# END ${uuid}`;
+      })
+      .join("\n\n");
+
+    const workflow = await 
this.sendPrompt(`${WORKFLOW_PROMPT}\n${notebookString}`);
+    const mapping = await this.sendPrompt(MAPPING_PROMPT);
+
+    // Remove ```json blocks and parse
+    const udfLLMResponse = this.parseJsonResponse(workflow, "workflow");
+
+    const workflowJSON: WorkflowJSON = {
+      operators: [],
+      operatorPositions: {},
+      links: [],
+      commentBoxes: [],
+      settings: {
+        dataTransferBatchSize: 400,
+      },
+    };

Review Comment:
   Workflow creation hardcodes `dataTransferBatchSize: 400` and does not set 
`executionMode`. The rest of the frontend uses GUI config defaults for both 
(see `defaultDataTransferBatchSize` / `defaultExecutionMode`), so the migrated 
workflow should use those defaults as well.



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.ts:
##########
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { createOpenAI } from "@ai-sdk/openai";
+import { generateText, type ModelMessage } from "ai";
+import { AppSettings } from "../../../common/app-setting";
+import { v4 as uuidv4 } from "uuid";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { OperatorPredicate } from "../../types/workflow-common.interface";
+import {
+  TEXERA_OVERVIEW,
+  TUPLE_DOCUMENTATION,
+  TABLE_DOCUMENTATION,
+  OPERATOR_DOCUMENTATION,
+  UDF_INPUT_PORT_DOCUMENTATION,
+  EXAMPLE_OF_GOOD_CONVERSION,
+  VISUALIZER_DOCUMENTATION,
+  EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  WORKFLOW_PROMPT,
+  MAPPING_PROMPT,
+} from "./migration-prompts";
+
+interface Cell {
+  cell_type: string;
+  metadata: { [key: string]: any };
+  source: string;
+}
+
+export interface Notebook {
+  cells: Cell[];
+}
+
+interface WorkflowJSON {
+  operators: OperatorPredicate[];
+  operatorPositions: Record<string, { x: number; y: number }>;
+  links: any[];
+  commentBoxes: any[];
+  settings: {
+    dataTransferBatchSize: number;
+  };
+}
+
+interface CombinedMapping {
+  operator_to_cell: Record<string, string[]>;
+  cell_to_operator: Record<string, string[]>;
+}
+
+@Injectable()
+export class NotebookMigrationLLM {
+  private model: any;
+  private messages: ModelMessage[] = [];
+  private initialized = false;
+
+  private static readonly DOCUMENTATION: string[] = [
+    TEXERA_OVERVIEW,
+    TUPLE_DOCUMENTATION,
+    TABLE_DOCUMENTATION,
+    OPERATOR_DOCUMENTATION,
+    EXAMPLE_OF_GOOD_CONVERSION,
+    VISUALIZER_DOCUMENTATION,
+    UDF_INPUT_PORT_DOCUMENTATION,
+    EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  ];
+
+  constructor(
+    private config: GuiConfigService,
+    private workflowUtilService: WorkflowUtilService
+  ) {}
+
+  private get enabled(): boolean {
+    return this.config.env.pythonNotebookMigrationEnabled;
+  }
+
+  private assertEnabled(): void {
+    if (!this.enabled) {
+      throw new Error("Notebook migration feature is disabled");
+    }
+  }
+
+  private parseJsonResponse(raw: string, context: string): any {
+    // Trim first, then strip optional markdown code fences (```json ... ``` 
or ``` ... ```)
+    const cleaned = raw
+      .trim()
+      .replace(/^```[a-zA-Z]*\s*/, "")
+      .replace(/\s*```$/, "")
+      .trim();
+    try {
+      return JSON.parse(cleaned);
+    } catch (err) {
+      throw new Error(`Failed to parse LLM ${context} response as JSON: ${(err 
as Error).message}`);
+    }
+  }
+
+  /**
+   * Initialize a new LLM session with Texera documentation
+   */
+  public initialize(modelType: string = "gpt-5-mini", apiKey: string = 
"dummy"): void {
+    this.assertEnabled();
+    this.model = createOpenAI({
+      baseURL: new URL(`${AppSettings.getApiEndpoint()}`, 
document.baseURI).toString(),
+      // apiKey is required by the library for creating the OpenAI compatible 
client;
+      // For security reason, we store the apiKey at the backend, thus the 
value is dummy here.
+      apiKey: apiKey,
+    }).chat(modelType);
+
+    this.messages = [
+      ...NotebookMigrationLLM.DOCUMENTATION.map(
+        (doc): ModelMessage => ({
+          role: "system",
+          content: doc,
+        })
+      ),
+    ];
+
+    this.initialized = true;
+  }
+
+  /**
+   * Verify the connection to the LLM using the given API key
+   */
+  public async verifyConnection(): Promise<boolean> {
+    if (!this.enabled) return false;
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    try {
+      await generateText({
+        model: this.model,
+        messages: [
+          {
+            role: "user",
+            content: "ping",
+          },
+        ],
+        maxOutputTokens: 10,
+      });
+
+      return true;
+    } catch (err) {
+      console.error("API key verification failed:", err);
+      return false;
+    }
+  }
+
+  /**
+   * Send a prompt and receive a response.
+   * All prior documentation and conversation is preserved.
+   */
+  private async sendPrompt(prompt: string): Promise<string> {
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    this.messages.push({
+      role: "user",
+      content: prompt,
+    });
+
+    const result = await generateText({
+      model: this.model,
+      messages: this.messages,
+    });
+
+    this.messages.push({
+      role: "assistant",
+      content: result.text,
+    });
+
+    return result.text;
+  }
+
+  /**
+   * Send a Jupyter Notebook to be converted into a workflow and mapping.
+   */
+  public async convertNotebookToWorkflow(notebook: Notebook): Promise<string> {
+    this.assertEnabled();
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    const codeCells = notebook.cells.filter(cell => cell.cell_type === "code");
+    const notebookString = codeCells
+      .map(cell => {
+        const uuid = String(cell.metadata.uuid);
+        return `# START ${uuid}\n${cell.source}\n# END ${uuid}`;
+      })
+      .join("\n\n");
+
+    const workflow = await 
this.sendPrompt(`${WORKFLOW_PROMPT}\n${notebookString}`);
+    const mapping = await this.sendPrompt(MAPPING_PROMPT);
+
+    // Remove ```json blocks and parse
+    const udfLLMResponse = this.parseJsonResponse(workflow, "workflow");
+
+    const workflowJSON: WorkflowJSON = {
+      operators: [],
+      operatorPositions: {},
+      links: [],
+      commentBoxes: [],
+      settings: {
+        dataTransferBatchSize: 400,
+      },
+    };
+
+    const udfMappingToUUID: Record<string, string> = {};
+
+    Object.entries(udfLLMResponse.code).forEach(([udfId, udfCode], i) => {
+      let udfOutputColumns: { attributeName: string; attributeType: string }[] 
= [];
+      if (udfLLMResponse.outputs && udfLLMResponse.outputs[udfId]) {
+        udfOutputColumns = udfLLMResponse.outputs[udfId].map((attr: string) => 
({
+          attributeName: attr,
+          attributeType: "binary",
+        }));
+      }
+
+      // Build the operator from the live PythonUDFV2 schema so the 
operatorVersion, ports, and
+      // property defaults track the backend definition, then overlay the 
generated code/outputs.
+      const base = 
this.workflowUtilService.getNewOperatorPredicate("PythonUDFV2", udfId);
+      const operator: OperatorPredicate = {
+        ...base,
+        operatorProperties: {
+          ...base.operatorProperties,
+          code: udfCode,
+          retainInputColumns: false,
+          outputColumns: udfOutputColumns,
+        },
+      };
+
+      udfMappingToUUID[udfId] = operator.operatorID;
+      workflowJSON.operators.push(operator);
+      workflowJSON.operatorPositions[operator.operatorID] = { x: 140 * (i + 
1), y: 0 };
+    });
+
+    // Add links/edges
+    (udfLLMResponse.edges || []).forEach(([source, target]: [string, string]) 
=> {
+      workflowJSON.links.push({
+        linkID: `link-${uuidv4()}`,
+        source: {
+          operatorID: udfMappingToUUID[source],
+          portID: "output-0",
+        },
+        target: {
+          operatorID: udfMappingToUUID[target],
+          portID: "input-0",
+        },
+      });
+    });
+
+    // Parse mapping
+    const parsedMapping: Record<string, string[]> = 
this.parseJsonResponse(mapping, "mapping");
+
+    const udfToCell: Record<string, string[]> = {};
+    const cellToUdf: Record<string, string[]> = {};
+
+    Object.entries(parsedMapping).forEach(([udf, cells]) => {
+      const udfUUID = udfMappingToUUID[udf];
+      udfToCell[udfUUID] = cells;
+      cells.forEach(cell => {
+        if (!cellToUdf[cell]) {
+          cellToUdf[cell] = [udfUUID];
+        } else {
+          cellToUdf[cell].push(udfUUID);
+        }
+      });
+    });

Review Comment:
   The mapping step assumes every key in the LLM response exists in 
   `udfMappingToUUID`. If the LLM returns an extra/typo UDF id, `udfUUID` is 
   `undefined`, so this will create a literal `"undefined"` key in 
   `operator_to_cell` and insert `undefined` into `cell_to_operator`. Validate 
   the mapping keys before using them.



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.ts:
##########
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { createOpenAI } from "@ai-sdk/openai";
+import { generateText, type ModelMessage } from "ai";
+import { AppSettings } from "../../../common/app-setting";
+import { v4 as uuidv4 } from "uuid";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { OperatorPredicate } from "../../types/workflow-common.interface";
+import {
+  TEXERA_OVERVIEW,
+  TUPLE_DOCUMENTATION,
+  TABLE_DOCUMENTATION,
+  OPERATOR_DOCUMENTATION,
+  UDF_INPUT_PORT_DOCUMENTATION,
+  EXAMPLE_OF_GOOD_CONVERSION,
+  VISUALIZER_DOCUMENTATION,
+  EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  WORKFLOW_PROMPT,
+  MAPPING_PROMPT,
+} from "./migration-prompts";
+
+interface Cell {
+  cell_type: string;
+  metadata: { [key: string]: any };
+  source: string;
+}
+
+export interface Notebook {
+  cells: Cell[];
+}
+
+interface WorkflowJSON {
+  operators: OperatorPredicate[];
+  operatorPositions: Record<string, { x: number; y: number }>;
+  links: any[];
+  commentBoxes: any[];
+  settings: {
+    dataTransferBatchSize: number;
+  };
+}
+
+interface CombinedMapping {
+  operator_to_cell: Record<string, string[]>;
+  cell_to_operator: Record<string, string[]>;
+}
+
+@Injectable()
+export class NotebookMigrationLLM {
+  private model: any;
+  private messages: ModelMessage[] = [];
+  private initialized = false;
+
+  private static readonly DOCUMENTATION: string[] = [
+    TEXERA_OVERVIEW,
+    TUPLE_DOCUMENTATION,
+    TABLE_DOCUMENTATION,
+    OPERATOR_DOCUMENTATION,
+    EXAMPLE_OF_GOOD_CONVERSION,
+    VISUALIZER_DOCUMENTATION,
+    UDF_INPUT_PORT_DOCUMENTATION,
+    EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  ];
+
+  constructor(
+    private config: GuiConfigService,
+    private workflowUtilService: WorkflowUtilService
+  ) {}
+
+  private get enabled(): boolean {
+    return this.config.env.pythonNotebookMigrationEnabled;
+  }
+
+  private assertEnabled(): void {
+    if (!this.enabled) {
+      throw new Error("Notebook migration feature is disabled");
+    }
+  }
+
+  private parseJsonResponse(raw: string, context: string): any {
+    // Trim first, then strip optional markdown code fences (```json ... ``` 
or ``` ... ```)
+    const cleaned = raw
+      .trim()
+      .replace(/^```[a-zA-Z]*\s*/, "")
+      .replace(/\s*```$/, "")
+      .trim();
+    try {
+      return JSON.parse(cleaned);
+    } catch (err) {
+      throw new Error(`Failed to parse LLM ${context} response as JSON: ${(err 
as Error).message}`);
+    }
+  }
+
+  /**
+   * Initialize a new LLM session with Texera documentation
+   */
+  public initialize(modelType: string = "gpt-5-mini", apiKey: string = 
"dummy"): void {
+    this.assertEnabled();
+    this.model = createOpenAI({
+      baseURL: new URL(`${AppSettings.getApiEndpoint()}`, 
document.baseURI).toString(),
+      // apiKey is required by the library for creating the OpenAI compatible 
client;
+      // For security reason, we store the apiKey at the backend, thus the 
value is dummy here.
+      apiKey: apiKey,
+    }).chat(modelType);
+
+    this.messages = [
+      ...NotebookMigrationLLM.DOCUMENTATION.map(
+        (doc): ModelMessage => ({
+          role: "system",
+          content: doc,
+        })
+      ),
+    ];
+
+    this.initialized = true;
+  }
+
+  /**
+   * Verify the connection to the LLM using the given API key
+   */
+  public async verifyConnection(): Promise<boolean> {
+    if (!this.enabled) return false;
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    try {
+      await generateText({
+        model: this.model,
+        messages: [
+          {
+            role: "user",
+            content: "ping",
+          },
+        ],
+        maxOutputTokens: 10,
+      });
+
+      return true;
+    } catch (err) {
+      console.error("API key verification failed:", err);
+      return false;
+    }
+  }
+
+  /**
+   * Send a prompt and receive a response.
+   * All prior documentation and conversation is preserved.
+   */
+  private async sendPrompt(prompt: string): Promise<string> {
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    this.messages.push({
+      role: "user",
+      content: prompt,
+    });
+
+    const result = await generateText({
+      model: this.model,
+      messages: this.messages,
+    });
+
+    this.messages.push({
+      role: "assistant",
+      content: result.text,
+    });
+
+    return result.text;
+  }
+
+  /**
+   * Send a Jupyter Notebook to be converted into a workflow and mapping.
+   */
+  public async convertNotebookToWorkflow(notebook: Notebook): Promise<string> {
+    this.assertEnabled();
+    if (!this.initialized) {
+      throw new Error("LLM session not initialized");
+    }
+
+    const codeCells = notebook.cells.filter(cell => cell.cell_type === "code");
+    const notebookString = codeCells
+      .map(cell => {
+        const uuid = String(cell.metadata.uuid);
+        return `# START ${uuid}\n${cell.source}\n# END ${uuid}`;
+      })
+      .join("\n\n");
+
+    const workflow = await 
this.sendPrompt(`${WORKFLOW_PROMPT}\n${notebookString}`);
+    const mapping = await this.sendPrompt(MAPPING_PROMPT);
+
+    // Remove ```json blocks and parse
+    const udfLLMResponse = this.parseJsonResponse(workflow, "workflow");
+
+    const workflowJSON: WorkflowJSON = {
+      operators: [],
+      operatorPositions: {},
+      links: [],
+      commentBoxes: [],
+      settings: {
+        dataTransferBatchSize: 400,
+      },
+    };
+
+    const udfMappingToUUID: Record<string, string> = {};
+
+    Object.entries(udfLLMResponse.code).forEach(([udfId, udfCode], i) => {
+      let udfOutputColumns: { attributeName: string; attributeType: string }[] 
= [];
+      if (udfLLMResponse.outputs && udfLLMResponse.outputs[udfId]) {
+        udfOutputColumns = udfLLMResponse.outputs[udfId].map((attr: string) => 
({
+          attributeName: attr,
+          attributeType: "binary",
+        }));
+      }
+
+      // Build the operator from the live PythonUDFV2 schema so the 
operatorVersion, ports, and
+      // property defaults track the backend definition, then overlay the 
generated code/outputs.
+      const base = 
this.workflowUtilService.getNewOperatorPredicate("PythonUDFV2", udfId);
+      const operator: OperatorPredicate = {
+        ...base,
+        operatorProperties: {
+          ...base.operatorProperties,
+          code: udfCode,
+          retainInputColumns: false,
+          outputColumns: udfOutputColumns,
+        },
+      };
+
+      udfMappingToUUID[udfId] = operator.operatorID;
+      workflowJSON.operators.push(operator);
+      workflowJSON.operatorPositions[operator.operatorID] = { x: 140 * (i + 
1), y: 0 };
+    });
+
+    // Add links/edges
+    (udfLLMResponse.edges || []).forEach(([source, target]: [string, string]) 
=> {
+      workflowJSON.links.push({
+        linkID: `link-${uuidv4()}`,
+        source: {
+          operatorID: udfMappingToUUID[source],
+          portID: "output-0",
+        },
+        target: {
+          operatorID: udfMappingToUUID[target],
+          portID: "input-0",
+        },
+      });
+    });

Review Comment:
   Edges from the LLM are currently accepted even when they reference unknown 
UDF ids, producing links with `operatorID: undefined` (and potentially breaking 
the workflow graph). It's safer to validate and fail fast (or skip with 
telemetry) when the LLM response is inconsistent.



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.spec.ts:
##########
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { NotebookMigrationLLM, Notebook } from "./migration-llm";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { generateText } from "ai";
+import type { Mock } from "vitest";
+
+// The LLM transport and OpenAI client are mocked so the tests exercise only 
the
+// deterministic transformation (parsing, operator/edge construction, 
cell<->operator mapping).
+vi.mock("ai", () => ({ generateText: vi.fn() }));
+vi.mock("@ai-sdk/openai", () => ({
+  createOpenAI: vi.fn(() => ({ chat: vi.fn(() => ({})) })),
+}));
+
+const mockGenerateText = generateText as unknown as Mock;
+
+describe("NotebookMigrationLLM", () => {
+  let opIdCounter = 0;
+  let stubUtil: WorkflowUtilService;
+
+  // Build a fresh, initialized session with stubbed dependencies. The stubbed
+  // getNewOperatorPredicate hands out deterministic ids (PythonUDFV2-0, -1, 
...).
+  function makeLLM(): NotebookMigrationLLM {
+    const stubConfig = {
+      env: { pythonNotebookMigrationEnabled: true },
+    } as unknown as GuiConfigService;
+
+    stubUtil = {
+      getNewOperatorPredicate: vi.fn((operatorType: string, 
customDisplayName?: string) => ({
+        operatorID: `${operatorType}-${opIdCounter++}`,
+        operatorType,
+        operatorVersion: "test-version",
+        operatorProperties: { workers: 1, defaultEnv: true, envName: "" },
+        inputPorts: [{ portID: "input-0", disallowMultiInputs: false }],
+        outputPorts: [{ portID: "output-0" }],
+        showAdvanced: false,
+        isDisabled: false,
+        customDisplayName,
+        dynamicInputPorts: true,
+        dynamicOutputPorts: true,
+      })),
+    } as unknown as WorkflowUtilService;
+
+    const llm = new NotebookMigrationLLM(stubConfig, stubUtil);
+    llm.initialize();
+    return llm;

Review Comment:
   If `initialize()` is switched to use an access token (JWT) by default, 
these tests should pass an explicit dummy token so that they do not depend on 
localStorage/AuthService state.



##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.spec.ts:
##########
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { NotebookMigrationLLM, Notebook } from "./migration-llm";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { generateText } from "ai";
+import type { Mock } from "vitest";
+
+// The LLM transport and OpenAI client are mocked so the tests exercise only 
the
+// deterministic transformation (parsing, operator/edge construction, 
cell<->operator mapping).
+vi.mock("ai", () => ({ generateText: vi.fn() }));
+vi.mock("@ai-sdk/openai", () => ({
+  createOpenAI: vi.fn(() => ({ chat: vi.fn(() => ({})) })),
+}));
+
+const mockGenerateText = generateText as unknown as Mock;
+
+describe("NotebookMigrationLLM", () => {
+  let opIdCounter = 0;
+  let stubUtil: WorkflowUtilService;
+
+  // Build a fresh, initialized session with stubbed dependencies. The stubbed
+  // getNewOperatorPredicate hands out deterministic ids (PythonUDFV2-0, -1, 
...).
+  function makeLLM(): NotebookMigrationLLM {
+    const stubConfig = {
+      env: { pythonNotebookMigrationEnabled: true },
+    } as unknown as GuiConfigService;
+
+    stubUtil = {
+      getNewOperatorPredicate: vi.fn((operatorType: string, 
customDisplayName?: string) => ({
+        operatorID: `${operatorType}-${opIdCounter++}`,
+        operatorType,
+        operatorVersion: "test-version",
+        operatorProperties: { workers: 1, defaultEnv: true, envName: "" },
+        inputPorts: [{ portID: "input-0", disallowMultiInputs: false }],
+        outputPorts: [{ portID: "output-0" }],
+        showAdvanced: false,
+        isDisabled: false,
+        customDisplayName,
+        dynamicInputPorts: true,
+        dynamicOutputPorts: true,
+      })),
+    } as unknown as WorkflowUtilService;
+
+    const llm = new NotebookMigrationLLM(stubConfig, stubUtil);
+    llm.initialize();
+    return llm;
+  }
+
+  function codeCell(uuid: string | undefined, source: string) {
+    return { cell_type: "code", metadata: uuid === undefined ? {} : { uuid }, 
source };
+  }
+
+  // Queue the two responses convertNotebookToWorkflow consumes, in order.
+  function mockResponses(workflowResponse: string, mappingResponse: string) {
+    mockGenerateText.mockResolvedValueOnce({ text: workflowResponse 
}).mockResolvedValueOnce({ text: mappingResponse });
+  }
+
+  beforeEach(() => {
+    opIdCounter = 0;
+    mockGenerateText.mockReset();
+  });
+
+  describe("convertNotebookToWorkflow", () => {
+    it("builds operators, links, positions, and a bidirectional mapping", 
async () => {
+      const notebook: Notebook = {
+        cells: [codeCell("CELL1", "print(1)"), codeCell("CELL2", "print(2)")],
+      };
+      mockResponses(
+        JSON.stringify({
+          code: { UDF1: "code1", UDF2: "code2" },
+          edges: [["UDF1", "UDF2"]],
+          outputs: { UDF1: ["a", "b"], UDF2: ["c"] },
+        }),
+        JSON.stringify({ UDF1: ["CELL1"], UDF2: ["CELL2"] })
+      );
+
+      const { workflowJSON, workflowNotebookMapping } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      expect(workflowJSON.operators.map((op: any) => 
op.operatorID)).toEqual(["PythonUDFV2-0", "PythonUDFV2-1"]);
+      expect(workflowJSON.operators[0].operatorProperties).toMatchObject({
+        code: "code1",
+        retainInputColumns: false,
+      });
+      expect(workflowJSON.operatorPositions).toEqual({
+        "PythonUDFV2-0": { x: 140, y: 0 },
+        "PythonUDFV2-1": { x: 280, y: 0 },
+      });
+      expect(workflowJSON.links).toHaveLength(1);
+      expect(workflowJSON.links[0].source).toEqual({ operatorID: 
"PythonUDFV2-0", portID: "output-0" });
+      expect(workflowJSON.links[0].target).toEqual({ operatorID: 
"PythonUDFV2-1", portID: "input-0" });
+      expect(workflowNotebookMapping.operator_to_cell).toEqual({
+        "PythonUDFV2-0": ["CELL1"],
+        "PythonUDFV2-1": ["CELL2"],
+      });
+      expect(workflowNotebookMapping.cell_to_operator).toEqual({
+        CELL1: ["PythonUDFV2-0"],
+        CELL2: ["PythonUDFV2-1"],
+      });
+    });
+
+    // NOTE (C2): the attributeType is currently hardcoded to "binary". If C2 
lands as
+    // "LLM returns per-column types", update the expected attributeType 
values here.
+    it("declares output columns with attributeType binary", async () => {
+      const notebook: Notebook = { cells: [codeCell("CELL1", "x = 1")] };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "code1" }, edges: [], outputs: { UDF1: 
["a", "b"] } }),
+        JSON.stringify({ UDF1: ["CELL1"] })
+      );
+
+      const { workflowJSON } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      
expect(workflowJSON.operators[0].operatorProperties.outputColumns).toEqual([
+        { attributeName: "a", attributeType: "binary" },
+        { attributeName: "b", attributeType: "binary" },
+      ]);
+    });
+
+    it("maps multiple cells onto the same UDF, and one cell onto multiple 
UDFs", async () => {
+      const notebook: Notebook = {
+        cells: [codeCell("CELL1", "a"), codeCell("CELL2", "b")],
+      };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "c1", UDF2: "c2" }, edges: [], outputs: 
{} }),
+        JSON.stringify({ UDF1: ["CELL1", "CELL2"], UDF2: ["CELL1"] })
+      );
+
+      const { workflowNotebookMapping } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      expect(workflowNotebookMapping.operator_to_cell).toEqual({
+        "PythonUDFV2-0": ["CELL1", "CELL2"],
+        "PythonUDFV2-1": ["CELL1"],
+      });
+      expect(workflowNotebookMapping.cell_to_operator).toEqual({
+        CELL1: ["PythonUDFV2-0", "PythonUDFV2-1"],
+        CELL2: ["PythonUDFV2-0"],
+      });
+    });
+
+    it("produces a link with an undefined endpoint when an edge references an 
unknown UDF id", async () => {
+      const notebook: Notebook = { cells: [codeCell("CELL1", "a")] };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "c1" }, edges: [["UDF1", "UDFX"]], 
outputs: {} }),
+        JSON.stringify({ UDF1: ["CELL1"] })
+      );
+
+      const { workflowJSON } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      // Documents current behavior: udfMappingToUUID["UDFX"] is undefined.
+      expect(workflowJSON.links[0].source.operatorID).toBe("PythonUDFV2-0");
+      expect(workflowJSON.links[0].target.operatorID).toBeUndefined();
+    });
+
+    it("handles empty code, edges, and outputs", async () => {
+      const notebook: Notebook = { cells: [] };
+      mockResponses(JSON.stringify({ code: {}, edges: [], outputs: {} }), 
JSON.stringify({}));
+
+      const { workflowJSON, workflowNotebookMapping } = JSON.parse(await 
makeLLM().convertNotebookToWorkflow(notebook));
+
+      expect(workflowJSON.operators).toEqual([]);
+      expect(workflowJSON.links).toEqual([]);
+      expect(workflowNotebookMapping.operator_to_cell).toEqual({});
+      expect(workflowNotebookMapping.cell_to_operator).toEqual({});
+    });
+
+    it("emits the 'undefined' cell marker in the prompt when a code cell lacks 
metadata.uuid", async () => {
+      const notebook: Notebook = { cells: [codeCell(undefined, "print(1)")] };
+      mockResponses(
+        JSON.stringify({ code: { UDF1: "c1" }, edges: [], outputs: {} }),
+        JSON.stringify({ UDF1: ["CELL1"] })
+      );
+
+      await makeLLM().convertNotebookToWorkflow(notebook);
+
+      // The notebook string (embedded in the workflow prompt) is sent to 
generateText.
+      // messages is a shared, mutated array, so search every message content 
rather than
+      // assuming a fixed index.
+      const allPromptContent = mockGenerateText.mock.calls
+        .flatMap(call => call[0].messages.map((m: any) => m.content))
+        .join("\n");
+      expect(allPromptContent).toContain("# START undefined");
+    });

Review Comment:
   If `convertNotebookToWorkflow` is updated to require `metadata.uuid` (to 
avoid ambiguous `# START undefined` markers), this test should assert that the 
method rejects with a clear error instead of checking for the literal string 
`undefined` in the prompt.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to