mengw15 commented on code in PR #5260:
URL: https://github.com/apache/texera/pull/5260#discussion_r3456231802


##########
frontend/src/app/workspace/service/notebook-migration/migration-llm.ts:
##########
@@ -0,0 +1,303 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Injectable } from "@angular/core";
+import { GuiConfigService } from "../../../common/service/gui-config.service";
+import { createOpenAI } from "@ai-sdk/openai";
+import { generateText, type ModelMessage } from "ai";
+import { AppSettings } from "../../../common/app-setting";
+import { v4 as uuidv4 } from "uuid";
+import { WorkflowUtilService } from 
"../workflow-graph/util/workflow-util.service";
+import { OperatorPredicate } from "../../types/workflow-common.interface";
+import {
+  TEXERA_OVERVIEW,
+  TUPLE_DOCUMENTATION,
+  TABLE_DOCUMENTATION,
+  OPERATOR_DOCUMENTATION,
+  UDF_INPUT_PORT_DOCUMENTATION,
+  EXAMPLE_OF_GOOD_CONVERSION,
+  VISUALIZER_DOCUMENTATION,
+  EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  WORKFLOW_PROMPT,
+  MAPPING_PROMPT,
+} from "./migration-prompts";
+
+interface Cell {
+  cell_type: string;
+  metadata: { [key: string]: any };
+  source: string;
+}
+
+export interface Notebook {
+  cells: Cell[];
+}
+
+interface WorkflowJSON {
+  operators: OperatorPredicate[];
+  operatorPositions: Record<string, { x: number; y: number }>;
+  links: any[];
+  commentBoxes: any[];
+  settings: {
+    dataTransferBatchSize: number;
+  };
+}
+
+interface CombinedMapping {
+  operator_to_cell: Record<string, string[]>;
+  cell_to_operator: Record<string, string[]>;
+}
+
+@Injectable()
+export class NotebookMigrationLLM {
+  private model: any;
+  private messages: ModelMessage[] = [];
+  private initialized = false;
+
+  private static readonly DOCUMENTATION: string[] = [
+    TEXERA_OVERVIEW,
+    TUPLE_DOCUMENTATION,
+    TABLE_DOCUMENTATION,
+    OPERATOR_DOCUMENTATION,
+    EXAMPLE_OF_GOOD_CONVERSION,
+    VISUALIZER_DOCUMENTATION,
+    UDF_INPUT_PORT_DOCUMENTATION,
+    EXAMPLE_OF_MULTIPLE_UDF_CONVERSION,
+  ];
+
+  constructor(
+    private config: GuiConfigService,
+    private workflowUtilService: WorkflowUtilService
+  ) {}
+
+  private get enabled(): boolean {
+    return this.config.env.pythonNotebookMigrationEnabled;
+  }
+
+  private assertEnabled(): void {
+    if (!this.enabled) {
+      throw new Error("Notebook migration feature is disabled");
+    }
+  }
+
+  private parseJsonResponse(raw: string, context: string): any {
+    // Trim first, then strip optional markdown code fences (```json ... ``` 
or ``` ... ```)
+    const cleaned = raw
+      .trim()
+      .replace(/^```[a-zA-Z]*\s*/, "")
+      .replace(/\s*```$/, "")
+      .trim();
+    try {
+      return JSON.parse(cleaned);
+    } catch (err) {
+      throw new Error(`Failed to parse LLM ${context} response as JSON: ${(err 
as Error).message}`);
+    }
+  }
+
+  /**
+   * Initialize a new LLM session with Texera documentation
+   */
+  public initialize(modelType: string = "gpt-5-mini", apiKey: string = 
"dummy"): void {
+    this.assertEnabled();
+    this.model = createOpenAI({
+      baseURL: new URL(`${AppSettings.getApiEndpoint()}`, 
document.baseURI).toString(),
+      // apiKey is required by the library for creating the OpenAI compatible 
client;
+      // For security reason, we store the apiKey at the backend, thus the 
value is dummy here.
+      apiKey: apiKey,
+    }).chat(modelType);

Review Comment:
   Strengthening Copilot's apiKey/JWT note on this `initialize()` block: this 
is now a hard blocker rather than a forward-looking suggestion, because of 
#5421.
   
   Before #5421, `AccessControlResource.scala:251` (the `/chat/*` LiteLLM proxy 
in `access-control-service`) was `@PermitAll`. The `JwtAuthFilter` skipped it, 
and the resource body explicitly stripped the incoming `Authorization` header 
and substituted the LiteLLM master key — so `apiKey: "dummy"` worked because 
the value was discarded server-side. That's where the "for security reason, we 
store the apiKey at the backend, thus the value is dummy here" comment in this 
file came from.
   
   After #5421 (`fix(auth): require REGULAR/ADMIN role on LiteLLM proxy 
endpoints`, merged 2026-06-07), the same resource is 
`@RolesAllowed(Array("REGULAR", "ADMIN"))`. The `JwtAuthFilter` registered at 
`AUTHENTICATION` priority in `AccessControlService.scala:79` now runs on every 
`/api/chat/*` request, parses `Authorization: Bearer <…>` as a JWT, and rejects 
malformed tokens with 401 before the request reaches the resource body.
   
   Net effect once this PR is rebased onto current main:
   - `convertNotebookToWorkflow` → 401
   - `verifyConnection` → 401
   - The whole migration feature is non-functional until the apiKey wiring 
changes.
   
   The Vercel AI SDK puts the `apiKey` string into the Authorization header 
verbatim as a Bearer token, which is the exact shape `JwtAuthFilter` parses, so 
passing the current user's JWT here should be a small wire-up (e.g. pulling the 
token from `UserService` at the call site and threading it into 
`initialize()`). The resource body still substitutes the master key before 
forwarding to LiteLLM, so the JWT is used only for the Texera-side auth check.
   
   The "// apiKey is required by the library… the value is dummy here" code 
comment should be updated in the same change as the apiKey wiring; otherwise it 
will mislead the next reader.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to