Copilot commented on code in PR #5675:
URL: https://github.com/apache/texera/pull/5675#discussion_r3444862464


##########
frontend/src/app/workspace/component/result-panel/result-panel-modal.component.ts:
##########
@@ -42,29 +48,79 @@ import { NgxJsonViewerModule } from "ngx-json-viewer";
   selector: "texera-row-modal-content",
   templateUrl: "./result-panel-modal.component.html",
   styleUrls: ["./result-panel-model.component.scss"],
-  imports: [NgxJsonViewerModule],
+  imports: [CommonModule, NzButtonModule, NzIconModule, NgxJsonViewerModule],
 })
 export class RowModalComponent implements OnChanges {
+  rowEntries: { key: string; value: string; mediaSrc: string; isVideo: 
boolean; isImage: boolean; isAudio: boolean }[] =
+    [];
   // Index of current displayed row in currentResult
-  readonly operatorId: string = inject(NZ_MODAL_DATA).operatorId;
-  rowIndex: number = inject(NZ_MODAL_DATA).rowIndex;
+  private readonly modalData: { operatorId: string; rowIndex: number; 
rowData?: Record<string, unknown> } =
+    inject(NZ_MODAL_DATA);
+  readonly operatorId: string = this.modalData.operatorId;
+  rowIndex: number = this.modalData.rowIndex;
   currentDisplayRowData: Record<string, unknown> = {};
 
   constructor(
     public modal: NzModalRef<any, number>,
     private workflowResultService: WorkflowResultService,
-    private resizeService: PanelResizeService
+    private resizeService: PanelResizeService,
+    private notificationService: NotificationService
   ) {
+    if (this.modalData.rowData) {
+      this.currentDisplayRowData = this.modalData.rowData;
+      this.rowEntries = this.buildRowEntries(this.currentDisplayRowData);
+    }
     this.ngOnChanges();
   }
 
+  get prettyRowJson(): string {
+    return JSON.stringify(this.currentDisplayRowData, null, 2);
+  }
+
+  copyText(text: string): void {
+    navigator.clipboard.writeText(text).then(
+      () => this.notificationService.success("Copied to clipboard"),
+      () => this.notificationService.error("Failed to copy")
+    );
+  }
+
   ngOnChanges(): void {
     this.workflowResultService
       .getPaginatedResultService(this.operatorId)
       ?.selectTuple(this.rowIndex, this.resizeService.pageSize)
       .pipe(untilDestroyed(this))
       .subscribe(res => {
-        this.currentDisplayRowData = res.tuple;
+        if (res?.tuple) {
+          this.currentDisplayRowData = res.tuple;
+          this.rowEntries = this.buildRowEntries(this.currentDisplayRowData);
+        }
       });
   }
+
+  trackByEntryKey(_index: number, entry: { key: string }): string {
+    return entry.key;
+  }
+
+  private resolveMediaSrc(value: string): string {
+    if (!value.startsWith("http://") && !value.startsWith("https://")) {
+      return value;
+    }
+    return `${AppSettings.getApiEndpoint()}/huggingface/media-proxy?url=${encodeURIComponent(value)}`;
+  }
+
+  private buildRowEntries(
+    rowData: Record<string, unknown>
+  ): { key: string; value: string; mediaSrc: string; isVideo: boolean; 
isImage: boolean; isAudio: boolean }[] {
+    return Object.entries(rowData).map(([key, val]) => {
+      const value = typeof val === "string" ? val : JSON.stringify(val);
+      return {
+        key,
+        value,
+        mediaSrc: this.resolveMediaSrc(value),
+        isVideo: typeof val === "string" && isVideoUrl(val),
+        isImage: typeof val === "string" && isImageUrl(val),
+        isAudio: typeof val === "string" && isAudioUrl(val),
+      };
+    });

Review Comment:
   `JSON.stringify(val)` can return `undefined` (e.g., when `val` is 
`undefined`), but `value` is treated as a `string` and passed into 
`resolveMediaSrc(value)` which calls `startsWith`. This can throw at runtime 
and also violates the declared `value: string` type. Coalesce to a real string 
for non-JSON-serializable values.



##########
frontend/src/app/workspace/component/hugging-face/hugging-face.component.ts:
##########
@@ -0,0 +1,637 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Component, OnInit, OnDestroy, ChangeDetectorRef } from 
"@angular/core";
+import { CommonModule } from "@angular/common";
+import { FormsModule } from "@angular/forms";
+import { FieldType, FieldTypeConfig, FormlyModule } from "@ngx-formly/core";
+import { HttpClient } from "@angular/common/http";
+import { NzSelectModule } from "ng-zorro-antd/select";
+import { NzInputModule } from "ng-zorro-antd/input";
+import { NzSpinModule } from "ng-zorro-antd/spin";
+import { NzButtonModule } from "ng-zorro-antd/button";
+import { NzIconModule } from "ng-zorro-antd/icon";
+import { AppSettings } from "../../../common/app-setting";
+import { Subject, Subscription } from "rxjs";
+import { debounceTime, finalize, switchMap, takeUntil } from "rxjs/operators";
+
+export interface HuggingFaceModelOption {
+  id: string;
+  label: string;
+  pipeline_tag?: string;
+  downloads?: number;
+  likes?: number;
+}
+
+export interface HuggingFaceTaskOption {
+  tag: string;
+  label: string;
+}
+
+// ── Static fallback task list (used when the dynamic fetch fails) ──
+export const STATIC_TASK_OPTIONS: HuggingFaceTaskOption[] = [
+  { tag: "text-generation", label: "Text Generation" },
+  { tag: "automatic-speech-recognition", label: "Automatic Speech Recognition" 
},
+  { tag: "audio-classification", label: "Audio Classification" },
+  { tag: "text-classification", label: "Text Classification" },
+  { tag: "text-to-speech", label: "Text to Speech" },
+  { tag: "token-classification", label: "Token Classification" },
+  { tag: "question-answering", label: "Question Answering" },
+  { tag: "table-question-answering", label: "Table Question Answering" },
+  { tag: "zero-shot-classification", label: "Zero-Shot Classification" },
+  { tag: "translation", label: "Translation" },
+  { tag: "summarization", label: "Summarization" },
+  { tag: "feature-extraction", label: "Feature Extraction" },
+  { tag: "fill-mask", label: "Fill-Mask" },
+  { tag: "sentence-similarity", label: "Sentence Similarity" },
+  { tag: "text-ranking", label: "Text Ranking" },
+  { tag: "image-classification", label: "Image Classification" },
+  { tag: "object-detection", label: "Object Detection" },
+  { tag: "image-segmentation", label: "Image Segmentation" },
+  { tag: "image-to-text", label: "Image to Text" },
+  { tag: "visual-question-answering", label: "Visual Question Answering" },
+  { tag: "document-question-answering", label: "Document Question Answering" },
+  { tag: "zero-shot-image-classification", label: "Zero-Shot Image 
Classification" },
+];
+
+const PAGE_SIZE = 50;
+
+const TRUNCATED_HEADER = "X-Texera-Truncated";
+
+// ── Module-level caches (reused across component instances) ──
+const allModelsByTag: Map<string, HuggingFaceModelOption[]> = new Map();
+const truncatedByTag: Set<string> = new Set();
+const inFlightByTag: Map<string, Subscription> = new Map();
+const errorByTag: Map<string, string> = new Map();
+
+let cachedTaskOptions: HuggingFaceTaskOption[] | null = null;
+let tasksFetchSubscription: Subscription | null = null;
+let tasksFetchError: string | null = null;
+
+/** Clear all cached data (useful for tests or manual invalidation). */
+export function invalidateHuggingFaceModelCache(): void {
+  allModelsByTag.clear();
+  truncatedByTag.clear();
+  errorByTag.clear();
+  inFlightByTag.forEach(sub => sub.unsubscribe());
+  inFlightByTag.clear();
+  cachedTaskOptions = null;
+  tasksFetchError = null;
+  tasksFetchSubscription?.unsubscribe();
+  tasksFetchSubscription = null;
+}
+
+@Component({
+  selector: "texera-hugging-face-model-select",
+  templateUrl: "./hugging-face.component.html",
+  styleUrls: ["hugging-face.component.scss"],
+  imports: [
+    CommonModule,
+    FormsModule,
+    NzSelectModule,
+    NzInputModule,
+    NzSpinModule,
+    NzButtonModule,
+    NzIconModule,
+    FormlyModule,
+  ],
+})
+export class HuggingFaceComponent extends FieldType<FieldTypeConfig> 
implements OnInit, OnDestroy {
+  private readonly taskScopedKeys = [
+    "modelId",
+    "promptColumn",
+    "imageInput",
+    "audioInput",
+    "inputImageColumn",
+    "inputAudioColumn",
+    "candidateLabels",
+    "sentencesColumn",
+    "contextColumn",
+    "systemPrompt",
+    "maxNewTokens",
+    "temperature",
+  ] as const;
+  private readonly taskStateByTag = new Map<string, Partial<Record<(typeof 
this.taskScopedKeys)[number], unknown>>>();
+  // ── Task state ──
+  taskOptions: HuggingFaceTaskOption[] = cachedTaskOptions ?? 
STATIC_TASK_OPTIONS;
+  selectedTaskTag = "text-generation";
+  tasksLoading = false;
+  tasksError: string | null = null;
+
+  // ── All models for the current task (fetched once from backend, cached) ──
+  private allModels: HuggingFaceModelOption[] = [];
+
+  // ── Displayed state ──
+  pagedModels: HuggingFaceModelOption[] = [];
+  currentPage = 0;
+  totalPages = 0;
+
+  loading = false;
+  errorMessage: string | null = null;
+
+  // ── Truncation notice ──
+  truncated = false;
+
+  // ── Search state ──
+  searchText = "";
+  searchLoading = false;
+  private filteredModels: HuggingFaceModelOption[] | null = null;
+  private readonly searchSubject$ = new Subject<string>();
+  private searchSubscription: Subscription | null = null;
+
+  private readonly destroy$ = new Subject<void>();
+  private subscription: Subscription | null = null;
+  private taskPollInterval: ReturnType<typeof setInterval> | null = null;
+  private modelPollInterval: ReturnType<typeof setInterval> | null = null;
+  private initTimeout: ReturnType<typeof setTimeout> | null = null;
+
+  constructor(
+    private http: HttpClient,
+    private cdr: ChangeDetectorRef
+  ) {
+    super();
+  }
+
+  ngOnInit(): void {
+    const savedTag = this.getCurrentTaskTag();
+    this.selectedTaskTag = savedTag ?? this.selectedTaskTag;
+    this.syncTaskSelection(this.selectedTaskTag, false);
+    this.loadTasks();
+    this.loadAllModels();
+    this.setupServerSearch();
+    // Formly can attach sibling controls after this field initializes.
+    // Re-sync once the control tree settles so a fresh operator starts in a 
valid task state.
+    this.initTimeout = setTimeout(
+      () => this.syncTaskSelection(this.getCurrentTaskTag() ?? 
this.selectedTaskTag, false),
+      0
+    );
+  }
+
+  ngOnDestroy(): void {
+    this.destroy$.next();
+    this.destroy$.complete();
+    this.subscription?.unsubscribe();
+    this.searchSubscription?.unsubscribe();
+    this.searchSubject$.complete();
+    if (this.taskPollInterval !== null) {
+      clearInterval(this.taskPollInterval);
+    }
+    if (this.modelPollInterval !== null) {
+      clearInterval(this.modelPollInterval);
+    }
+    if (this.initTimeout !== null) {
+      clearTimeout(this.initTimeout);
+    }
+  }
+
+  // ── Task loading ──
+
+  /**
+   * Fetch available pipeline tags from the backend, which proxies 
HuggingFace's /api/tasks.
+   * Falls back to STATIC_TASK_OPTIONS if the fetch fails.
+   */
+  private loadTasks(): void {
+    // Already fetched and cached
+    if (cachedTaskOptions !== null) {
+      this.taskOptions = cachedTaskOptions;
+      return;
+    }
+
+    // Previous fetch errored — show static list, don't retry automatically
+    if (tasksFetchError !== null) {
+      this.tasksError = tasksFetchError;
+      this.taskOptions = STATIC_TASK_OPTIONS;
+      return;
+    }
+
+    // Another component instance already has a fetch in flight — wait for it
+    if (tasksFetchSubscription !== null) {
+      this.tasksLoading = true;
+      // Poll for completion (the module-level cache will be set when done)
+      this.taskPollInterval = setInterval(() => {
+        if (cachedTaskOptions !== null || tasksFetchError !== null) {
+          clearInterval(this.taskPollInterval!);
+          this.taskPollInterval = null;
+          this.tasksLoading = false;
+          this.taskOptions = cachedTaskOptions ?? STATIC_TASK_OPTIONS;
+          if (tasksFetchError) this.tasksError = tasksFetchError;
+          this.cdr.detectChanges();
+        }
+      }, 200);
+      return;
+    }
+
+    this.tasksLoading = true;
+    this.tasksError = null;
+    this.cdr.detectChanges();
+
+    tasksFetchSubscription = this.http
+      
.get<HuggingFaceTaskOption[]>(`${AppSettings.getApiEndpoint()}/huggingface/tasks`)
+      .pipe(
+        takeUntil(this.destroy$),
+        finalize(() => {
+          // If takeUntil fires before next/error, reset the module-level guard
+          // so the next component instance can start a fresh fetch.
+          if (cachedTaskOptions === null && tasksFetchError === null) {
+            tasksFetchSubscription = null;
+          }
+        })
+      )
+      .subscribe({
+        next: tasks => {
+          tasksFetchSubscription = null;
+          cachedTaskOptions = tasks.length > 0 ? tasks : STATIC_TASK_OPTIONS;
+          this.taskOptions = cachedTaskOptions;
+          this.tasksLoading = false;
+          this.cdr.detectChanges();
+        },
+        error: (err: unknown) => {
+          console.error("Failed to load HuggingFace tasks:", err);
+          tasksFetchSubscription = null;
+          tasksFetchError = "Could not load tasks from Hugging Face. Using 
default list.";
+          this.tasksError = tasksFetchError;
+          this.taskOptions = STATIC_TASK_OPTIONS;
+          this.tasksLoading = false;
+          this.cdr.detectChanges();
+        },
+      });
+  }
+
+  retryTasksLoad(): void {
+    tasksFetchError = null;
+    this.tasksError = null;
+    this.loadTasks();
+  }
+
+  // ── Task selection ──
+
+  onTaskSelected(tag: string): void {
+    const previousTask = this.getCurrentTaskTag() ?? this.selectedTaskTag;
+    this.snapshotTaskState(previousTask);
+    this.syncTaskSelection(tag, true);
+    this.restoreTaskState(tag);
+    this.searchText = "";
+    this.filteredModels = null;
+    this.loadAllModels();
+  }
+
+  // ── Data loading ──
+
+  /**
+   * Fetch ALL models for the selected task.
+   * The backend paginates through HF Hub internally and caches the result.
+   * The first request per task may be slow; subsequent requests are instant.
+   */
+  private loadAllModels(): void {
+    const tag = this.selectedTaskTag || "text-generation";
+
+    this.loading = false;
+    this.errorMessage = null;
+
+    // Fast path: cached on the frontend
+    if (allModelsByTag.has(tag)) {
+      this.allModels = allModelsByTag.get(tag)!;
+      this.truncated = truncatedByTag.has(tag);
+      this.goToPage(0);
+      return;
+    }
+
+    // Previous error
+    if (errorByTag.has(tag)) {
+      this.errorMessage = errorByTag.get(tag)!;
+      this.allModels = [];
+      this.pagedModels = [];
+      this.totalPages = 0;
+      return;
+    }
+
+    // Another instance is already fetching this task — wait for it
+    if (inFlightByTag.has(tag)) {
+      this.loading = true;
+      this.modelPollInterval = setInterval(() => {
+        if (allModelsByTag.has(tag) || errorByTag.has(tag)) {
+          clearInterval(this.modelPollInterval!);
+          this.modelPollInterval = null;
+          this.loading = false;
+          if (allModelsByTag.has(tag)) {
+            this.allModels = allModelsByTag.get(tag)!;
+            this.truncated = truncatedByTag.has(tag);
+            this.goToPage(0);
+          } else {
+            this.errorMessage = errorByTag.get(tag)!;
+            this.cdr.detectChanges();
+          }
+        }
+      }, 200);
+      return;
+    }
+
+    // Cancel previous
+    this.subscription?.unsubscribe();
+    this.subscription = null;
+
+    this.allModels = [];
+    this.pagedModels = [];
+    this.totalPages = 0;
+
+    // Show spinner immediately for the initial fetch — it can take a while
+    // as the backend pages through HF Hub for the first time.
+    this.loading = true;
+    this.cdr.detectChanges();
+
+    this.subscription = this.http
+      .get<
+        HuggingFaceModelOption[]
+      
>(`${AppSettings.getApiEndpoint()}/huggingface/models?task=${encodeURIComponent(tag)}`)
+      .subscribe({
+        next: resp => {
+          const models = resp.body ?? [];
+          if (resp.headers.get(TRUNCATED_HEADER) === "true") {
+            truncatedByTag.add(tag);
+          }
+          allModelsByTag.set(tag, models);
+          inFlightByTag.delete(tag);
+          this.loading = false;
+          this.truncated = truncatedByTag.has(tag);
+          this.allModels = models;
+          this.goToPage(0);
+        },
+        error: (err: unknown) => {
+          console.error(`Failed to load HuggingFace models for task 
'${tag}':`, err);
+          const msg = "Failed to load models. Click retry to try again.";
+          errorByTag.set(tag, msg);
+          inFlightByTag.delete(tag);
+          this.loading = false;
+          this.errorMessage = msg;
+          this.cdr.detectChanges();
+        },
+      });

Review Comment:
   `HttpClient.get<T>()` emits the response body typed as `T` by default, but 
this code treats the emitted value as an `HttpResponse` (uses `resp.body` / 
`resp.headers`). This fails type-checking (an array type has no `body` or 
`headers` property), and at runtime `resp.headers.get(...)` would throw, so 
the truncation-header logic never runs. Use `observe: "response"` (and 
optionally `finalize` to always clear `inFlightByTag`) so headers/body are 
available and `inFlightByTag` can't get stuck if the request is unsubscribed.



##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceInferenceOpDesc.scala:
##########
@@ -128,29 +128,15 @@ class HuggingFaceInferenceOpDesc extends 
PythonOperatorDescriptor {
   private def codegenForTask(t: String): TaskCodegen =
     registeredCodegens.getOrElse(t, TextGenCodegen)
 
-  /**
-    * The output column name to use in generated Python and in the output
-    * schema. Falls back to the `"hf_response"` sentinel when the user
-    * leaves the field null or blank.
-    *
-    * Shared between [[generatePythonCode]] and [[getOutputSchemas]] so the
-    * two never drift apart (a divergence would cause the Python operator
-    * to write to a column the schema didn't declare). Returns
-    * [[EncodableString]] rather than `String` so the value flows into the
-    * `pyb` template with the encoding annotation intact.
-    */
-  private def resolvedResultColumn: EncodableString =
-    if (resultColumn == null || resultColumn.trim.isEmpty) "hf_response"
-    else resultColumn
-
   override def generatePythonCode(): String = {
     val safeTask: EncodableString =
       if (task == null || task.trim.isEmpty) "text-generation" else task
     val safeModelId: EncodableString =
       if (modelId == null) "" else modelId.trim
     val safePromptCol: EncodableString =
       if (promptColumn == null) "" else promptColumn
-    val safeResultCol: EncodableString = resolvedResultColumn
+    val safeResultCol: EncodableString =
+      if (resultColumn == null || resultColumn.trim.isEmpty) "hf_response" 
else resultColumn
     val safeSystemPrompt: EncodableString =
       if (systemPrompt == null) "" else systemPrompt

Review Comment:
   The operator currently only registers `TextGenCodegen` in 
`registeredCodegens`, so any non-text-generation task selected in the UI 
(image/audio/video/QA/etc.) will silently fall back to the text-generation 
payload/parse logic. Given this PR adds UI support for many tasks (and adds new 
`*Codegen` objects), those codegens also need to be wired into 
`registeredCodegens` (and imported) so the backend generates task-correct 
Python.



##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/ImageTaskCodegen.scala:
##########
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.huggingFace.codegen
+
+/**
+  * Codegen for the Hugging Face image-pipeline task family.
+  *
+  * Splits into two sub-families:
+  *  - "image-only" tasks send raw image bytes as the request body and don't
+  *    consume the prompt column: image-classification, object-detection,
+  *    image-segmentation, image-to-text.
+  *  - "image + prompt" tasks bundle a base64 image and a text prompt in a
+  *    JSON payload: visual-question-answering, document-question-answering,
+  *    zero-shot-image-classification, image-text-to-text, image-to-image.
+  *
+  * Per-row `current_image_bytes` is resolved upstream in
+  * [[PythonCodegenBase]]'s `process_table` (either from the operator's
+  * uploaded image or from `INPUT_IMAGE_COLUMN`). The image helpers
+  * (`_read_image_input`, `_compress_image_bytes`, `_image_input_as_base64`,
+  * `_read_binary_value`, `_looks_like_html`, `_html_to_image_bytes`,
+  * `_extract_json_arg`) live in PythonCodegenBase alongside the per-task
+  * tuples (`image_only_tasks`, `image_prompt_tasks`, `image_tasks`).
+  */
+object ImageTaskCodegen extends TaskCodegen {
+
+  /** Primary key for registration; the dispatcher maps every task in
+    * [[tasks]] to this codegen.
+    */
+  override val task: String = "image-classification"
+
+  /** All HF tasks routed through this codegen. */
+  override val tasks: Set[String] = Set(
+    // image-only
+    "image-classification",
+    "object-detection",
+    "image-segmentation",
+    "image-to-text",
+    // image + prompt
+    "visual-question-answering",
+    "document-question-answering",
+    "zero-shot-image-classification",
+    "image-text-to-text",
+    "image-to-image"
+  )
+
+  override def payloadPython(ctx: CodegenContext): String =
+    """            if task in image_only_tasks:
+      |                payload = current_image_bytes
+      |                use_raw_binary_body = True
+      |                raw_binary_headers = image_headers
+      |            elif task in ("visual-question-answering", 
"document-question-answering"):
+      |                payload = {
+      |                    "inputs": {
+      |                        "image": 
self._image_input_as_base64(current_image_bytes),
+      |                        "question": prompt_value,
+      |                    }
+      |                }
+      |            elif task == "image-text-to-text":
+      |                img_b64 = 
self._image_input_as_base64(current_image_bytes)
+      |                payload = {
+      |                    "model": self.MODEL_ID,
+      |                    "messages": [{
+      |                        "role": "user",
+      |                        "content": [
+      |                            {"type": "image_url", "image_url": {"url": 
f"data:image/png;base64,{img_b64}"}},
+      |                            {"type": "text", "text": prompt_value if 
prompt_value else "Describe this image."},
+      |                        ],
+      |                    }],
+      |                    "max_tokens": self.MAX_NEW_TOKENS,
+      |                }
+      |            elif task == "image-to-image":
+      |                payload = current_image_bytes
+      |                use_raw_binary_body = True
+      |                raw_binary_headers = image_headers
+      |            elif task == "zero-shot-image-classification":
+      |                # Zero-shot requires the caller to supply candidate 
labels.
+      |                # We reuse the prompt column as a comma-separated label 
list so
+      |                # the task is shippable without a dedicated operator 
field.
+      |                # TODO: replace with a first-class `candidateLabels` 
field once
+      |                # the property panel supports task-specific inputs.
+      |                #
+      |                # Fail fast if usable labels can't be derived. Both 
modes lead to
+      |                # a meaningless inference call:
+      |                #   1. Empty prompt column          -> labels = []
+      |                #      The HF API rejects candidate_labels: [] with an 
opaque 400.
+      |                #   2. Missing prompt column        -> upstream sets 
prompt_value
+      |                #      to the fallback "What is shown in this image?", 
which has
+      |                #      no comma, so labels collapses to a single 
nonsense entry.
+      |                # Zero-shot classification needs >= 2 candidate labels 
to be
+      |                # meaningful — surface a configuration error in both 
cases.
+      |                labels = [s.strip() for s in prompt_value.split(",") if 
s.strip()]
+      |                if len(labels) < 2:
+      |                    raise ValueError(
+      |                        "zero-shot-image-classification requires at 
least 2 candidate "
+      |                        "labels: provide a comma-separated list in the 
prompt column."
+      |                    )
+      |                payload = {
+      |                    "inputs": 
self._image_input_as_base64(current_image_bytes),
+      |                    "parameters": {"candidate_labels": labels},
+      |                }

Review Comment:
   `zero-shot-image-classification` still reuses `prompt_value` as the 
comma-separated candidate-label list (and the error message tells the user to 
put labels in the prompt column). In this PR, the UI/schema introduces a 
dedicated `candidateLabels` field (and other codegen, e.g. `QaRankingCodegen`, 
already uses `self.CANDIDATE_LABELS`). This mismatch will make the UI 
configuration misleading and can cause runtime failures even when 
`candidateLabels` is provided.



##########
frontend/src/app/workspace/component/result-panel/result-panel-modal.component.ts:
##########
@@ -42,29 +48,79 @@ import { NgxJsonViewerModule } from "ngx-json-viewer";
   selector: "texera-row-modal-content",
   templateUrl: "./result-panel-modal.component.html",
   styleUrls: ["./result-panel-model.component.scss"],
-  imports: [NgxJsonViewerModule],
+  imports: [CommonModule, NzButtonModule, NzIconModule, NgxJsonViewerModule],
 })

Review Comment:
   `NgxJsonViewerModule` is still listed in the component `imports`, but the 
template no longer uses `<ngx-json-viewer>`. Keeping unused standalone imports 
increases bundle size and can hide dead dependencies; it should be removed from 
`imports` (and then the TS import can be removed separately).



##########
frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.html:
##########
@@ -161,7 +161,40 @@ <h5 class="rightAlign"><span 
[innerHTML]="compare(column.header, 'other')"></spa
             class="table-cell"
             nzEllipsis
             (click)="open(i, row)">
-            <span class="cell-content">{{ column.getCell(row) }}</span>
+            <span class="cell-content">
+              <ng-container *ngIf="isVideoCell(column.getCell(row)); else 
checkAudio">
+                <i
+                  nz-icon
+                  nzType="play-circle"
+                  nzTheme="outline"

Review Comment:
   The result-table cell template currently renders only an icon + "Play/View" 
text for media cells, but the PR description says media should be rendered 
inline in the result table via `<img>`, `<audio>`, and `<video>` tags. Either 
update the implementation to render the actual media elements in-table, or 
adjust the PR description/UX expectation if the intended behavior is "show 
label and open modal to view".



##########
frontend/src/app/workspace/component/hugging-face/hugging-face.component.ts:
##########
@@ -0,0 +1,637 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Component, OnInit, OnDestroy, ChangeDetectorRef } from 
"@angular/core";
+import { CommonModule } from "@angular/common";
+import { FormsModule } from "@angular/forms";
+import { FieldType, FieldTypeConfig, FormlyModule } from "@ngx-formly/core";
+import { HttpClient } from "@angular/common/http";
+import { NzSelectModule } from "ng-zorro-antd/select";
+import { NzInputModule } from "ng-zorro-antd/input";
+import { NzSpinModule } from "ng-zorro-antd/spin";
+import { NzButtonModule } from "ng-zorro-antd/button";
+import { NzIconModule } from "ng-zorro-antd/icon";
+import { AppSettings } from "../../../common/app-setting";
+import { Subject, Subscription } from "rxjs";
+import { debounceTime, finalize, switchMap, takeUntil } from "rxjs/operators";
+
+export interface HuggingFaceModelOption {
+  id: string;
+  label: string;
+  pipeline_tag?: string;
+  downloads?: number;
+  likes?: number;
+}
+
+export interface HuggingFaceTaskOption {
+  tag: string;
+  label: string;
+}
+
+// ── Static fallback task list (used when the dynamic fetch fails) ──
+export const STATIC_TASK_OPTIONS: HuggingFaceTaskOption[] = [
+  { tag: "text-generation", label: "Text Generation" },
+  { tag: "automatic-speech-recognition", label: "Automatic Speech Recognition" 
},
+  { tag: "audio-classification", label: "Audio Classification" },
+  { tag: "text-classification", label: "Text Classification" },
+  { tag: "text-to-speech", label: "Text to Speech" },
+  { tag: "token-classification", label: "Token Classification" },
+  { tag: "question-answering", label: "Question Answering" },
+  { tag: "table-question-answering", label: "Table Question Answering" },
+  { tag: "zero-shot-classification", label: "Zero-Shot Classification" },
+  { tag: "translation", label: "Translation" },
+  { tag: "summarization", label: "Summarization" },
+  { tag: "feature-extraction", label: "Feature Extraction" },
+  { tag: "fill-mask", label: "Fill-Mask" },
+  { tag: "sentence-similarity", label: "Sentence Similarity" },
+  { tag: "text-ranking", label: "Text Ranking" },
+  { tag: "image-classification", label: "Image Classification" },
+  { tag: "object-detection", label: "Object Detection" },
+  { tag: "image-segmentation", label: "Image Segmentation" },
+  { tag: "image-to-text", label: "Image to Text" },
+  { tag: "visual-question-answering", label: "Visual Question Answering" },
+  { tag: "document-question-answering", label: "Document Question Answering" },
+  { tag: "zero-shot-image-classification", label: "Zero-Shot Image 
Classification" },
+];
+
+const PAGE_SIZE = 50;
+
+const TRUNCATED_HEADER = "X-Texera-Truncated";
+
+// ── Module-level caches (reused across component instances) ──
+const allModelsByTag: Map<string, HuggingFaceModelOption[]> = new Map();
+const truncatedByTag: Set<string> = new Set();
+const inFlightByTag: Map<string, Subscription> = new Map();
+const errorByTag: Map<string, string> = new Map();
+
+let cachedTaskOptions: HuggingFaceTaskOption[] | null = null;
+let tasksFetchSubscription: Subscription | null = null;
+let tasksFetchError: string | null = null;
+
+/** Clear all cached data (useful for tests or manual invalidation). */
+export function invalidateHuggingFaceModelCache(): void {
+  allModelsByTag.clear();
+  truncatedByTag.clear();
+  errorByTag.clear();
+  inFlightByTag.forEach(sub => sub.unsubscribe());
+  inFlightByTag.clear();
+  cachedTaskOptions = null;
+  tasksFetchError = null;
+  tasksFetchSubscription?.unsubscribe();
+  tasksFetchSubscription = null;
+}
+
+@Component({
+  selector: "texera-hugging-face-model-select",
+  templateUrl: "./hugging-face.component.html",
+  styleUrls: ["hugging-face.component.scss"],
+  imports: [
+    CommonModule,
+    FormsModule,
+    NzSelectModule,
+    NzInputModule,
+    NzSpinModule,
+    NzButtonModule,
+    NzIconModule,
+    FormlyModule,
+  ],
+})
+export class HuggingFaceComponent extends FieldType<FieldTypeConfig> 
implements OnInit, OnDestroy {
+  private readonly taskScopedKeys = [
+    "modelId",
+    "promptColumn",
+    "imageInput",
+    "audioInput",
+    "inputImageColumn",
+    "inputAudioColumn",
+    "candidateLabels",
+    "sentencesColumn",
+    "contextColumn",
+    "systemPrompt",
+    "maxNewTokens",
+    "temperature",
+  ] as const;
+  private readonly taskStateByTag = new Map<string, Partial<Record<(typeof 
this.taskScopedKeys)[number], unknown>>>();
+  // ── Task state ──
+  taskOptions: HuggingFaceTaskOption[] = cachedTaskOptions ?? 
STATIC_TASK_OPTIONS;
+  selectedTaskTag = "text-generation";
+  tasksLoading = false;
+  tasksError: string | null = null;
+
+  // ── All models for the current task (fetched once from backend, cached) ──
+  private allModels: HuggingFaceModelOption[] = [];
+
+  // ── Displayed state ──
+  pagedModels: HuggingFaceModelOption[] = [];
+  currentPage = 0;
+  totalPages = 0;
+
+  loading = false;
+  errorMessage: string | null = null;
+
+  // ── Truncation notice ──
+  truncated = false;
+
+  // ── Search state ──
+  searchText = "";
+  searchLoading = false;
+  private filteredModels: HuggingFaceModelOption[] | null = null;
+  private readonly searchSubject$ = new Subject<string>();
+  private searchSubscription: Subscription | null = null;
+
+  private readonly destroy$ = new Subject<void>();
+  private subscription: Subscription | null = null;
+  private taskPollInterval: ReturnType<typeof setInterval> | null = null;
+  private modelPollInterval: ReturnType<typeof setInterval> | null = null;
+  private initTimeout: ReturnType<typeof setTimeout> | null = null;
+
+  constructor(
+    private http: HttpClient,
+    private cdr: ChangeDetectorRef
+  ) {
+    super();
+  }
+
+  ngOnInit(): void {
+    const savedTag = this.getCurrentTaskTag();
+    this.selectedTaskTag = savedTag ?? this.selectedTaskTag;
+    this.syncTaskSelection(this.selectedTaskTag, false);
+    this.loadTasks();
+    this.loadAllModels();
+    this.setupServerSearch();
+    // Formly can attach sibling controls after this field initializes.
+    // Re-sync once the control tree settles so a fresh operator starts in a 
valid task state.
+    this.initTimeout = setTimeout(
+      () => this.syncTaskSelection(this.getCurrentTaskTag() ?? 
this.selectedTaskTag, false),
+      0
+    );
+  }
+
+  ngOnDestroy(): void {
+    this.destroy$.next();
+    this.destroy$.complete();
+    this.subscription?.unsubscribe();
+    this.searchSubscription?.unsubscribe();
+    this.searchSubject$.complete();
+    if (this.taskPollInterval !== null) {
+      clearInterval(this.taskPollInterval);
+    }
+    if (this.modelPollInterval !== null) {
+      clearInterval(this.modelPollInterval);
+    }
+    if (this.initTimeout !== null) {
+      clearTimeout(this.initTimeout);
+    }
+  }
+
+  // ── Task loading ──
+
+  /**
+   * Fetch available pipeline tags from the backend, which proxies 
HuggingFace's /api/tasks.
+   * Falls back to STATIC_TASK_OPTIONS if the fetch fails.
+   */
+  private loadTasks(): void {
+    // Already fetched and cached
+    if (cachedTaskOptions !== null) {
+      this.taskOptions = cachedTaskOptions;
+      return;
+    }
+
+    // Previous fetch errored — show static list, don't retry automatically
+    if (tasksFetchError !== null) {
+      this.tasksError = tasksFetchError;
+      this.taskOptions = STATIC_TASK_OPTIONS;
+      return;
+    }
+
+    // Another component instance already has a fetch in flight — wait for it
+    if (tasksFetchSubscription !== null) {
+      this.tasksLoading = true;
+      // Poll for completion (the module-level cache will be set when done)
+      this.taskPollInterval = setInterval(() => {
+        if (cachedTaskOptions !== null || tasksFetchError !== null) {
+          clearInterval(this.taskPollInterval!);
+          this.taskPollInterval = null;
+          this.tasksLoading = false;
+          this.taskOptions = cachedTaskOptions ?? STATIC_TASK_OPTIONS;
+          if (tasksFetchError) this.tasksError = tasksFetchError;
+          this.cdr.detectChanges();
+        }
+      }, 200);
+      return;
+    }
+
+    this.tasksLoading = true;
+    this.tasksError = null;
+    this.cdr.detectChanges();
+
+    tasksFetchSubscription = this.http
+      
.get<HuggingFaceTaskOption[]>(`${AppSettings.getApiEndpoint()}/huggingface/tasks`)
+      .pipe(
+        takeUntil(this.destroy$),
+        finalize(() => {
+          // If takeUntil fires before next/error, reset the module-level guard
+          // so the next component instance can start a fresh fetch.
+          if (cachedTaskOptions === null && tasksFetchError === null) {
+            tasksFetchSubscription = null;
+          }
+        })
+      )
+      .subscribe({

Review Comment:
   `tasksFetchSubscription` is module-level/shared across component instances, 
but the request is canceled via `takeUntil(this.destroy$)`. If the initiating 
component is destroyed before the request completes, 
`cachedTaskOptions`/`tasksFetchError` remain null and any other instances 
currently polling will spin indefinitely. Since this is a shared fetch, it 
shouldn’t be tied to a single component’s lifecycle.



##########
frontend/src/app/common/util/media-type.util.ts:
##########
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+export function isVideoUrl(value: string): boolean {
+  if (typeof value !== "string") return false;
+  return (
+    value.match(/\.(mp4|webm|ogg)(\?.*)?$/i) !== null ||
+    value.startsWith("data:video/") ||
+    value.startsWith("https://v3b.fal.media/files/")
+  );

Review Comment:
   The helpers are not mutually exclusive for `.ogg` URLs: `isVideoUrl` and 
`isAudioUrl` both return true for `*.ogg`, so callers that check video first 
will render many Ogg Vorbis audio files as `<video>`. Since video Ogg is 
typically `.ogv`, it’s safer to treat `.ogg` as audio-only and use `.ogv` for 
video extension detection.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to