Copilot commented on code in PR #5568:
URL: https://github.com/apache/texera/pull/5568#discussion_r3444862316


##########
frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts:
##########
@@ -167,6 +170,273 @@ export class OperatorPropertyEditFrameComponent 
implements OnInit, OnChanges, On
   // used to tear down subscriptions that takeUntil(teardownObservable)
   private teardownObservable: Subject<void> = new Subject();
 
+  readonly huggingFaceTaskPreviewSamples: Record<
+    string,
+    {
+      kind: "image" | "video" | "audio" | "text";
+      inputLabel?: string;
+      outputLabel?: string;
+      title?: string;
+      body?: string;
+      outputBody?: string;
+      pills?: string[];
+      assetSrc?: string;
+    }
+  > = {
+    "text-to-image": {
+      kind: "image",
+      inputLabel: "Text prompt",
+      outputLabel: "Generated image",
+      title: "Comic-style city action scene",
+      body: "Prompt becomes a generated image preview.",
+      assetSrc: "assets/sample-image.png",
+    },
+    "image-to-image": {
+      kind: "image",
+      inputLabel: "Source image",
+      outputLabel: "Edited image",
+      title: "Image transformation preview",
+      body: "Image input produces a modified image result.",
+      assetSrc: "assets/sample-image.png",
+    },
+    "text-to-video": {
+      kind: "video",
+      inputLabel: "Text prompt",
+      outputLabel: "Generated video",
+      title: "Prompt-based motion preview",
+      body: "Prompt becomes a generated video clip.",
+      assetSrc: "assets/sample-video.mp4",
+    },

Review Comment:
   The task preview sample map references `assets/sample-video.mp4`, but that 
file doesn't exist under `frontend/src/assets/` in this PR. This will render a 
broken video preview for video tasks in the property editor.



##########
frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.html:
##########
@@ -0,0 +1,61 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<div class="hf-audio-upload">
+  <div class="hf-audio-guidance">
+    Audio files are uploaded to temporary backend storage and referenced from 
the operator, so larger clips can be used
+    without bloating the workflow JSON.
+  </div>
+
+  <input
+    #fileInput
+    type="file"
+    accept="audio/*"
+    class="hf-audio-upload-input"
+    (change)="onFileSelected($event)" />

Review Comment:
   The audio upload UI leaves the file input enabled while an upload is in 
progress. Users can still pick another file while `isUploading` is true, but 
the handler returns early, so the new selection is not processed even though 
the input looks interactive. Disabling the input during upload (for example, 
`[disabled]="isUploading"`) makes the busy state visible and also reduces the 
chance of inconsistent states, such as the file picker showing a file that was 
never uploaded.



##########
frontend/src/app/workspace/component/hugging-face/hugging-face.component.ts:
##########
@@ -0,0 +1,648 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Component, OnInit, OnDestroy, ChangeDetectorRef } from 
"@angular/core";
+import { CommonModule } from "@angular/common";
+import { FormsModule } from "@angular/forms";
+import { FieldType, FieldTypeConfig, FormlyModule } from "@ngx-formly/core";
+import { HttpClient } from "@angular/common/http";
+import { NzSelectModule } from "ng-zorro-antd/select";
+import { NzInputModule } from "ng-zorro-antd/input";
+import { NzSpinModule } from "ng-zorro-antd/spin";
+import { NzButtonModule } from "ng-zorro-antd/button";
+import { NzIconModule } from "ng-zorro-antd/icon";
+import { AppSettings } from "../../../common/app-setting";
+import { Subject, Subscription } from "rxjs";
+import { debounceTime, finalize, switchMap, takeUntil } from "rxjs/operators";
+
+export interface HuggingFaceModelOption {
+  id: string;
+  label: string;
+  pipeline_tag?: string;
+  downloads?: number;
+  likes?: number;
+}
+
+export interface HuggingFaceTaskOption {
+  tag: string;
+  label: string;
+}
+
+// ── Static fallback task list (used when the dynamic fetch fails) ──
+export const STATIC_TASK_OPTIONS: HuggingFaceTaskOption[] = [
+  { tag: "text-generation", label: "Text Generation" },
+  { tag: "automatic-speech-recognition", label: "Automatic Speech Recognition" 
},
+  { tag: "audio-classification", label: "Audio Classification" },
+  { tag: "text-classification", label: "Text Classification" },
+  { tag: "text-to-speech", label: "Text to Speech" },
+  { tag: "token-classification", label: "Token Classification" },
+  { tag: "question-answering", label: "Question Answering" },
+  { tag: "table-question-answering", label: "Table Question Answering" },
+  { tag: "zero-shot-classification", label: "Zero-Shot Classification" },
+  { tag: "translation", label: "Translation" },
+  { tag: "summarization", label: "Summarization" },
+  { tag: "feature-extraction", label: "Feature Extraction" },
+  { tag: "fill-mask", label: "Fill-Mask" },
+  { tag: "sentence-similarity", label: "Sentence Similarity" },
+  { tag: "text-ranking", label: "Text Ranking" },
+  { tag: "image-classification", label: "Image Classification" },
+  { tag: "object-detection", label: "Object Detection" },
+  { tag: "image-segmentation", label: "Image Segmentation" },
+  { tag: "image-to-text", label: "Image to Text" },
+  { tag: "visual-question-answering", label: "Visual Question Answering" },
+  { tag: "document-question-answering", label: "Document Question Answering" },
+  { tag: "zero-shot-image-classification", label: "Zero-Shot Image 
Classification" },
+];
+
+const PAGE_SIZE = 50;
+
+const TRUNCATED_HEADER = "X-Texera-Truncated";
+
+// ── Module-level caches (reused across component instances) ──
+const allModelsByTag: Map<string, HuggingFaceModelOption[]> = new Map();
+const truncatedByTag: Set<string> = new Set();
+const inFlightByTag: Map<string, Subscription> = new Map();
+const errorByTag: Map<string, string> = new Map();
+
+let cachedTaskOptions: HuggingFaceTaskOption[] | null = null;
+let tasksFetchSubscription: Subscription | null = null;
+let tasksFetchError: string | null = null;
+
+/** Clear all cached data (useful for tests or manual invalidation). */
+export function invalidateHuggingFaceModelCache(): void {
+  allModelsByTag.clear();
+  truncatedByTag.clear();
+  errorByTag.clear();
+  inFlightByTag.forEach(sub => sub.unsubscribe());
+  inFlightByTag.clear();
+  cachedTaskOptions = null;
+  tasksFetchError = null;
+  tasksFetchSubscription?.unsubscribe();
+  tasksFetchSubscription = null;
+}
+
+@Component({
+  selector: "texera-hugging-face-model-select",
+  templateUrl: "./hugging-face.component.html",
+  styleUrls: ["hugging-face.component.scss"],
+  imports: [
+    CommonModule,
+    FormsModule,
+    NzSelectModule,
+    NzInputModule,
+    NzSpinModule,
+    NzButtonModule,
+    NzIconModule,
+    FormlyModule,
+  ],
+})
+export class HuggingFaceComponent extends FieldType<FieldTypeConfig> 
implements OnInit, OnDestroy {
+  private readonly taskScopedKeys = [
+    "modelId",
+    "promptColumn",
+    "imageInput",
+    "audioInput",
+    "inputImageColumn",
+    "inputAudioColumn",
+    "candidateLabels",
+    "sentencesColumn",
+    "contextColumn",
+    "systemPrompt",
+    "maxNewTokens",
+    "temperature",
+  ] as const;
+  private readonly taskStateByTag = new Map<string, Partial<Record<(typeof 
this.taskScopedKeys)[number], unknown>>>();
+  // ── Task state ──
+  taskOptions: HuggingFaceTaskOption[] = cachedTaskOptions ?? 
STATIC_TASK_OPTIONS;
+  selectedTaskTag = "text-generation";
+  tasksLoading = false;
+  tasksError: string | null = null;
+
+  // ── All models for the current task (fetched once from backend, cached) ──
+  private allModels: HuggingFaceModelOption[] = [];
+
+  // ── Displayed state ──
+  pagedModels: HuggingFaceModelOption[] = [];
+  currentPage = 0;
+  totalPages = 0;
+
+  loading = false;
+  errorMessage: string | null = null;
+
+  // ── Truncation notice ──
+  truncated = false;
+
+  // ── Search state ──
+  searchText = "";
+  searchLoading = false;
+  private filteredModels: HuggingFaceModelOption[] | null = null;
+  private readonly searchSubject$ = new Subject<string>();
+  private searchSubscription: Subscription | null = null;
+
+  private readonly destroy$ = new Subject<void>();
+  private subscription: Subscription | null = null;
+  private taskPollInterval: ReturnType<typeof setInterval> | null = null;
+  private modelPollInterval: ReturnType<typeof setInterval> | null = null;
+  private initTimeout: ReturnType<typeof setTimeout> | null = null;
+
+  constructor(
+    private http: HttpClient,
+    private cdr: ChangeDetectorRef
+  ) {
+    super();
+  }
+
+  ngOnInit(): void {
+    const savedTag = this.getCurrentTaskTag();
+    this.selectedTaskTag = savedTag ?? this.selectedTaskTag;
+    this.syncTaskSelection(this.selectedTaskTag, false);
+    this.loadTasks();
+    this.loadAllModels();
+    this.setupServerSearch();
+    // Formly can attach sibling controls after this field initializes.
+    // Re-sync once the control tree settles so a fresh operator starts in a 
valid task state.
+    this.initTimeout = setTimeout(
+      () => this.syncTaskSelection(this.getCurrentTaskTag() ?? 
this.selectedTaskTag, false),
+      0
+    );
+  }
+
+  ngOnDestroy(): void {
+    this.destroy$.next();
+    this.destroy$.complete();
+    this.subscription?.unsubscribe();
+    this.searchSubscription?.unsubscribe();
+    this.searchSubject$.complete();
+    if (this.taskPollInterval !== null) {
+      clearInterval(this.taskPollInterval);
+    }
+    if (this.modelPollInterval !== null) {
+      clearInterval(this.modelPollInterval);
+    }
+    if (this.initTimeout !== null) {
+      clearTimeout(this.initTimeout);
+    }
+  }
+
+  // ── Task loading ──
+
+  /**
+   * Fetch available pipeline tags from the backend, which proxies 
HuggingFace's /api/tasks.
+   * Falls back to STATIC_TASK_OPTIONS if the fetch fails.
+   */
+  private loadTasks(): void {
+    // Already fetched and cached
+    if (cachedTaskOptions !== null) {
+      this.taskOptions = cachedTaskOptions;
+      return;
+    }
+
+    // Previous fetch errored — show static list, don't retry automatically
+    if (tasksFetchError !== null) {
+      this.tasksError = tasksFetchError;
+      this.taskOptions = STATIC_TASK_OPTIONS;
+      return;
+    }
+
+    // Another component instance already has a fetch in flight — wait for it
+    if (tasksFetchSubscription !== null) {
+      this.tasksLoading = true;
+      // Poll for completion (the module-level cache will be set when done)
+      this.taskPollInterval = setInterval(() => {
+        if (cachedTaskOptions !== null || tasksFetchError !== null) {
+          clearInterval(this.taskPollInterval!);
+          this.taskPollInterval = null;
+          this.tasksLoading = false;
+          this.taskOptions = cachedTaskOptions ?? STATIC_TASK_OPTIONS;
+          if (tasksFetchError) this.tasksError = tasksFetchError;
+          this.cdr.detectChanges();
+        }
+      }, 200);
+      return;
+    }

Review Comment:
   `loadTasks()` uses a polling `setInterval` when a module-level fetch is 
already in-flight, but the poll only stops when `cachedTaskOptions` or 
`tasksFetchError` is set. If the in-flight subscription is canceled (e.g., the 
instance that started it is destroyed before next/error), `finalize` clears 
`tasksFetchSubscription` without setting either cache value, causing this 
polling interval to keep firing in other instances until they are destroyed (a 
leaked timer that wastes CPU, and a tasks dropdown stuck in its loading state). 
Handle the "guard cleared without result" case by stopping the poll and 
starting a fresh fetch from the current instance.



##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/ImageTaskCodegen.scala:
##########
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.huggingFace.codegen
+
+/**
+  * Codegen for the Hugging Face image-pipeline task family.
+  *
+  * Splits into two sub-families:
+  *  - "image-only" tasks send raw image bytes as the request body and don't
+  *    consume the prompt column: image-classification, object-detection,
+  *    image-segmentation, image-to-text.
+  *  - "image + prompt" tasks bundle a base64 image and a text prompt in a
+  *    JSON payload: visual-question-answering, document-question-answering,
+  *    zero-shot-image-classification, image-text-to-text, image-to-image.
+  *
+  * Per-row `current_image_bytes` is resolved upstream in
+  * [[PythonCodegenBase]]'s `process_table` (either from the operator's
+  * uploaded image or from `INPUT_IMAGE_COLUMN`). The image helpers
+  * (`_read_image_input`, `_compress_image_bytes`, `_image_input_as_base64`,
+  * `_read_binary_value`, `_looks_like_html`, `_html_to_image_bytes`,
+  * `_extract_json_arg`) live in PythonCodegenBase alongside the per-task
+  * tuples (`image_only_tasks`, `image_prompt_tasks`, `image_tasks`).
+  */
+object ImageTaskCodegen extends TaskCodegen {
+
+  /** Primary key for registration; the dispatcher maps every task in
+    * [[tasks]] to this codegen.
+    */
+  override val task: String = "image-classification"
+
+  /** All HF tasks routed through this codegen. */
+  override val tasks: Set[String] = Set(
+    // image-only
+    "image-classification",
+    "object-detection",
+    "image-segmentation",
+    "image-to-text",
+    // image + prompt
+    "visual-question-answering",
+    "document-question-answering",
+    "zero-shot-image-classification",
+    "image-text-to-text",
+    "image-to-image"
+  )
+
+  override def payloadPython(ctx: CodegenContext): String =
+    """            if task in image_only_tasks:
+      |                payload = current_image_bytes
+      |                use_raw_binary_body = True
+      |                raw_binary_headers = image_headers
+      |            elif task in ("visual-question-answering", 
"document-question-answering"):
+      |                payload = {
+      |                    "inputs": {
+      |                        "image": 
self._image_input_as_base64(current_image_bytes),
+      |                        "question": prompt_value,
+      |                    }
+      |                }
+      |            elif task == "image-text-to-text":
+      |                img_b64 = 
self._image_input_as_base64(current_image_bytes)
+      |                payload = {
+      |                    "model": self.MODEL_ID,
+      |                    "messages": [{
+      |                        "role": "user",
+      |                        "content": [
+      |                            {"type": "image_url", "image_url": {"url": 
f"data:image/png;base64,{img_b64}"}},
+      |                            {"type": "text", "text": prompt_value if 
prompt_value else "Describe this image."},
+      |                        ],
+      |                    }],
+      |                    "max_tokens": self.MAX_NEW_TOKENS,
+      |                }
+      |            elif task == "image-to-image":
+      |                payload = current_image_bytes
+      |                use_raw_binary_body = True
+      |                raw_binary_headers = image_headers
+      |            elif task == "zero-shot-image-classification":
+      |                # Zero-shot requires the caller to supply candidate 
labels.
+      |                # We reuse the prompt column as a comma-separated label 
list so
+      |                # the task is shippable without a dedicated operator 
field.
+      |                # TODO: replace with a first-class `candidateLabels` 
field once
+      |                # the property panel supports task-specific inputs.
+      |                #
+      |                # Fail fast if usable labels can't be derived. Both 
modes lead to
+      |                # a meaningless inference call:
+      |                #   1. Empty prompt column          -> labels = []
+      |                #      The HF API rejects candidate_labels: [] with an 
opaque 400.
+      |                #   2. Missing prompt column        -> upstream sets 
prompt_value
+      |                #      to the fallback "What is shown in this image?", 
which has
+      |                #      no comma, so labels collapses to a single 
nonsense entry.
+      |                # Zero-shot classification needs >= 2 candidate labels 
to be
+      |                # meaningful — surface a configuration error in both 
cases.
+      |                labels = [s.strip() for s in prompt_value.split(",") if 
s.strip()]
+      |                if len(labels) < 2:
+      |                    raise ValueError(
+      |                        "zero-shot-image-classification requires at 
least 2 candidate "
+      |                        "labels: provide a comma-separated list in the 
prompt column."
+      |                    )

Review Comment:
   For `zero-shot-image-classification`, the generated Python currently derives 
`candidate_labels` from `prompt_value` (prompt column). But the operator now 
has a dedicated `candidateLabels` property (`self.CANDIDATE_LABELS`), and the 
frontend shows that field for this task. As a result, the UI's Candidate Labels 
input is ignored at runtime. Prefer `self.CANDIDATE_LABELS` (with a fallback to 
`prompt_value` for backward compatibility).



##########
frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts:
##########
@@ -237,6 +507,19 @@ export class OperatorPropertyEditFrameComponent implements 
OnInit, OnChanges, On
       });
   }
 
+  private isHuggingFaceOperator(): boolean {
+    if (!this.currentOperatorId) return false;
+    const operator = 
this.workflowActionService.getTexeraGraph().getOperator(this.currentOperatorId);
+    return operator.operatorType === "HuggingFace";
+  }

Review Comment:
   `isHuggingFaceOperator()` calls `getOperator()` directly, which throws if 
the operator ID is no longer present in the graph (e.g., the operator is 
deleted while the property panel is still rendering). Since this is used by a 
template getter (`huggingFaceTaskPreview`), an exception here can break change 
detection and the property editor UI. Check `hasOperator()` before calling 
`getOperator()`, or wrap the call in a try/catch.



##########
frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.html:
##########
@@ -0,0 +1,61 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<div class="hf-audio-upload">
+  <div class="hf-audio-guidance">
+    Audio files are uploaded to temporary backend storage and referenced from 
the operator, so larger clips can be used
+    without bloating the workflow JSON.
+  </div>
+
+  <input
+    #fileInput
+    type="file"
+    accept="audio/*"
+    class="hf-audio-upload-input"
+    (change)="onFileSelected($event)" />
+
+  <div
+    *ngIf="previewSrc"
+    class="hf-audio-preview">
+    <audio
+      controls
+      [src]="previewSrc"></audio>
+    <div class="hf-audio-meta">
+      <span>{{ fileName || "Selected audio" }}</span>
+      <span
+        *ngIf="isUploading"
+        class="hf-audio-status"
+        >Uploading...</span
+      >
+      <button
+        nz-button
+        nzSize="small"
+        type="button"
+        (click)="clearAudio(fileInput)">
+        Clear
+      </button>

Review Comment:
   While an upload is in progress, the Clear button stays enabled. Clicking it 
can't actually cancel the in-flight request (it will still resolve), so it can 
lead to confusing UX where the field is cleared and then later repopulated when 
the upload finishes. Disabling Clear while `isUploading` avoids this race 
without needing request cancellation plumbing.



##########
frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts:
##########
@@ -541,6 +824,203 @@ export class OperatorPropertyEditFrameComponent 
implements OnInit, OnChanges, On
         mappedField.type = "inputautocomplete";
       }
 
+      if (mappedField.key === "huggingFaceModel") {
+        mappedField.type = "huggingface";
+      }
+
+      if (mappedField.key === "modelId" && 
this.currentOperatorSchema?.operatorType === "HuggingFace") {
+        mappedField.type = "huggingface";
+      }
+
+      if (mappedField.key === "task" && 
this.currentOperatorSchema?.operatorType === "HuggingFace") {
+        mappedField.hide = true;
+      }
+
+      // ── Dynamic field visibility for HuggingFace based on selected task ──
+      if (this.currentOperatorSchema?.operatorType === "HuggingFace" && typeof 
mappedField.key === "string") {
+        const hfKey = mappedField.key;
+        const imageOnlyTasks = ["image-classification", "object-detection", 
"image-segmentation", "image-to-text"];
+        const imageInputTasks = [
+          ...imageOnlyTasks,
+          "visual-question-answering",
+          "document-question-answering",
+          "zero-shot-image-classification",
+          "image-text-to-text",
+          "image-to-image",
+        ];
+        const audioInputTasks = ["automatic-speech-recognition", 
"audio-classification"];
+        const promptRequiredTasks = [
+          "text-generation",
+          "text-classification",
+          "token-classification",
+          "question-answering",
+          "table-question-answering",
+          "zero-shot-classification",
+          "translation",
+          "summarization",
+          "feature-extraction",
+          "fill-mask",
+          "sentence-similarity",
+          "text-ranking",
+          "visual-question-answering",
+          "document-question-answering",
+          "zero-shot-image-classification",
+        ];
+        const getSelectedTask = (field: FormlyFieldConfig): string | undefined 
=> {
+          const fromForm = field.form?.get("task")?.value ?? 
field.formControl?.parent?.get("task")?.value;
+          if (typeof fromForm === "string" && fromForm.trim().length > 0) {
+            return fromForm;
+          }
+          const fromModel = field.model?.task;
+          if (typeof fromModel === "string" && fromModel.trim().length > 0) {
+            return fromModel;
+          }
+          return undefined;
+        };
+        if (hfKey === "imageInput") {
+          mappedField.type = "huggingface-image-upload";
+          mappedField.expressions = {
+            ...mappedField.expressions,
+            hide: (field: FormlyFieldConfig) => {
+              const t = getSelectedTask(field);
+              return t === undefined || !imageInputTasks.includes(t);
+            },
+          };
+          mappedField.validators = {
+            ...mappedField.validators,
+            requiredImageInput: {
+              expression: (_control: AbstractControl, field: 
FormlyFieldConfig) => {
+                const t = getSelectedTask(field);
+                if (t === undefined || !imageInputTasks.includes(t)) {
+                  return true;
+                }
+                const inputImageCol = field.model?.inputImageColumn;
+                if (typeof inputImageCol === "string" && 
inputImageCol.trim().length > 0) {
+                  return true;
+                }
+                const value = field.formControl?.value ?? 
field.model?.imageInput;
+                return typeof value === "string" && value.trim().length > 0;
+              },
+              message: () => "Upload an image or select an Input Image Column 
for this task.",
+            },
+          };

Review Comment:
   The new HuggingFace task-aware field visibility and the custom validators 
(`requiredImageInput`, `requiredAudioInput`, `requiredPromptColumn`) introduce 
non-trivial conditional behavior, but the added specs only cover 
`huggingFaceTaskPreview`. Adding focused unit tests for these hide/requirement 
rules would help prevent regressions (e.g., image/audio tasks accidentally 
requiring `promptColumn`, or validators not turning off when the task changes).



##########
frontend/src/app/workspace/component/hugging-face/hugging-face.component.ts:
##########
@@ -0,0 +1,648 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { Component, OnInit, OnDestroy, ChangeDetectorRef } from 
"@angular/core";
+import { CommonModule } from "@angular/common";
+import { FormsModule } from "@angular/forms";
+import { FieldType, FieldTypeConfig, FormlyModule } from "@ngx-formly/core";
+import { HttpClient } from "@angular/common/http";
+import { NzSelectModule } from "ng-zorro-antd/select";
+import { NzInputModule } from "ng-zorro-antd/input";
+import { NzSpinModule } from "ng-zorro-antd/spin";
+import { NzButtonModule } from "ng-zorro-antd/button";
+import { NzIconModule } from "ng-zorro-antd/icon";
+import { AppSettings } from "../../../common/app-setting";
+import { Subject, Subscription } from "rxjs";
+import { debounceTime, finalize, switchMap, takeUntil } from "rxjs/operators";
+
+export interface HuggingFaceModelOption {
+  id: string;
+  label: string;
+  pipeline_tag?: string;
+  downloads?: number;
+  likes?: number;
+}
+
+export interface HuggingFaceTaskOption {
+  tag: string;
+  label: string;
+}
+
+// ── Static fallback task list (used when the dynamic fetch fails) ──
+export const STATIC_TASK_OPTIONS: HuggingFaceTaskOption[] = [
+  { tag: "text-generation", label: "Text Generation" },
+  { tag: "automatic-speech-recognition", label: "Automatic Speech Recognition" 
},
+  { tag: "audio-classification", label: "Audio Classification" },
+  { tag: "text-classification", label: "Text Classification" },
+  { tag: "text-to-speech", label: "Text to Speech" },
+  { tag: "token-classification", label: "Token Classification" },
+  { tag: "question-answering", label: "Question Answering" },
+  { tag: "table-question-answering", label: "Table Question Answering" },
+  { tag: "zero-shot-classification", label: "Zero-Shot Classification" },
+  { tag: "translation", label: "Translation" },
+  { tag: "summarization", label: "Summarization" },
+  { tag: "feature-extraction", label: "Feature Extraction" },
+  { tag: "fill-mask", label: "Fill-Mask" },
+  { tag: "sentence-similarity", label: "Sentence Similarity" },
+  { tag: "text-ranking", label: "Text Ranking" },
+  { tag: "image-classification", label: "Image Classification" },
+  { tag: "object-detection", label: "Object Detection" },
+  { tag: "image-segmentation", label: "Image Segmentation" },
+  { tag: "image-to-text", label: "Image to Text" },
+  { tag: "visual-question-answering", label: "Visual Question Answering" },
+  { tag: "document-question-answering", label: "Document Question Answering" },
+  { tag: "zero-shot-image-classification", label: "Zero-Shot Image 
Classification" },
+];
+
+const PAGE_SIZE = 50;
+
+const TRUNCATED_HEADER = "X-Texera-Truncated";
+
+// ── Module-level caches (reused across component instances) ──
+const allModelsByTag: Map<string, HuggingFaceModelOption[]> = new Map();
+const truncatedByTag: Set<string> = new Set();
+const inFlightByTag: Map<string, Subscription> = new Map();
+const errorByTag: Map<string, string> = new Map();
+
+let cachedTaskOptions: HuggingFaceTaskOption[] | null = null;
+let tasksFetchSubscription: Subscription | null = null;
+let tasksFetchError: string | null = null;
+
+/** Clear all cached data (useful for tests or manual invalidation). */
+export function invalidateHuggingFaceModelCache(): void {
+  allModelsByTag.clear();
+  truncatedByTag.clear();
+  errorByTag.clear();
+  inFlightByTag.forEach(sub => sub.unsubscribe());
+  inFlightByTag.clear();
+  cachedTaskOptions = null;
+  tasksFetchError = null;
+  tasksFetchSubscription?.unsubscribe();
+  tasksFetchSubscription = null;
+}
+
+@Component({
+  selector: "texera-hugging-face-model-select",
+  templateUrl: "./hugging-face.component.html",
+  styleUrls: ["hugging-face.component.scss"],
+  imports: [
+    CommonModule,
+    FormsModule,
+    NzSelectModule,
+    NzInputModule,
+    NzSpinModule,
+    NzButtonModule,
+    NzIconModule,
+    FormlyModule,
+  ],
+})
+export class HuggingFaceComponent extends FieldType<FieldTypeConfig> 
implements OnInit, OnDestroy {
+  private readonly taskScopedKeys = [
+    "modelId",
+    "promptColumn",
+    "imageInput",
+    "audioInput",
+    "inputImageColumn",
+    "inputAudioColumn",
+    "candidateLabels",
+    "sentencesColumn",
+    "contextColumn",
+    "systemPrompt",
+    "maxNewTokens",
+    "temperature",
+  ] as const;
+  private readonly taskStateByTag = new Map<string, Partial<Record<(typeof 
this.taskScopedKeys)[number], unknown>>>();
+  // ── Task state ──
+  taskOptions: HuggingFaceTaskOption[] = cachedTaskOptions ?? 
STATIC_TASK_OPTIONS;
+  selectedTaskTag = "text-generation";
+  tasksLoading = false;
+  tasksError: string | null = null;
+
+  // ── All models for the current task (fetched once from backend, cached) ──
+  private allModels: HuggingFaceModelOption[] = [];
+
+  // ── Displayed state ──
+  pagedModels: HuggingFaceModelOption[] = [];
+  currentPage = 0;
+  totalPages = 0;
+
+  loading = false;
+  errorMessage: string | null = null;
+
+  // ── Truncation notice ──
+  truncated = false;
+
+  // ── Search state ──
+  searchText = "";
+  searchLoading = false;
+  private filteredModels: HuggingFaceModelOption[] | null = null;
+  private readonly searchSubject$ = new Subject<string>();
+  private searchSubscription: Subscription | null = null;
+
+  private readonly destroy$ = new Subject<void>();
+  private subscription: Subscription | null = null;
+  private taskPollInterval: ReturnType<typeof setInterval> | null = null;
+  private modelPollInterval: ReturnType<typeof setInterval> | null = null;
+  private initTimeout: ReturnType<typeof setTimeout> | null = null;
+
+  constructor(
+    private http: HttpClient,
+    private cdr: ChangeDetectorRef
+  ) {
+    super();
+  }
+
+  ngOnInit(): void {
+    const savedTag = this.getCurrentTaskTag();
+    this.selectedTaskTag = savedTag ?? this.selectedTaskTag;
+    this.syncTaskSelection(this.selectedTaskTag, false);
+    this.loadTasks();
+    this.loadAllModels();
+    this.setupServerSearch();
+    // Formly can attach sibling controls after this field initializes.
+    // Re-sync once the control tree settles so a fresh operator starts in a 
valid task state.
+    this.initTimeout = setTimeout(
+      () => this.syncTaskSelection(this.getCurrentTaskTag() ?? 
this.selectedTaskTag, false),
+      0
+    );
+  }
+
+  ngOnDestroy(): void {
+    this.destroy$.next();
+    this.destroy$.complete();
+    this.subscription?.unsubscribe();
+    this.searchSubscription?.unsubscribe();
+    this.searchSubject$.complete();
+    if (this.taskPollInterval !== null) {
+      clearInterval(this.taskPollInterval);
+    }
+    if (this.modelPollInterval !== null) {
+      clearInterval(this.modelPollInterval);
+    }
+    if (this.initTimeout !== null) {
+      clearTimeout(this.initTimeout);
+    }
+  }
+
+  // ── Task loading ──
+
+  /**
+   * Fetch available pipeline tags from the backend, which proxies 
HuggingFace's /api/tasks.
+   * Falls back to STATIC_TASK_OPTIONS if the fetch fails.
+   */
+  private loadTasks(): void {
+    // Already fetched and cached
+    if (cachedTaskOptions !== null) {
+      this.taskOptions = cachedTaskOptions;
+      return;
+    }
+
+    // Previous fetch errored — show static list, don't retry automatically
+    if (tasksFetchError !== null) {
+      this.tasksError = tasksFetchError;
+      this.taskOptions = STATIC_TASK_OPTIONS;
+      return;
+    }
+
+    // Another component instance already has a fetch in flight — wait for it
+    if (tasksFetchSubscription !== null) {
+      this.tasksLoading = true;
+      // Poll for completion (the module-level cache will be set when done)
+      this.taskPollInterval = setInterval(() => {
+        if (cachedTaskOptions !== null || tasksFetchError !== null) {
+          clearInterval(this.taskPollInterval!);
+          this.taskPollInterval = null;
+          this.tasksLoading = false;
+          this.taskOptions = cachedTaskOptions ?? STATIC_TASK_OPTIONS;
+          if (tasksFetchError) this.tasksError = tasksFetchError;
+          this.cdr.detectChanges();
+        }
+      }, 200);
+      return;
+    }
+
+    this.tasksLoading = true;
+    this.tasksError = null;
+    this.cdr.detectChanges();
+
+    tasksFetchSubscription = this.http
+      
.get<HuggingFaceTaskOption[]>(`${AppSettings.getApiEndpoint()}/huggingface/tasks`)
+      .pipe(
+        takeUntil(this.destroy$),
+        finalize(() => {
+          // If takeUntil fires before next/error, reset the module-level guard
+          // so the next component instance can start a fresh fetch.
+          if (cachedTaskOptions === null && tasksFetchError === null) {
+            tasksFetchSubscription = null;
+          }
+        })
+      )
+      .subscribe({
+        next: tasks => {
+          tasksFetchSubscription = null;
+          cachedTaskOptions = tasks.length > 0 ? tasks : STATIC_TASK_OPTIONS;
+          this.taskOptions = cachedTaskOptions;
+          this.tasksLoading = false;
+          this.cdr.detectChanges();
+        },
+        error: (err: unknown) => {
+          console.error("Failed to load HuggingFace tasks:", err);
+          tasksFetchSubscription = null;
+          tasksFetchError = "Could not load tasks from Hugging Face. Using 
default list.";
+          this.tasksError = tasksFetchError;
+          this.taskOptions = STATIC_TASK_OPTIONS;
+          this.tasksLoading = false;
+          this.cdr.detectChanges();
+        },
+      });
+  }
+
+  retryTasksLoad(): void {
+    tasksFetchError = null;
+    this.tasksError = null;
+    this.loadTasks();
+  }
+
+  // ── Task selection ──
+
+  onTaskSelected(tag: string): void {
+    const previousTask = this.getCurrentTaskTag() ?? this.selectedTaskTag;
+    this.snapshotTaskState(previousTask);
+    this.syncTaskSelection(tag, true);
+    this.restoreTaskState(tag);
+    this.searchText = "";
+    this.filteredModels = null;
+    this.loadAllModels();
+  }
+
+  // ── Data loading ──
+
+  /**
+   * Fetch ALL models for the selected task.
+   * The backend paginates through HF Hub internally and caches the result.
+   * The first request per task may be slow; subsequent requests are instant.
+   */
+  private loadAllModels(): void {
+    const tag = this.selectedTaskTag || "text-generation";
+
+    this.loading = false;
+    this.errorMessage = null;
+
+    // Fast path: cached on the frontend
+    if (allModelsByTag.has(tag)) {
+      this.allModels = allModelsByTag.get(tag)!;
+      this.truncated = truncatedByTag.has(tag);
+      this.goToPage(0);
+      return;
+    }
+
+    // Previous error
+    if (errorByTag.has(tag)) {
+      this.errorMessage = errorByTag.get(tag)!;
+      this.allModels = [];
+      this.pagedModels = [];
+      this.totalPages = 0;
+      return;
+    }
+
+    // Another instance is already fetching this task — wait for it
+    if (inFlightByTag.has(tag)) {
+      this.loading = true;
+      this.modelPollInterval = setInterval(() => {
+        if (allModelsByTag.has(tag) || errorByTag.has(tag)) {
+          clearInterval(this.modelPollInterval!);
+          this.modelPollInterval = null;
+          this.loading = false;
+          if (allModelsByTag.has(tag)) {
+            this.allModels = allModelsByTag.get(tag)!;
+            this.truncated = truncatedByTag.has(tag);
+            this.goToPage(0);
+          } else {
+            this.errorMessage = errorByTag.get(tag)!;
+            this.cdr.detectChanges();
+          }
+        }
+      }, 200);
+      return;
+    }

Review Comment:
   `loadAllModels()` has the same polling pattern as `loadTasks()`: when 
another instance is already fetching the same tag, it starts a `setInterval` 
that terminates only once `allModelsByTag` or `errorByTag` is populated for 
that tag. If the in-flight subscription is canceled (because the instance that 
started it is destroyed), `finalize` deletes the `inFlightByTag` guard entry 
without populating either map, so this interval keeps polling until this 
instance is itself destroyed, and the UI stays stuck in the loading state in 
the meantime. Stop polling when the guard entry disappears, and re-trigger the 
fetch from this instance.



##########
frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts:
##########
@@ -167,6 +170,273 @@ export class OperatorPropertyEditFrameComponent 
implements OnInit, OnChanges, On
   // used to tear down subscriptions that takeUntil(teardownObservable)
   private teardownObservable: Subject<void> = new Subject();
 
+  readonly huggingFaceTaskPreviewSamples: Record<
+    string,
+    {
+      kind: "image" | "video" | "audio" | "text";
+      inputLabel?: string;
+      outputLabel?: string;
+      title?: string;
+      body?: string;
+      outputBody?: string;
+      pills?: string[];
+      assetSrc?: string;
+    }
+  > = {
+    "text-to-image": {
+      kind: "image",
+      inputLabel: "Text prompt",
+      outputLabel: "Generated image",
+      title: "Comic-style city action scene",
+      body: "Prompt becomes a generated image preview.",
+      assetSrc: "assets/sample-image.png",
+    },
+    "image-to-image": {
+      kind: "image",
+      inputLabel: "Source image",
+      outputLabel: "Edited image",
+      title: "Image transformation preview",
+      body: "Image input produces a modified image result.",
+      assetSrc: "assets/sample-image.png",
+    },
+    "text-to-video": {
+      kind: "video",
+      inputLabel: "Text prompt",
+      outputLabel: "Generated video",
+      title: "Prompt-based motion preview",
+      body: "Prompt becomes a generated video clip.",
+      assetSrc: "assets/sample-video.mp4",
+    },
+    "image-to-video": {
+      kind: "video",
+      inputLabel: "Source image",
+      outputLabel: "Animated clip",
+      title: "Image animation preview",
+      body: "Image input becomes a short generated video.",
+      assetSrc: "assets/sample-video.mp4",
+    },
+    "text-to-speech": {
+      kind: "audio",
+      inputLabel: "Text input",
+      outputLabel: "Spoken audio",
+      title: "Speech synthesis preview",
+      body: "Text becomes an audio clip the user can play back.",
+      assetSrc: "assets/sample-audio.wav",
+    },

Review Comment:
   The task preview sample map references `assets/sample-audio.wav`, but that 
file doesn't exist under `frontend/src/assets/` in this PR. This will render a 
broken audio preview for audio tasks in the property editor.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to