Croway commented on code in PR #21449:
URL: https://github.com/apache/camel/pull/21449#discussion_r2817233755
##########
components/camel-ai/camel-huggingface/src/main/java/org/apache/camel/component/huggingface/tasks/ChatPredictor.java:
##########
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.huggingface.tasks;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import ai.djl.modality.Input;
+import ai.djl.modality.Output;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.camel.Exchange;
+import org.apache.camel.RuntimeCamelException;
+import org.apache.camel.component.huggingface.HuggingFaceConfiguration;
+import org.apache.camel.component.huggingface.HuggingFaceConstants;
+
+/**
+ * Predictor for the CHAT task, handling conversational LLM inference with automatic history management.
+ *
+ * <p>
+ * This predictor manages multi-turn chat conversations using Hugging Face's text-generation pipeline for
+ * instruct-tuned models. It supports automatic in-memory history retention (keyed by a header for multi-user
+ * support), system prompts, and configurable roles. The predictor is designed to be model-agnostic, allowing
+ * seamless swapping of compatible chat-tuned LLMs via the modelId configuration.
+ * </p>
+ *
+ * <p>
+ * <b>Input Contract (Camel Message Body):</b>
+ * </p>
+ * <ul>
+ * <li>{@code String}: The user's message or prompt for the current turn.</li>
+ * </ul>
+ *
+ * <p>
+ * <b>Output Contract (Camel Message Body):</b>
+ * </p>
+ * <ul>
+ * <li>{@code String}: The LLM's generated response (extracted from the last "assistant" message).</li>
+ * </ul>
+ *
+ * <p>
+ * <b>Camel Headers Used:</b>
+ * </p>
+ * <ul>
+ * <li>{@code CamelChatMemoryId} (optional, default "default"): String key for conversation history (e.g., user
+ * session ID).</li>
+ * <li>{@code CamelChatClearHistory} (optional): Boolean to clear the history for the current memory ID.</li>
+ * <li>{@code HuggingFaceConstants.OUTPUT}: The same generated response string (for convenience).</li>
+ * </ul>
+ *
+ * <p>
+ * <b>Relevant HuggingFaceConfiguration Properties:</b>
+ * </p>
+ * <ul>
+ * <li>{@code modelId}: Required String, e.g., "mistralai/Mistral-7B-Instruct-v0.2" or
+ * "microsoft/Phi-3-mini-4k-instruct". Use chat-tuned models for best results.</li>
+ * <li>{@code revision}: Optional String, model revision (default "main").</li>
+ * <li>{@code device}: Optional String, inference device (default "auto"; use "cpu" when no GPU is available).</li>
+ * <li>{@code maxTokens}: Optional int, max new tokens in the response (default 512).</li>
+ * <li>{@code temperature}: Optional float, sampling temperature (default 1.0f; set to 0 for deterministic
+ * output).</li>
+ * <li>{@code authToken}: Optional String, Hugging Face API token for gated models.</li>
+ * <li>{@code userRole}: Optional String, role for user messages (default "user").</li>
+ * <li>{@code systemPrompt}: Optional String, initial system message (added as the first message when history is
+ * empty).</li>
+ * </ul>
+ *
+ * <p>
+ * <b>Python Model Input/Output Expectations:</b>
+ * </p>
+ * <ul>
+ * <li><b>Input</b>: JSON string of a list of message dicts: [{"role": "system" | "user" | "assistant",
+ * "content": "text"}]. Models must support chat templates or multi-turn formats.</li>
+ * <li><b>Output</b>: JSON from the HF pipeline: [{"generated_text": [full history as a list of dicts]}]. The
+ * predictor extracts the last assistant message's content.</li>
+ * </ul>
+ * <p>
+ * To ensure model interchangeability, use instruct-tuned models with compatible chat formats (e.g., Llama-3,
+ * Mistral-Instruct, Phi-3).
+ * </p>
+ */
+public class ChatPredictor extends AbstractTaskPredictor {
+
+    private final ObjectMapper objectMapper = new ObjectMapper();
+
+    // In-memory chat history (key = memoryId, value = JSON string of a list of message dicts)
+    private final Map<String, String> chatHistories = new ConcurrentHashMap<>();
+
+    public ChatPredictor(HuggingFaceConfiguration config) {
+        super(config);
+    }
+
+    @Override
+    protected String getPythonScript() {
+        String doSample = config.getTemperature() > 0 ? "True" : "False";
+        float temperature = config.getTemperature() > 0 ? config.getTemperature() : 1.0f;
+        String tokenClause = config.getAuthToken() != null ? ", token='" + config.getAuthToken() + "'" : "";
+        return """

Review Comment:
   @johnpoth this is not a blocker, but what about moving the Python code under the resources folder, in its own .py file, so that it can be retrieved and returned by these `getPythonScript()` methods?
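A minimal sketch of what this suggestion could look like, assuming the script ships as a classpath resource; the resource path `huggingface/tasks/chat.py`, the `${...}` placeholders, and the helper names below are hypothetical illustrations, not code from the PR:

```java
// Hypothetical sketch of the review suggestion, not part of the PR: load the
// task's Python script from src/main/resources instead of a Java text block.
// The resource path, the ${...} placeholders, and the class name are assumptions.
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import org.apache.camel.RuntimeCamelException;

public class ChatPredictorSketch {

    // Loads a script bundled on the classpath and returns it as a String
    static String loadScript(String resourcePath) {
        try (InputStream is = ChatPredictorSketch.class.getResourceAsStream(resourcePath)) {
            if (is == null) {
                throw new RuntimeCamelException("Python script not found on classpath: " + resourcePath);
            }
            return new String(is.readAllBytes(), StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new RuntimeCamelException("Unable to read Python script: " + resourcePath, e);
        }
    }

    // The current text block interpolates runtime values (doSample, temperature,
    // tokenClause), so a resource-based script would carry placeholders that are
    // substituted here before being handed to the Python engine
    String getPythonScript(float temperature, String authToken) {
        String doSample = temperature > 0 ? "True" : "False";
        String tokenClause = authToken != null ? ", token='" + authToken + "'" : "";
        return loadScript("/huggingface/tasks/chat.py")
                .replace("${DO_SAMPLE}", doSample)
                .replace("${TEMPERATURE}", String.valueOf(temperature > 0 ? temperature : 1.0f))
                .replace("${TOKEN_CLAUSE}", tokenClause);
    }
}
```

A substitution step like this keeps the `.py` file static under `src/main/resources` while still honoring the runtime configuration the current text block interpolates.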
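Separately, the javadoc contracts in the diff above translate into a route along these lines; the `huggingface:chat` URI and its query parameters are assumptions inferred from the documented configuration properties, not confirmed by this diff:

```java
// Hypothetical usage sketch based on the CHAT task contracts documented in the
// javadoc above; check the component docs in the PR for the actual URI syntax.
import org.apache.camel.builder.RouteBuilder;

public class ChatRouteSketch extends RouteBuilder {
    @Override
    public void configure() {
        from("direct:chat")
                // key the in-memory conversation history per user/session (default "default")
                .setHeader("CamelChatMemoryId", simple("${header.sessionId}"))
                .to("huggingface:chat?modelId=mistralai/Mistral-7B-Instruct-v0.2")
                // per the output contract, the body is now the assistant's reply (a String)
                .log("Assistant: ${body}");
    }
}
```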
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]