This is an automated email from the ASF dual-hosted git repository.

sxnan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/flink-agents.git


The following commit(s) were added to refs/heads/main by this push:
     new 834c07bf [python][integrations] Add Tongyi embedding model integration (#611)
834c07bf is described below

commit 834c07bf7c4daec056556cc0f8f8cc1ebdf4f394
Author: Xuannan <[email protected]>
AuthorDate: Fri Apr 10 20:12:03 2026 +0800

    [python][integrations] Add Tongyi embedding model integration (#611)
---
 .../flink/agents/api/resource/ResourceName.java    |   6 +
 docs/content/docs/development/embedding_models.md  |  68 +++++++
 python/flink_agents/api/resource.py                |   4 +
 .../tests/test_tongyi_embedding_model.py           | 222 +++++++++++++++++++++
 .../embedding_models/tongyi_embedding_model.py     | 182 +++++++++++++++++
 5 files changed, 482 insertions(+)

diff --git a/api/src/main/java/org/apache/flink/agents/api/resource/ResourceName.java b/api/src/main/java/org/apache/flink/agents/api/resource/ResourceName.java
index 1cbbfdf6..5ec16fcd 100644
--- a/api/src/main/java/org/apache/flink/agents/api/resource/ResourceName.java
+++ b/api/src/main/java/org/apache/flink/agents/api/resource/ResourceName.java
@@ -152,6 +152,12 @@ public final class ResourceName {
             public static final String OPENAI_SETUP =
                     "flink_agents.integrations.embedding_models.openai_embedding_model.OpenAIEmbeddingModelSetup";
 
+            // Tongyi
+            public static final String TONGYI_CONNECTION =
+                    "flink_agents.integrations.embedding_models.tongyi_embedding_model.TongyiEmbeddingModelConnection";
+            public static final String TONGYI_SETUP =
+                    "flink_agents.integrations.embedding_models.tongyi_embedding_model.TongyiEmbeddingModelSetup";
+
             private Python() {}
         }
 
diff --git a/docs/content/docs/development/embedding_models.md b/docs/content/docs/development/embedding_models.md
index 4781e43a..634c5dd3 100644
--- a/docs/content/docs/development/embedding_models.md
+++ b/docs/content/docs/development/embedding_models.md
@@ -394,6 +394,74 @@ Current popular models include:
 Model availability and specifications may change. Always check the official OpenAI documentation for the latest information before implementing in production.
 {{< /hint >}}
 
+### Tongyi (DashScope)
+
+Tongyi provides cloud-based embedding models from Alibaba Cloud, with strong support for Chinese and English text.
+
+{{< hint info >}}
+Tongyi embedding models are currently supported in the Python API only. To use Tongyi from Java agents, see [Using Cross-Language Providers](#using-cross-language-providers).
+{{< /hint >}}
+
+#### Prerequisites
+
+1. Get an API key from [Alibaba Cloud DashScope](https://dashscope.console.aliyun.com/)
+
+#### Usage Example
+
+```python
+class MyAgent(Agent):
+
+    @embedding_model_connection
+    @staticmethod
+    def tongyi_connection() -> ResourceDescriptor:
+        return ResourceDescriptor(
+            clazz=ResourceName.EmbeddingModel.TONGYI_CONNECTION,
+            api_key="your-api-key-here",  # Or set DASHSCOPE_API_KEY env var
+            request_timeout=30.0
+        )
+
+    @embedding_model_setup
+    @staticmethod
+    def tongyi_embedding() -> ResourceDescriptor:
+        return ResourceDescriptor(
+            clazz=ResourceName.EmbeddingModel.TONGYI_SETUP,
+            connection="tongyi_connection",
+            model="text-embedding-v4",
+            text_type="query"
+        )
+```
+
+#### TongyiEmbeddingModelConnection Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `api_key` | str | `$DASHSCOPE_API_KEY` | DashScope API key for authentication |
+| `request_timeout` | float | `30.0` | HTTP request timeout in seconds |
+
+#### TongyiEmbeddingModelSetup Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `connection` | str | Required | Reference to connection method name |
+| `model` | str | `"text-embedding-v4"` | Embedding model name |
+| `text_type` | str | None | Input type: `"query"` or `"document"` |
+| `dimension` | int | None | Output vector dimensions (model-dependent) |
+| `additional_kwargs` | dict | `{}` | Additional DashScope API parameters |
+
+#### Available Models
+
+Visit the [DashScope Embedding Models documentation](https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-api-details) for the complete and up-to-date list of available embedding models.
+
+Some popular options include:
+- **text-embedding-v4** (default, recommended)
+- **text-embedding-v3**
+- **text-embedding-v2**
+- **text-embedding-v1**
+
+{{< hint warning >}}
+Model availability and specifications may change. Always check the official DashScope documentation for the latest information before implementing in production.
+{{< /hint >}}
+
 ## Using Cross-Language Providers
 
 Flink Agents supports cross-language embedding model integration, allowing you to use embedding models implemented in one language (Java or Python) from agents written in the other language. This is particularly useful when an embedding model provider is only available in one language (e.g., OpenAI embedding is currently Python-only).
diff --git a/python/flink_agents/api/resource.py b/python/flink_agents/api/resource.py
index 57d50667..a684202d 100644
--- a/python/flink_agents/api/resource.py
+++ b/python/flink_agents/api/resource.py
@@ -296,6 +296,10 @@ class ResourceName:
         OPENAI_CONNECTION = "flink_agents.integrations.embedding_models.openai_embedding_model.OpenAIEmbeddingModelConnection"
         OPENAI_SETUP = "flink_agents.integrations.embedding_models.openai_embedding_model.OpenAIEmbeddingModelSetup"
 
+        # Tongyi
+        TONGYI_CONNECTION = "flink_agents.integrations.embedding_models.tongyi_embedding_model.TongyiEmbeddingModelConnection"
+        TONGYI_SETUP = "flink_agents.integrations.embedding_models.tongyi_embedding_model.TongyiEmbeddingModelSetup"
+
         # Java Wrapper
         JAVA_WRAPPER_CONNECTION = "flink_agents.api.embedding_models.java_embedding_model.JavaEmbeddingModelConnection"
         JAVA_WRAPPER_SETUP = "flink_agents.api.embedding_models.java_embedding_model.JavaEmbeddingModelSetup"
diff --git a/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py b/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py
new file mode 100644
index 00000000..59712efa
--- /dev/null
+++ b/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py
@@ -0,0 +1,222 @@
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+import os
+from http import HTTPStatus
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
+import pytest
+
+from flink_agents.api.resource import Resource, ResourceType
+from flink_agents.integrations.embedding_models.tongyi_embedding_model import (
+    TongyiEmbeddingModelConnection,
+    TongyiEmbeddingModelSetup,
+)
+
+test_model = os.environ.get("TONGYI_EMBEDDING_MODEL", "text-embedding-v4")
+api_key_available = "DASHSCOPE_API_KEY" in os.environ
+
+
+@pytest.mark.skipif(not api_key_available, reason="DashScope API key is not set")
+def test_tongyi_embedding_model() -> None:
+    """Test basic embedding functionality of TongyiEmbeddingModelConnection."""
+    connection = TongyiEmbeddingModelConnection(name="tongyi")
+
+    def get_resource(name: str, type: ResourceType) -> Resource:
+        if type == ResourceType.EMBEDDING_MODEL_CONNECTION:
+            return connection
+        else:
+            msg = f"Unknown resource type: {type}"
+            raise ValueError(msg)
+
+    embedding_model = TongyiEmbeddingModelSetup(
+        name="tongyi", model=test_model, connection="tongyi", get_resource=get_resource
+    )
+    embedding_model.open()
+
+    response = embedding_model.embed("The quality of the clothes is excellent, very beautiful, worth the wait, I like it and will buy here again")
+    assert response is not None
+    assert isinstance(response, list)
+    assert len(response) > 0
+    assert all(isinstance(x, float) for x in response)
+
+
+@pytest.mark.skipif(not api_key_available, reason="DashScope API key is not set")
+def test_tongyi_embedding_with_text_type() -> None:
+    """Test embedding with text_type parameter."""
+    connection = TongyiEmbeddingModelConnection(name="tongyi")
+
+    def get_resource(name: str, type: ResourceType) -> Resource:
+        if type == ResourceType.EMBEDDING_MODEL_CONNECTION:
+            return connection
+        else:
+            msg = f"Unknown resource type: {type}"
+            raise ValueError(msg)
+
+    embedding_model_query = TongyiEmbeddingModelSetup(
+        name="tongyi",
+        model=test_model,
+        connection="tongyi",
+        text_type="query",
+        get_resource=get_resource,
+    )
+    embedding_model_query.open()
+
+    response_query = embedding_model_query.embed("Hello, Flink Agent!")
+    assert response_query is not None
+    assert isinstance(response_query, list)
+    assert len(response_query) > 0
+
+    embedding_model_doc = TongyiEmbeddingModelSetup(
+        name="tongyi",
+        model=test_model,
+        connection="tongyi",
+        text_type="document",
+        get_resource=get_resource,
+    )
+    embedding_model_doc.open()
+
+    response_doc = embedding_model_doc.embed("Hello, Flink Agent!")
+    assert response_doc is not None
+    assert isinstance(response_doc, list)
+    assert len(response_doc) > 0
+
+
+def test_tongyi_embedding_mock(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Test embedding functionality with mocked DashScope API."""
+    mock_embedding = [0.1, 0.2, 0.3, 0.4, 0.5]
+
+    mocked_response = SimpleNamespace(
+        status_code=HTTPStatus.OK,
+        output={"embeddings": [{"embedding": mock_embedding}]},
+        message="Success",
+    )
+
+    mock_call = MagicMock(return_value=mocked_response)
+
+    monkeypatch.setattr(
+        "flink_agents.integrations.embedding_models.tongyi_embedding_model.dashscope.TextEmbedding.call",
+        mock_call,
+    )
+
+    connection = TongyiEmbeddingModelConnection(
+        name="tongyi",
+        api_key="fake-key",
+    )
+
+    def get_resource(name: str, type: ResourceType) -> Resource:
+        if type == ResourceType.EMBEDDING_MODEL_CONNECTION:
+            return connection
+        else:
+            msg = f"Unknown resource type: {type}"
+            raise ValueError(msg)
+
+    embedding_model = TongyiEmbeddingModelSetup(
+        name="tongyi", model=test_model, connection="tongyi", get_resource=get_resource
+    )
+    embedding_model.open()
+
+    response = embedding_model.embed("Test text")
+
+    mock_call.assert_called_once()
+    assert response == mock_embedding
+    assert len(response) == 5
+
+
+def test_tongyi_embedding_batch_mock(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Test batch embedding functionality with mocked DashScope API."""
+    mock_embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
+
+    mocked_response = SimpleNamespace(
+        status_code=HTTPStatus.OK,
+        output={
+            "embeddings": [
+                {"embedding": mock_embeddings[0]},
+                {"embedding": mock_embeddings[1]},
+            ]
+        },
+        message="Success",
+    )
+
+    mock_call = MagicMock(return_value=mocked_response)
+
+    monkeypatch.setattr(
+        "flink_agents.integrations.embedding_models.tongyi_embedding_model.dashscope.TextEmbedding.call",
+        mock_call,
+    )
+
+    connection = TongyiEmbeddingModelConnection(
+        name="tongyi",
+        api_key="fake-key",
+    )
+
+    def get_resource(name: str, type: ResourceType) -> Resource:
+        if type == ResourceType.EMBEDDING_MODEL_CONNECTION:
+            return connection
+        else:
+            msg = f"Unknown resource type: {type}"
+            raise ValueError(msg)
+
+    embedding_model = TongyiEmbeddingModelSetup(
+        name="tongyi", model=test_model, connection="tongyi", get_resource=get_resource
+    )
+    embedding_model.open()
+
+    response = embedding_model.embed(["Text one", "Text two"])
+
+    mock_call.assert_called_once()
+    assert response == mock_embeddings
+    assert len(response) == 2
+
+
+def test_tongyi_embedding_error_handling(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Test error handling when API call fails."""
+    mocked_response = SimpleNamespace(
+        status_code=HTTPStatus.BAD_REQUEST,
+        message="Invalid API key",
+    )
+
+    mock_call = MagicMock(return_value=mocked_response)
+
+    monkeypatch.setattr(
+        "flink_agents.integrations.embedding_models.tongyi_embedding_model.dashscope.TextEmbedding.call",
+        mock_call,
+    )
+
+    connection = TongyiEmbeddingModelConnection(
+        name="tongyi",
+        api_key="invalid-key",
+    )
+
+    with pytest.raises(RuntimeError, match="DashScope TextEmbedding call failed"):
+        connection.embed("Test text", model=test_model)
+
+
+def test_tongyi_embedding_without_api_key() -> None:
+    """Test that ValueError is raised when API key is not provided."""
+    original_api_key = os.environ.get("DASHSCOPE_API_KEY")
+
+    if "DASHSCOPE_API_KEY" in os.environ:
+        del os.environ["DASHSCOPE_API_KEY"]
+
+    try:
+        with pytest.raises(ValueError, match="DashScope API key is not provided"):
+            TongyiEmbeddingModelConnection(name="tongyi")
+    finally:
+        if original_api_key is not None:
+            os.environ["DASHSCOPE_API_KEY"] = original_api_key
diff --git a/python/flink_agents/integrations/embedding_models/tongyi_embedding_model.py b/python/flink_agents/integrations/embedding_models/tongyi_embedding_model.py
new file mode 100644
index 00000000..c63c083d
--- /dev/null
+++ b/python/flink_agents/integrations/embedding_models/tongyi_embedding_model.py
@@ -0,0 +1,182 @@
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+import os
+from http import HTTPStatus
+from typing import Any, Dict, Sequence
+
+import dashscope
+from pydantic import Field
+
+from flink_agents.api.embedding_models.embedding_model import (
+    BaseEmbeddingModelConnection,
+    BaseEmbeddingModelSetup,
+)
+
+DEFAULT_REQUEST_TIMEOUT = 30.0
+DEFAULT_MODEL = "text-embedding-v4"
+
+
+class TongyiEmbeddingModelConnection(BaseEmbeddingModelConnection):
+    """Tongyi Embedding Model Connection which manages connection to DashScope API.
+
+    Visit https://dashscope.console.aliyun.com/ to get your API key.
+
+    Attributes:
+    ----------
+    api_key : str
+        DashScope API key for authentication.
+    request_timeout : float
+        The timeout for making http request to Tongyi API server.
+    """
+
+    api_key: str | None = Field(
+        default=None,
+        description="Your DashScope API key.",
+    )
+    request_timeout: float = Field(
+        default=DEFAULT_REQUEST_TIMEOUT,
+        description="The timeout for making http request to Tongyi API server.",
+    )
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        request_timeout: float | None = DEFAULT_REQUEST_TIMEOUT,
+        **kwargs: Any,
+    ) -> None:
+        """Init method."""
+        resolved_api_key = api_key or os.environ.get("DASHSCOPE_API_KEY")
+        if not resolved_api_key:
+            msg = (
+                "DashScope API key is not provided. "
+                "Please pass it as an argument or set the 'DASHSCOPE_API_KEY' environment variable."
+            )
+            raise ValueError(msg)
+
+        super().__init__(
+            api_key=resolved_api_key,
+            request_timeout=request_timeout,
+            **kwargs,
+        )
+
+    def embed(
+        self, text: str | Sequence[str], **kwargs: Any
+    ) -> list[float] | list[list[float]]:
+        """Generate embedding vector for text input."""
+        model = kwargs.pop("model", DEFAULT_MODEL)
+        text_type = kwargs.pop("text_type", None)
+        dimension = kwargs.pop("dimension", None)
+        req_api_key = kwargs.pop("api_key", self.api_key)
+
+        call_params: Dict[str, Any] = {
+            "model": model,
+            "input": text,
+            "api_key": req_api_key,
+            "timeout": self.request_timeout,
+        }
+
+        if text_type is not None:
+            call_params["text_type"] = text_type
+        if dimension is not None:
+            call_params["dimension"] = dimension
+
+        call_params.update(kwargs)
+
+        response = dashscope.TextEmbedding.call(**call_params)
+
+        if response.status_code != HTTPStatus.OK:
+            msg = f"DashScope TextEmbedding call failed: {response.message}"
+            raise RuntimeError(msg)
+
+        embeddings = [e["embedding"] for e in response.output["embeddings"]]
+        return embeddings[0] if isinstance(text, str) else embeddings
+
+
+class TongyiEmbeddingModelSetup(BaseEmbeddingModelSetup):
+    """The settings for Tongyi embedding model.
+
+    Attributes:
+    ----------
+    connection : str
+        Name of the referenced connection. (Inherited from BaseEmbeddingModelSetup)
+    model : str
+        Name of the embedding model to use. (Inherited from BaseEmbeddingModelSetup)
+        Available models: text-embedding-v1, text-embedding-v2, text-embedding-v3,
+            text-embedding-v4
+    text_type : str | None
+        The type of input text. Optional values: "query" or "document".
+        Used to optimize embeddings for different use cases.
+    dimension : int | None
+        The number of dimensions for the output embedding vector.
+        Only supported in certain models (e.g., text-embedding-v3 and later).
+    additional_kwargs : Dict[str, Any]
+        Additional parameters for the DashScope TextEmbedding API.
+    """
+
+    text_type: str | None = Field(
+        default=None,
+        description='The type of input text. Optional values: "query" or "document".',
+    )
+    dimension: int | None = Field(
+        default=None,
+        description="The number of dimensions for the output embedding vector.",
+    )
+    additional_kwargs: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="Additional parameters for the DashScope TextEmbedding API.",
+    )
+
+    def __init__(
+        self,
+        *,
+        connection: str,
+        model: str = DEFAULT_MODEL,
+        text_type: str | None = None,
+        dimension: int | None = None,
+        additional_kwargs: Dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Init method."""
+        if additional_kwargs is None:
+            additional_kwargs = {}
+        super().__init__(
+            connection=connection,
+            model=model,
+            text_type=text_type,
+            dimension=dimension,
+            additional_kwargs=additional_kwargs,
+            **kwargs,
+        )
+
+    @property
+    def model_kwargs(self) -> Dict[str, Any]:
+        """Return Tongyi embedding model configuration."""
+        base_kwargs: Dict[str, Any] = {
+            "model": self.model,
+        }
+
+        if self.text_type is not None:
+            base_kwargs["text_type"] = self.text_type
+
+        if self.dimension is not None:
+            base_kwargs["dimension"] = self.dimension
+
+        return {
+            **base_kwargs,
+            **self.additional_kwargs,
+        }

Reply via email to