This is an automated email from the ASF dual-hosted git repository. wenjin272 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/flink-agents.git
commit 69ff4f8688b8902257b94bfa9589834ecde38903 Author: WenjinXie <[email protected]> AuthorDate: Mon Apr 20 15:43:51 2026 +0800 [api][runtime][python] Support agent skills in Python. Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]> --- .github/workflows/ci.yml | 2 + .licenserc.yaml | 1 + .../flink/agents/api/resource/ResourceType.java | 3 +- python/flink_agents/api/chat_models/chat_model.py | 40 ++- python/flink_agents/api/decorators.py | 17 + python/flink_agents/api/resource.py | 3 +- python/flink_agents/api/resource_context.py | 17 +- python/flink_agents/api/skills.py | 72 +++++ .../e2e_tests_integration/agent_skills_test.py | 346 +++++++++++++++++++++ .../chat_model_integration_test.py | 6 +- .../resources/skills/joke-generator/SKILL.md | 18 ++ .../skills/joke-generator/scripts/gen_joke.py} | 22 +- .../resources/skills/math-calculator/SKILL.md | 57 ++++ .../flink_agents/plan/actions/chat_model_action.py | 34 +- python/flink_agents/plan/agent_plan.py | 88 +++++- .../runtime/java/java_resource_wrapper.py | 11 + python/flink_agents/runtime/resource_context.py | 37 ++- python/flink_agents/runtime/skill/__init__.py | 16 + python/flink_agents/runtime/skill/agent_skill.py | 87 ++++++ .../skill/repository/__init__.py} | 19 -- .../skill/repository/filesystem_repository.py | 210 +++++++++++++ python/flink_agents/runtime/skill/skill_manager.py | 134 ++++++++ python/flink_agents/runtime/skill/skill_parser.py | 143 +++++++++ .../runtime/skill/skill_prompt_provider.py | 52 ++++ .../flink_agents/runtime/skill/skill_repository.py | 84 +++++ python/flink_agents/runtime/skill/skill_tools.py | 134 ++++++++ .../skill/tests/__init__.py} | 19 -- .../runtime/skill/tests/resources/NOTICE | 8 + .../tests/resources/skill_discovery_prompt.txt | 24 ++ .../skill/tests/resources/skills/github/SKILL.md | 47 +++ .../resources/skills/nano-banana-pro/SKILL.md | 130 ++++++++ .../resources/skills/nano-banana-pro/_meta.json | 6 + .../nano-banana-pro/scripts/generate_image.py | 165 
++++++++++ .../runtime/skill/tests/test_load_skill.py | 112 +++++++ .../runtime/skill/tests/test_manager.py | 88 ++++++ .../runtime/skill/tests/test_skill_parser.py | 166 ++++++++++ .../runtime/skill/tests/test_skill_repository.py | 96 ++++++ tools/.rat-excludes | 3 +- 38 files changed, 2443 insertions(+), 74 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5a7a2941..a7020454 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -172,6 +172,8 @@ jobs: - name: Install ollama run: bash tools/start_ollama_server.sh - name: Run Python IT + env: + DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }} run: tools/ut.sh -p -e -f ${{ matrix.flink-version }} java_it_tests: diff --git a/.licenserc.yaml b/.licenserc.yaml index b73bf4f2..a356e8a5 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -29,6 +29,7 @@ header: - '**/*.txt' - '**/dependency-reduced-pom.xml' - '**/LICENSE.*' + - '**/resources/skills/*' comment: on-failure dependency: files: diff --git a/api/src/main/java/org/apache/flink/agents/api/resource/ResourceType.java b/api/src/main/java/org/apache/flink/agents/api/resource/ResourceType.java index f277052c..79e4ab52 100644 --- a/api/src/main/java/org/apache/flink/agents/api/resource/ResourceType.java +++ b/api/src/main/java/org/apache/flink/agents/api/resource/ResourceType.java @@ -31,7 +31,8 @@ public enum ResourceType { VECTOR_STORE("vector_store"), PROMPT("prompt"), TOOL("tool"), - MCP_SERVER("mcp_server"); + MCP_SERVER("mcp_server"), + SKILLS("skills"); private final String value; diff --git a/python/flink_agents/api/chat_models/chat_model.py b/python/flink_agents/api/chat_models/chat_model.py index 81608c79..7d7fc194 100644 --- a/python/flink_agents/api/chat_models/chat_model.py +++ b/python/flink_agents/api/chat_models/chat_model.py @@ -25,9 +25,11 @@ from typing_extensions import override from flink_agents.api.chat_message import ( ChatMessage, MessageRole, + find_first_system_message, ) from 
flink_agents.api.prompts.prompt import Prompt from flink_agents.api.resource import Resource, ResourceType +from flink_agents.api.skills import BASH_TOOL, LOAD_SKILL_TOOL from flink_agents.api.tools.tool import Tool @@ -144,6 +146,10 @@ class BaseChatModelSetup(Resource): _resolved_connection: BaseChatModelConnection | None = PrivateAttr(default=None) prompt: Prompt | str | None = None tools: List[str] | List[Tool] = Field(default_factory=list) + skills: List[str] | None = None + skill_discovery_prompt: str | None = None + allowed_commands: List[str] = Field(default_factory=list) + allowed_script_dirs: List[str] = Field(default_factory=list) @property @abstractmethod @@ -160,22 +166,34 @@ class BaseChatModelSetup(Resource): def open(self) -> None: self._resolved_connection = cast( "BaseChatModelConnection", - self.resource_context.get_resource(self.connection, ResourceType.CHAT_MODEL_CONNECTION), + self.resource_context.get_resource( + self.connection, ResourceType.CHAT_MODEL_CONNECTION + ), ) if self.prompt is not None: if isinstance(self.prompt, str): # Get prompt resource if it's a string self.prompt = cast( - "Prompt", self.resource_context.get_resource(self.prompt, ResourceType.PROMPT) + "Prompt", + self.resource_context.get_resource( + self.prompt, ResourceType.PROMPT + ), ) - + if self.skills is not None: + self.skill_discovery_prompt = ( + self.resource_context.generate_available_skills_prompt(*self.skills) + ) + self.tools.extend([LOAD_SKILL_TOOL, BASH_TOOL]) + if len(self.tools) > 0: self.tools = [ - cast("Tool", self.resource_context.get_resource(tool_name, ResourceType.TOOL)) + cast( + "Tool", + self.resource_context.get_resource(tool_name, ResourceType.TOOL), + ) for tool_name in self.tools ] - def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatMessage: """Execute chat conversation. 
@@ -215,6 +233,18 @@ class BaseChatModelSetup(Resource): prompt_messages.append(msg) messages = prompt_messages + if self.skills is not None: + index = find_first_system_message(messages) + messages = ( + messages[: index + 1] + + [ + ChatMessage( + role=MessageRole.SYSTEM, content=self.skill_discovery_prompt + ) + ] + + messages[index + 1 :] + ) + # Call chat model connection to execute chat merged_kwargs = self.model_kwargs.copy() merged_kwargs.update(kwargs) diff --git a/python/flink_agents/api/decorators.py b/python/flink_agents/api/decorators.py index 231d5272..83d5f479 100644 --- a/python/flink_agents/api/decorators.py +++ b/python/flink_agents/api/decorators.py @@ -190,6 +190,23 @@ def vector_store(func: Callable) -> Callable: return func +def skills(func: Callable) -> Callable: + """Decorator for marking a function declaring skills. + + Parameters + ---------- + func : Callable + Function to be decorated. + + Returns: + ------- + Callable + Decorator function that marks the target function declare skills. + """ + func._is_skills = True + return func + + def java_resource(cls: Type) -> Type: """Decorator to mark a class as Java resource.""" cls._is_java_resource = True diff --git a/python/flink_agents/api/resource.py b/python/flink_agents/api/resource.py index 3b577233..f8d7741a 100644 --- a/python/flink_agents/api/resource.py +++ b/python/flink_agents/api/resource.py @@ -32,7 +32,7 @@ class ResourceType(Enum): """Type enum of resource. Currently, support chat_model, chat_model_server, tool, embedding_model, - vector_store, prompt, mcp_server. + vector_store, prompt, mcp_server, skills. 
""" CHAT_MODEL = "chat_model" @@ -43,6 +43,7 @@ class ResourceType(Enum): VECTOR_STORE = "vector_store" PROMPT = "prompt" MCP_SERVER = "mcp_server" + SKILLS = "skills" class Resource(BaseModel, ABC): diff --git a/python/flink_agents/api/resource_context.py b/python/flink_agents/api/resource_context.py index d0508508..8b323df7 100644 --- a/python/flink_agents/api/resource_context.py +++ b/python/flink_agents/api/resource_context.py @@ -22,7 +22,7 @@ The concrete implementation lives in :mod:`flink_agents.runtime.resource_context """ from abc import ABC, abstractmethod -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, List if TYPE_CHECKING: from flink_agents.api.resource import Resource, ResourceType @@ -34,3 +34,18 @@ class ResourceContext(ABC): @abstractmethod def get_resource(self, name: str, resource_type: "ResourceType") -> "Resource": """Get another resource declared in the same Agent.""" + + @abstractmethod + def generate_available_skills_prompt(self, *skill_names: str) -> str: + """Generate the available skills prompt for the given skill names. + + Returns empty string if no skills are configured. + """ + + @abstractmethod + def get_skill_dirs(self, *skill_names: str) -> List[str]: + """Return absolute directory paths for the given skill names. + + Returns an empty list if no skills are configured or none of the + requested skills are filesystem-backed. + """ diff --git a/python/flink_agents/api/skills.py b/python/flink_agents/api/skills.py new file mode 100644 index 00000000..0306d983 --- /dev/null +++ b/python/flink_agents/api/skills.py @@ -0,0 +1,72 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################# +"""Skills configuration resource for agent skills discovery. + +Use :meth:`Skills.from_local_dir` to construct a :class:`Skills` resource. + +Example:: + + @skills + @staticmethod + def my_skills() -> Skills: + return Skills.from_local_dir("./skills") + +Declare more than one ``@skills`` function on the same agent to combine +sources; the runtime merges them. +""" + +from __future__ import annotations + +from typing import List + +from pydantic import Field +from typing_extensions import override + +from flink_agents.api.resource import ResourceType, SerializableResource + + +class Skills(SerializableResource): + """A resource describing where to load agent skills from. + + Use :meth:`from_local_dir` to construct — direct field construction is + reserved for internal serialization and not part of the public API. + """ + + # Internal location list populated through the factory methods; kept + # as a public pydantic field so it can be serialized/deserialized. + paths: List[str] = Field(default_factory=list) + + @classmethod + def from_local_dir(cls, *paths: str) -> Skills: + """Create a Skills resource from one or more local filesystem directories. + + Each path points to a directory whose immediate subdirectories each + contain a ``SKILL.md`` file.
+ """ + return cls(paths=list(paths)) + + @classmethod + @override + def resource_type(cls) -> ResourceType: + """Return resource type of class.""" + return ResourceType.SKILLS + + +# name of built-in tools needed by using skills +LOAD_SKILL_TOOL = "load_skill" +BASH_TOOL = "bash" diff --git a/python/flink_agents/e2e_tests/e2e_tests_integration/agent_skills_test.py b/python/flink_agents/e2e_tests/e2e_tests_integration/agent_skills_test.py new file mode 100644 index 00000000..261be27a --- /dev/null +++ b/python/flink_agents/e2e_tests/e2e_tests_integration/agent_skills_test.py @@ -0,0 +1,346 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################# +import json +import os +import sysconfig +import uuid +from dataclasses import dataclass +from pathlib import Path + +import pytest +from pyflink.common import Configuration, Encoder +from pyflink.common.typeinfo import BasicTypeInfo, ExternalTypeInfo, RowTypeInfo, Types +from pyflink.datastream import StreamExecutionEnvironment +from pyflink.datastream.connectors.file_system import StreamingFileSink +from pyflink.table import DataTypes, Schema, StreamTableEnvironment, TableDescriptor + +from flink_agents.api.agents.agent import Agent +from flink_agents.api.agents.react_agent import ReActAgent +from flink_agents.api.chat_message import ChatMessage, MessageRole +from flink_agents.api.core_options import AgentExecutionOptions, ErrorHandlingStrategy +from flink_agents.api.decorators import ( + action, + chat_model_connection, + chat_model_setup, + prompt, + skills, +) +from flink_agents.api.events.chat_event import ChatRequestEvent, ChatResponseEvent +from flink_agents.api.events.event import InputEvent, OutputEvent +from flink_agents.api.execution_environment import AgentsExecutionEnvironment +from flink_agents.api.prompts.prompt import Prompt +from flink_agents.api.resource import ResourceDescriptor, ResourceName, ResourceType +from flink_agents.api.runner_context import RunnerContext +from flink_agents.api.skills import Skills +from flink_agents.e2e_tests.e2e_tests_integration.react_agent_test import MyKeySelector + +current_dir = Path(__file__).parent + +PYTHON_PATH = sysconfig.get_paths()["purelib"] +MODEL = "qwen3.6-plus" +BASE_URL = "https://coding.dashscope.aliyuncs.com/v1" +API_KEY = os.environ.get("DASHSCOPE_API_KEY") + + +@dataclass +class Operation: + a: int + b: int + + +class SkillTestAgent(Agent): + """Agent for testing async execution.""" + + @chat_model_connection + @staticmethod + def openai_connection() -> ResourceDescriptor: + return ResourceDescriptor( + 
clazz=ResourceName.ChatModel.OPENAI_COMPLETIONS_CONNECTION, + api_key=API_KEY, + api_base_url=BASE_URL, + request_timeout=300, + ) + + @chat_model_setup + @staticmethod + def openai_setup() -> ResourceDescriptor: + return ResourceDescriptor( + clazz=ResourceName.ChatModel.OPENAI_COMPLETIONS_SETUP, + connection="openai_connection", + model=MODEL, + skills=["math-calculator", "joke-generator"], + allowed_commands=["echo", "bc", "python"], + prompt="system_prompt", + ) + + @skills + @staticmethod + def my_skills() -> Skills: + return Skills.from_local_dir(str(current_dir.parent / "resources" / "skills")) + + @prompt + @staticmethod + def system_prompt() -> Prompt: + return Prompt.from_messages( + messages=[ + ChatMessage( + role=MessageRole.SYSTEM, + content="You are a help assistant. Use the math-calculator skill when asked to evaluate " + "an expression. You **must load the skill first** and strictly follow the instructions " + "of the skill.", + ) + ], + ) + + @action(InputEvent) + @staticmethod + def process_input(event: InputEvent, ctx: RunnerContext) -> None: + if isinstance(event.input, Operation): + input: Operation = event.input + ctx.send_event( + ChatRequestEvent( + model="openai_setup", + messages=[ + ChatMessage( + role=MessageRole.USER, + content=f"Please evaluate the expression: ({input.a} ^ {input.b})", + ) + ], + ) + ) + else: + input: str = event.input + ctx.send_event( + ChatRequestEvent( + model="openai_setup", + messages=[ + ChatMessage( + role=MessageRole.USER, + content=input, + ) + ], + ) + ) + + @action(ChatResponseEvent) + @staticmethod + def process_chat_response(event: ChatResponseEvent, ctx: RunnerContext) -> None: + input = event.response + ctx.send_event(OutputEvent(output=input.content)) + + [email protected](not API_KEY, reason="openai api key is required.") +def test_workflow_with_skills(tmp_path: Path) -> None: + config = Configuration() + config.set_string("python.pythonpath", PYTHON_PATH) + env = 
StreamExecutionEnvironment.get_execution_environment(config) + env.set_parallelism(1) + + input_stream = env.from_collection( + [Operation(a=2, b=3)], + ) + + agents_env = AgentsExecutionEnvironment.get_execution_environment(env=env) + output_datastream = ( + agents_env.from_datastream( + input=input_stream, key_selector=lambda x: uuid.uuid4() + ) + .apply(SkillTestAgent()) + .to_datastream(Types.STRING()) + ) + + result_dir = tmp_path / "results" + result_dir.mkdir(parents=True, exist_ok=True) + + output_datastream.add_sink( + StreamingFileSink.for_row_format( + base_path=str(result_dir.absolute()), + encoder=Encoder.simple_string_encoder(), + ).build() + ) + + agents_env.execute() + + for file in result_dir.iterdir(): + if file.is_dir(): + for child in file.iterdir(): + with child.open() as f: + content = f.read() + + assert "8" in content + + [email protected](not API_KEY, reason="openai api key is required.") +def test_execute_python_script(tmp_path: Path) -> None: + config = Configuration() + config.set_string("python.pythonpath", PYTHON_PATH) + env = StreamExecutionEnvironment.get_execution_environment(config) + env.set_parallelism(1) + + input_stream = env.from_collection( + ["Tell me a joke about cat."], + ) + + agents_env = AgentsExecutionEnvironment.get_execution_environment(env=env) + output_datastream = ( + agents_env.from_datastream( + input=input_stream, key_selector=lambda x: uuid.uuid4() + ) + .apply(SkillTestAgent()) + .to_datastream(Types.STRING()) + ) + + result_dir = tmp_path / "results" + result_dir.mkdir(parents=True, exist_ok=True) + + output_datastream.add_sink( + StreamingFileSink.for_row_format( + base_path=str(result_dir.absolute()), + encoder=Encoder.simple_string_encoder(), + ).build() + ) + + agents_env.execute() + + for file in result_dir.iterdir(): + if file.is_dir(): + for child in file.iterdir(): + with child.open() as f: + content = f.read() + + assert "Too many cheetahs" in content + + [email protected](not API_KEY, 
reason="openai api key is required.") +def test_react_agent_with_skills(tmp_path: Path) -> None: + config = Configuration() + config.set_string("python.pythonpath", PYTHON_PATH) + stream_env = StreamExecutionEnvironment.get_execution_environment(config) + + stream_env.set_parallelism(1) + + t_env = StreamTableEnvironment.create(stream_execution_environment=stream_env) + + table = t_env.from_elements( + elements=[(2, 3)], + schema=DataTypes.ROW( + [ + DataTypes.FIELD("a", DataTypes.INT()), + DataTypes.FIELD("b", DataTypes.INT()), + ] + ), + ) + + env = AgentsExecutionEnvironment.get_execution_environment( + env=stream_env, t_env=t_env + ) + + env.get_config().set( + AgentExecutionOptions.ERROR_HANDLING_STRATEGY, ErrorHandlingStrategy.RETRY + ) + + env.get_config().set(AgentExecutionOptions.MAX_RETRIES, 3) + + # register resource to execution environment + ( + env.add_resource( + "openai", + ResourceType.CHAT_MODEL_CONNECTION, + ResourceDescriptor( + clazz=ResourceName.ChatModel.OPENAI_COMPLETIONS_CONNECTION, + api_key=API_KEY, + api_base_url=BASE_URL, + request_timeout=300, + ), + ).add_resource( + "my_skill", + ResourceType.SKILLS, + Skills.from_local_dir(str(current_dir.parent / "resources" / "skills")), + ) + ) + + # prepare prompt + prompt = Prompt.from_messages( + messages=[ + ChatMessage( + role=MessageRole.SYSTEM, + content="You are a math calculate assistant. Use the math-calculator skill when asked to evaluate " + "an expression. You **must load the skill first** and strictly follow the instructions " + "of the skill.", + ), + ChatMessage( + role=MessageRole.USER, + content="Please evaluate the expression: {a} ^ {b}", + ), + ], + ) + + output_type_info = RowTypeInfo( + [BasicTypeInfo.INT_TYPE_INFO()], + ["result"], + ) + + # create ReAct agent. 
+ agent = ReActAgent( + chat_model=ResourceDescriptor( + clazz=ResourceName.ChatModel.OPENAI_COMPLETIONS_SETUP, + connection="openai", + model=MODEL, + skills=["math-calculator"], + allowed_commands=["echo", "bc"], + ), + prompt=prompt, + output_schema=output_type_info, + ) + + output_type = ExternalTypeInfo(output_type_info) + + schema = (Schema.new_builder().column("result", DataTypes.INT())).build() + + output_table = ( + env.from_table(input=table, key_selector=MyKeySelector()) + .apply(agent) + .to_table(schema=schema, output_type=output_type) + ) + + result_dir = tmp_path / "results" + result_dir.mkdir(parents=True, exist_ok=True) + + t_env.create_temporary_table( + "sink", + TableDescriptor.for_connector("filesystem") + .option("path", str(result_dir.absolute())) + .format("json") + .schema(schema) + .build(), + ) + + output_table.execute_insert("sink").wait() + + actual_result = [] + for file in result_dir.iterdir(): + if file.is_file(): + with file.open() as f: + actual_result.extend(f.readlines()) + + assert len(actual_result) == 1, ( + "This may be caused by the LLM response does not match the output schema, you can rerun this case." + ) + content = json.loads(actual_result[0].strip()) + assert "result" in content + assert int(content["result"]) == 8 diff --git a/python/flink_agents/e2e_tests/e2e_tests_integration/chat_model_integration_test.py b/python/flink_agents/e2e_tests/e2e_tests_integration/chat_model_integration_test.py index bda497db..07bae38e 100644 --- a/python/flink_agents/e2e_tests/e2e_tests_integration/chat_model_integration_test.py +++ b/python/flink_agents/e2e_tests/e2e_tests_integration/chat_model_integration_test.py @@ -61,19 +61,19 @@ client = pull_model(OLLAMA_MODEL) pytest.param( "Tongyi", marks=pytest.mark.skipif( - DASHSCOPE_API_KEY is None, reason="Tongyi api key is not set." + not DASHSCOPE_API_KEY, reason="Tongyi api key is not set." 
), ), pytest.param( "OpenAI", marks=pytest.mark.skipif( - OPENAI_API_KEY is None, reason="OpenAI api key is not set." + not OPENAI_API_KEY, reason="OpenAI api key is not set." ), ), pytest.param( "AzureOpenAI", marks=pytest.mark.skipif( - AZURE_OPENAI_API_KEY is None, reason="Azure OpenAI api key is not set." + not AZURE_OPENAI_API_KEY, reason="Azure OpenAI api key is not set." ), ), ], diff --git a/python/flink_agents/e2e_tests/resources/skills/joke-generator/SKILL.md b/python/flink_agents/e2e_tests/resources/skills/joke-generator/SKILL.md new file mode 100644 index 00000000..eadf07ab --- /dev/null +++ b/python/flink_agents/e2e_tests/resources/skills/joke-generator/SKILL.md @@ -0,0 +1,18 @@ +--- +name: joke-generator +description: Tell a joke about cat. +--- + +# Math Calculator Skill + +This skill provides the ability to tell a joke about cat. + +## When to Use + +Use this skill when user want to get a joke about cat. + +## Methods + +```bash +python scripts/gen_joke.py +``` diff --git a/python/flink_agents/api/resource_context.py b/python/flink_agents/e2e_tests/resources/skills/joke-generator/scripts/gen_joke.py old mode 100644 new mode 100755 similarity index 60% copy from python/flink_agents/api/resource_context.py copy to python/flink_agents/e2e_tests/resources/skills/joke-generator/scripts/gen_joke.py index d0508508..3c681eba --- a/python/flink_agents/api/resource_context.py +++ b/python/flink_agents/e2e_tests/resources/skills/joke-generator/scripts/gen_joke.py @@ -14,23 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -################################################################################ -"""Public ResourceContext interface. - -Defines the capabilities available to a Resource during execution. -The concrete implementation lives in :mod:`flink_agents.runtime.resource_context`. 
-""" - -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from flink_agents.api.resource import Resource, ResourceType - - -class ResourceContext(ABC): - """Base abstract class for Resource Context.""" +################################################################################# - @abstractmethod - def get_resource(self, name: str, resource_type: "ResourceType") -> "Resource": - """Get another resource declared in the same Agent.""" +if __name__ == "__main__": + print("Why don't cats play poker in the jungle? Too many cheetahs. 🐱") diff --git a/python/flink_agents/e2e_tests/resources/skills/math-calculator/SKILL.md b/python/flink_agents/e2e_tests/resources/skills/math-calculator/SKILL.md new file mode 100644 index 00000000..b00a1592 --- /dev/null +++ b/python/flink_agents/e2e_tests/resources/skills/math-calculator/SKILL.md @@ -0,0 +1,57 @@ +--- +name: math-calculator +description: Calculate simple mathematical expressions using shell commands. Use when the user asks to perform arithmetic calculations like addition, subtraction, multiplication, division, or more complex math expressions. +license: Apache-2.0 +compatibility: Requires bash with bc (basic calculator) +--- + +# Math Calculator Skill + +This skill provides the ability to calculate mathematical expressions using shell commands.
+ +## When to Use + +Use this skill when: +- Performing arithmetic calculations (add, subtract, multiply, divide) +- Evaluating mathematical expressions with parentheses +- Computing percentages or powers +- Any numeric computation requested by the user + +## Methods + +### Using `bc` (Basic Calculator) + +The `bc` command is a powerful calculator that supports: +- Basic arithmetic: `+`, `-`, `*`, `/` +- Power: `^` +- Parentheses for grouping +- Scale for decimal precision + +**Example:** +```bash +echo "2 + 3 * 4" | bc +# Output: 14 + +echo "scale=2; 10 / 3" | bc +# Output: 3.33 + +echo "(2 + 3) * 4" | bc +# Output: 20 +``` + +## Supported Operations + +| Operation | Symbol | Example | +|-----------|--------|---------| +| Addition | `+` | `5 + 3 = 8` | +| Subtraction | `-` | `10 - 4 = 6` | +| Multiplication | `*` | `6 * 7 = 42` | +| Division | `/` | `15 / 3 = 5` | +| Power | `^` (bc) or `**` (Python) | `2 ^ 3 = 8` | +| Modulo | `%` | `17 % 5 = 2` | +| Square Root | `sqrt()` (bc) | `sqrt(16) = 4` | + +## Notes + +- Use `scale=N` in `bc` to set decimal precision (default is 0, integer only) +- For floating-point division, always set `scale` in `bc` diff --git a/python/flink_agents/plan/actions/chat_model_action.py b/python/flink_agents/plan/actions/chat_model_action.py index 55aa3965..de500788 100644 --- a/python/flink_agents/plan/actions/chat_model_action.py +++ b/python/flink_agents/plan/actions/chat_model_action.py @@ -164,10 +164,34 @@ def _record_retry_metrics( model_group.get_counter("retryWaitSec").inc(total_retry_wait_sec) +def _inject_bash_tool_args( + tool_calls: List[Dict], + chat_model: "BaseChatModelSetup", +) -> None: + """Inject framework-controlled args (allowed_commands, allowed_script_dirs) + into bash tool calls so they remain hidden from the LLM. 
+ """ + from flink_agents.api.skills import BASH_TOOL + + script_dirs = list(chat_model.allowed_script_dirs) + if chat_model.resource_context is not None and chat_model.skills: + script_dirs.extend( + chat_model.resource_context.get_skill_dirs(*chat_model.skills) + ) + + for tool_call in tool_calls: + if tool_call["function"]["name"] != BASH_TOOL: + continue + args = tool_call["function"]["arguments"] + args["allowed_commands"] = list(chat_model.allowed_commands) + args["allowed_script_dirs"] = script_dirs + + def _handle_tool_calls( response: ChatMessage, initial_request_id: UUID, model: str, + chat_model: "BaseChatModelSetup", messages: List[ChatMessage], output_schema: OutputSchema | None, ctx: RunnerContext, @@ -177,6 +201,8 @@ def _handle_tool_calls( ctx.sensory_memory, initial_request_id, messages, [response] ) + _inject_bash_tool_args(response.tool_calls, chat_model) + tool_request_event = ToolRequestEvent( model=model, tool_calls=response.tool_calls, @@ -320,7 +346,13 @@ async def chat( len(response.tool_calls) > 0 ): # generate tool request event according tool calls in response _handle_tool_calls( - response, initial_request_id, model, messages, output_schema, ctx + response, + initial_request_id, + model, + chat_model, + messages, + output_schema, + ctx, ) else: # if there is no tool call generated, return chat response directly retry_stats = _get_retry_stats(ctx.sensory_memory, initial_request_id) diff --git a/python/flink_agents/plan/agent_plan.py b/python/flink_agents/plan/agent_plan.py index 40490e45..b5ac464d 100644 --- a/python/flink_agents/plan/agent_plan.py +++ b/python/flink_agents/plan/agent_plan.py @@ -21,11 +21,15 @@ from pydantic import BaseModel, field_serializer, model_validator from flink_agents.api.agents.agent import Agent from flink_agents.api.resource import ( - Resource, ResourceDescriptor, ResourceType, ) from flink_agents.api.resource_context import ResourceContext +from flink_agents.api.skills import ( + BASH_TOOL, + 
LOAD_SKILL_TOOL, + Skills, +) from flink_agents.plan.actions.action import Action from flink_agents.plan.actions.chat_model_action import CHAT_MODEL_ACTION from flink_agents.plan.actions.context_retrieval_action import CONTEXT_RETRIEVAL_ACTION @@ -42,6 +46,9 @@ from flink_agents.plan.resource_provider import ( from flink_agents.plan.tools.function_tool import from_callable if TYPE_CHECKING: + from flink_agents.api.resource import ( + Resource, + ) from flink_agents.integrations.mcp.mcp import MCPServer BUILT_IN_ACTIONS = [CHAT_MODEL_ACTION, TOOL_CALL_ACTION, CONTEXT_RETRIEVAL_ACTION] @@ -259,6 +266,7 @@ def _get_resource_providers( agent: Agent, config: AgentConfiguration ) -> List[ResourceProvider]: resource_providers = [] + skills_descriptors = {} # retrieve resource declared by decorator for name, value in agent.__class__.__dict__.items(): if ( @@ -309,6 +317,10 @@ def _get_resource_providers( descriptor = value() _add_mcp_server(name, resource_providers, descriptor, config) + elif hasattr(value, "_is_skills"): + if isinstance(value, staticmethod): + value = value.__func__ + skills_descriptors[name] = value() # retrieve resource declared by add interface for name, prompt in agent.resources[ResourceType.PROMPT].items(): @@ -326,6 +338,12 @@ def _get_resource_providers( for name, descriptor in agent.resources[ResourceType.MCP_SERVER].items(): _add_mcp_server(name, resource_providers, descriptor, config) + # Merge decorator-based and programmatic skills + all_skills: Dict[str, Skills] = dict( + {**skills_descriptors, **agent.resources[ResourceType.SKILLS]}.items() + ) + _add_skills(all_skills, resource_providers) + for resource_type in [ ResourceType.CHAT_MODEL, ResourceType.CHAT_MODEL_CONNECTION, @@ -358,12 +376,19 @@ def _add_mcp_server( class ResourceContextPlaceholder(ResourceContext): """Placeholder - MCP server construction doesn't need resource resolution.""" - + + def generate_available_skills_prompt(self, *skill_names: str) -> str: + pass + def 
get_resource(self, name: str, resource_type: "ResourceType") -> "Resource": pass + def get_skill_dirs(self, *skill_names: str) -> List[str]: + return [] + mcp_server = cast( - "MCPServer", provider.provide(resource_context=ResourceContextPlaceholder(), config=config) + "MCPServer", + provider.provide(resource_context=ResourceContextPlaceholder(), config=config), ) resource_providers.extend( @@ -385,3 +410,60 @@ def _add_mcp_server( ) mcp_server.close() + + +SKILLS_CONFIG = "_skills_config" + + +def _add_skills( + skills_objects: Dict[str, Skills], + resource_providers: List[ResourceProvider], +) -> None: + """Register skill configuration and skill tools. + + Merges all Skills objects into a single Skills config resource, + and registers built-in skill tools (load_skill, bash). + + + """ + if len(skills_objects) == 0: + return + + # Register skill tools via descriptor (no runtime import needed). + # The tool classes live in flink_agents.runtime.skill_tools and will + # be instantiated at runtime by PythonResourceProvider. + + resource_providers.extend( + [ + PythonResourceProvider.get( + name=LOAD_SKILL_TOOL, + descriptor=ResourceDescriptor( + clazz="flink_agents.runtime.skill.skill_tools.LoadSkillTool", + ), + ), + PythonResourceProvider.get( + name=BASH_TOOL, + descriptor=ResourceDescriptor( + clazz="flink_agents.plan.tools.bash.bash_tool.BashTool", + ), + ), + ] + ) + + # TODO: Currently, we construct a global agent skill manager for all skill + # resource descriptors. In the future, we can support crate individual + # agent skill manager for each resource descriptor, and support specifying + # skill names and which skill manager they belong to when declaring a chat + # model setup. MCP prompts and tools face the same situation, we can refactor + # them as a whole. 
+ paths: List[str] = [] + for skills_obj in skills_objects.values(): + paths.extend(skills_obj.paths) + + merged = Skills.from_local_dir(*dict.fromkeys(paths)) + + resource_providers.append( + PythonSerializableResourceProvider.from_resource( + name=SKILLS_CONFIG, resource=merged + ) + ) diff --git a/python/flink_agents/runtime/java/java_resource_wrapper.py b/python/flink_agents/runtime/java/java_resource_wrapper.py index 579e9141..cc929b6f 100644 --- a/python/flink_agents/runtime/java/java_resource_wrapper.py +++ b/python/flink_agents/runtime/java/java_resource_wrapper.py @@ -90,3 +90,14 @@ class JavaResourceContextWrapper(ResourceContext): def get_resource(self, name: str, type: ResourceType) -> Resource: """Get a resource by name and type.""" return self._j_resource_adapter.getResource(name, type.value) + + @override + def generate_available_skills_prompt(self, *skill_names: str) -> str: + """Generate the skill discovery prompt for the given skill names.""" + # TODO: Implement after java supports agent skills. + + @override + def get_skill_dirs(self, *skill_names: str) -> List[str]: + """Return absolute directory paths for the given skill names.""" + # TODO: Implement after java supports agent skills. + return [] diff --git a/python/flink_agents/runtime/resource_context.py b/python/flink_agents/runtime/resource_context.py index da6a68c9..1cfb24b9 100644 --- a/python/flink_agents/runtime/resource_context.py +++ b/python/flink_agents/runtime/resource_context.py @@ -16,9 +16,10 @@ # limitations under the License. 
################################################################################ """Runtime implementation of ResourceContext.""" + from __future__ import annotations -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING, List, cast from flink_agents.api.resource import ResourceType from flink_agents.api.resource_context import ResourceContext @@ -49,3 +50,37 @@ class ResourceContextImpl(ResourceContext): def get_resource(self, name: str, resource_type: ResourceType) -> Resource: """Get another resource declared in the same Agent.""" return self._resource_cache.get_resource(name, resource_type) + + def generate_available_skills_prompt(self, *skill_names: str) -> str: + """Generate the skill discovery prompt for the given skill names.""" + manager = self.get_skill_manager() + if manager is None: + return "" + return manager.generate_discovery_prompt(*skill_names) + + def get_skill_dirs(self, *skill_names: str) -> List[str]: + """Return absolute directory paths for the given skill names.""" + manager = self.get_skill_manager() + if manager is None: + return [] + return manager.get_skill_dirs(*skill_names) + + def get_skill_manager(self) -> SkillManager | None: + """Get the SkillManager (runtime-internal only). + + NOT part of the public ResourceContext interface. 
+ """ + if not self._skill_manager_initialized: + self._skill_manager_initialized = True + self._skill_manager = self._create_skill_manager() + return self._skill_manager + + def _create_skill_manager(self) -> SkillManager | None: + try: + skills_config = cast( + "Skills", + self._resource_cache.get_resource(SKILLS_CONFIG, ResourceType.SKILLS), + ) + except KeyError: + return None + return SkillManager(skills_config) diff --git a/python/flink_agents/runtime/skill/__init__.py b/python/flink_agents/runtime/skill/__init__.py new file mode 100644 index 00000000..b77373a9 --- /dev/null +++ b/python/flink_agents/runtime/skill/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/flink_agents/runtime/skill/agent_skill.py b/python/flink_agents/runtime/skill/agent_skill.py new file mode 100644 index 00000000..df44d48e --- /dev/null +++ b/python/flink_agents/runtime/skill/agent_skill.py @@ -0,0 +1,87 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
from typing import Callable, Dict, List

from pydantic import BaseModel, Field, PrivateAttr


class AgentSkill(BaseModel):
    """A single agent skill: its SKILL.md metadata, body and resources.

    Only the length limits declared on the fields below are enforced by this
    model; any further naming rules are the responsibility of the caller.

    Attributes:
    ----------
    name : str
        Skill name (1-64 characters).
    description : str
        What the skill does and when to use it (1-1024 characters).
    content : str
        The skill instructions, i.e. the markdown body that follows the
        frontmatter in SKILL.md. Must not be empty.
    license : Optional[str]
        License name or reference to a bundled license file.
    compatibility : Optional[str]
        Environment requirements of the skill (at most 500 characters).
    metadata : Optional[Dict[str, str]]
        Arbitrary key-value mapping for additional metadata.
    resources : Optional[Dict[str, str]]
        Supporting resources keyed by path relative to the skill root, with
        the file contents as values. Filled in lazily when a resource loader
        has been installed with :meth:`set_resource_loader`.
    """

    name: str = Field(..., min_length=1, max_length=64)
    description: str = Field(..., min_length=1, max_length=1024)
    content: str = Field(..., min_length=1)
    license: str | None = Field(default=None)
    compatibility: str | None = Field(default=None, max_length=500)
    metadata: Dict[str, str] | None = Field(default=None)
    resources: Dict[str, str] | None = None

    # Callable producing the resources mapping; invoked at most once.
    _resource_loader: Callable[[], Dict[str, str]] | None = PrivateAttr(default=None)
    # True once the loader has run successfully.
    _activated: bool = PrivateAttr(default=False)

    def set_resource_loader(self, loader: Callable[[], Dict[str, str]]) -> None:
        """Install the callable used to load this skill's resources on demand."""
        self._resource_loader = loader

    def _activate(self) -> None:
        """Run the resource loader the first time resources are needed."""
        if self._activated or self._resource_loader is None:
            return
        self.resources = self._resource_loader()
        self._activated = True

    def get_resource(self, resource_path: str) -> str | None:
        """Return the content of one resource.

        Args:
            resource_path: Path of the resource relative to the skill root.

        Returns:
            The resource content, or None when the skill has no resources or
            no resource is stored under that path.
        """
        self._activate()
        if not self.resources:
            return None
        return self.resources.get(resource_path)

    def get_resource_paths(self) -> List[str]:
        """Return the relative paths of all resources of this skill."""
        self._activate()
        return [] if self.resources is None else list(self.resources)
################################################################################ -"""Public ResourceContext interface. - -Defines the capabilities available to a Resource during execution. -The concrete implementation lives in :mod:`flink_agents.runtime.resource_context`. -""" - -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from flink_agents.api.resource import Resource, ResourceType - - -class ResourceContext(ABC): - """Base abstract class for Resource Context.""" - - @abstractmethod - def get_resource(self, name: str, resource_type: "ResourceType") -> "Resource": - """Get another resource declared in the same Agent.""" diff --git a/python/flink_agents/runtime/skill/repository/filesystem_repository.py b/python/flink_agents/runtime/skill/repository/filesystem_repository.py new file mode 100644 index 00000000..6e6eaa04 --- /dev/null +++ b/python/flink_agents/runtime/skill/repository/filesystem_repository.py @@ -0,0 +1,210 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import base64
import logging
import os
from pathlib import Path
from typing import Dict, List

from typing_extensions import override

from flink_agents.runtime.skill.agent_skill import AgentSkill
from flink_agents.runtime.skill.skill_parser import SkillParser
from flink_agents.runtime.skill.skill_repository import (
    SkillRepository,
)

logger = logging.getLogger(__name__)


class FileSystemSkillRepository(SkillRepository):
    """File system based implementation of SkillRepository.

    Every skill lives in its own subdirectory of the base directory. The
    subdirectory must contain a SKILL.md file and may contain any number of
    additional resource files.

    Directory structure:

        baseDir/
        ├── skill-name-1/
        │   ├── SKILL.md          # Required: Entry file with YAML frontmatter
        │   ├── references/       # Optional: Reference documentation
        │   ├── examples/         # Optional: Example files
        │   └── scripts/          # Optional: Script files
        └── skill-name-2/
            └── SKILL.md
    """

    SKILL_MD_FILE = "SKILL.md"

    def __init__(
        self,
        base_dir: Path | str,
    ) -> None:
        """Create a FileSystemSkillRepository.

        Args:
            base_dir: The base directory containing skill subdirectories.

        Raises:
            ValueError: If base_dir is None, doesn't exist, or is not a directory.
        """
        if base_dir is None:
            msg = "Base directory cannot be None"
            raise ValueError(msg)

        # Store an absolute, symlink-free path so that the paths handed out
        # later do not depend on the current working directory.
        self._base_dir = Path(base_dir).resolve()

        if not self._base_dir.exists():
            msg = f"Base directory does not exist: {self._base_dir}"
            raise ValueError(msg)

        if not self._base_dir.is_dir():
            msg = f"Base directory is not a directory: {self._base_dir}"
            raise ValueError(msg)

    @property
    def base_dir(self) -> Path:
        """Return the resolved base directory of this repository."""
        return self._base_dir

    @override
    def get_skill(self, name: str) -> AgentSkill | None:
        """Get a skill by name.

        Args:
            name: The skill name, used as the subdirectory name.

        Returns:
            The skill, or None if the subdirectory has no SKILL.md.

        Raises:
            ValueError: If SKILL.md exists but cannot be read or parsed.
        """
        return self._load_skill(self._base_dir / name)

    @override
    def get_resources(self, name: str) -> Dict[str, str]:
        """Load every resource file of the named skill.

        Args:
            name: The skill name, used as the subdirectory name.

        Returns:
            Map of path relative to the skill directory to file content.
        """
        return self._load_resources(self._base_dir / name)

    @override
    def get_skills(self) -> List[AgentSkill]:
        """Get all skills in this repository, ordered by directory name."""
        return [
            skill
            for skill_name in self._get_all_skill_names()
            if (skill := self.get_skill(skill_name)) is not None
        ]

    def _get_all_skill_names(self) -> List[str]:
        """Return the sorted names of all subdirectories containing SKILL.md."""
        return sorted(
            entry.name
            for entry in self._base_dir.iterdir()
            if entry.is_dir() and (entry / self.SKILL_MD_FILE).exists()
        )

    def _load_skill(self, skill_dir: Path) -> AgentSkill | None:
        """Load a skill from a directory.

        Args:
            skill_dir: Path to the skill directory.

        Returns:
            The loaded skill, or None if the directory has no SKILL.md.

        Raises:
            ValueError: If SKILL.md cannot be read or parsed.
        """
        skill_md_path = skill_dir / self.SKILL_MD_FILE
        if not skill_md_path.exists():
            return None

        try:
            # Read as UTF-8 explicitly instead of relying on the locale.
            skill = SkillParser.parse_skill(skill_md_path.read_text(encoding="utf-8"))
        except Exception as e:
            err_msg = f"Failed to load skill from {skill_dir}"
            raise ValueError(err_msg) from e

        if skill.name != skill_dir.name:
            logger.warning(
                "The skill name %s is different from the base directory %s.",
                skill.name,
                skill_dir.name,
            )
        return skill

    def _load_resources(self, skill_dir: Path) -> Dict[str, str]:
        """Load all resources from a skill directory.

        Text files are stored as-is. Files that are not valid UTF-8 are stored
        as ``"base64: "`` followed by the base64 encoding of their bytes.
        Files that cannot be read are logged and skipped. Any file named
        SKILL.md is skipped.

        Args:
            skill_dir: Path to the skill directory.

        Returns:
            Map of relative path to content.
        """
        resources: Dict[str, str] = {}

        for root, _dirs, files in os.walk(skill_dir):
            root_path = Path(root)

            for file_name in files:
                if file_name == self.SKILL_MD_FILE:
                    continue

                file_path = root_path / file_name
                relative_path = str(file_path.relative_to(skill_dir))

                try:
                    try:
                        content = file_path.read_text(encoding="utf-8")
                    except UnicodeDecodeError:
                        # Binary file: encode the bytes for real. The previous
                        # code interpolated the bytes object itself, which
                        # produced its repr rather than base64 text.
                        encoded = base64.b64encode(file_path.read_bytes())
                        content = f"base64: {encoded.decode('ascii')}"
                except OSError:
                    logger.warning(
                        "Failed to read resource file %s", file_path, exc_info=True
                    )
                    continue

                resources[relative_path] = content

        return resources
import os
from pathlib import Path
from typing import TYPE_CHECKING, Dict, List

from flink_agents.api.skills import Skills
from flink_agents.runtime.skill.agent_skill import AgentSkill
from flink_agents.runtime.skill.repository.filesystem_repository import (
    FileSystemSkillRepository,
)
from flink_agents.runtime.skill.skill_prompt_provider import SkillPromptProvider

if TYPE_CHECKING:
    from flink_agents.runtime.skill.skill_repository import SkillRepository


class SkillManager:
    """Internal runtime component for loading, parsing, and managing skills.

    Created by the runtime from a :class:`Skills` configuration resource.
    Never exposed to users directly.

    Progressive Disclosure:
    - Discovery: Load only name/description at startup (~100 tokens)
    - Activation: Load full SKILL.md when skill matches task
    - Execution: Load resources/scripts only when needed
    """

    def __init__(self, skills_config: Skills) -> None:
        """Initialize the SkillManager and load skills from every configured path."""
        self._skills: Dict[str, AgentSkill] = {}
        self._repos: Dict[str, SkillRepository] = {}
        self._config = skills_config
        self._load_skills_from_paths()

    @property
    def size(self) -> int:
        """Get the number of registered skills."""
        return len(self._skills)

    def get_skill(self, name: str) -> AgentSkill:
        """Get a registered skill by name.

        Raises:
            ValueError: If no skill with that name is registered.
        """
        if name not in self._skills:
            msg = f"Skill {name} not found, available skill names are: {list(self._skills.keys())}"
            raise ValueError(msg)
        return self._skills[name]

    def get_all_skill_names(self) -> List[str]:
        """Get the names of all registered skills."""
        return list(self._skills.keys())

    def load_skill_resource(self, skill_name: str, resource_path: str) -> str | None:
        """Load one resource of a skill; None if the skill has no such resource.

        Raises:
            ValueError: If the skill is not registered.
        """
        return self.get_skill(skill_name).get_resource(resource_path)

    def generate_discovery_prompt(self, *names: str) -> str:
        """Generate a system prompt listing the named skills.

        Returns an empty string when no skill is registered at all.

        Raises:
            ValueError: If one of the names is not a registered skill.
        """
        if self.size == 0:
            return ""

        entries = []
        for name in names:
            skill = self.get_skill(name)
            entries.append(
                SkillPromptProvider.AVAILABLE_SKILL_TEMPLATE.format(
                    name=skill.name, description=skill.description
                )
            )

        return (
            SkillPromptProvider.SKILL_DISCOVERY_PROMPT.format()
            + "".join(entries)
            + SkillPromptProvider.AVAILABLE_SKILLS_TAG_END
        )

    def get_skill_dirs(self, *names: str) -> List[str]:
        """Return absolute directory paths for the given skill names.

        If no names are provided, returns directories for all filesystem-backed
        skills. Unknown names and skills not backed by a filesystem repo are
        silently skipped.
        """
        selected = names if names else tuple(self._repos.keys())
        dirs: List[str] = []
        for skill_name in selected:
            skill_dir = self.get_skill_dir(skill_name)
            if skill_dir is not None:
                dirs.append(str(skill_dir))
        return dirs

    def get_skill_dir(self, skill_name: str) -> Path | None:
        """Return absolute directory path for a single skill, if filesystem-backed."""
        repo = self._repos.get(skill_name)
        if isinstance(repo, FileSystemSkillRepository):
            return repo.base_dir / skill_name
        return None

    def resolve_resource_path(self, skill_name: str, resource_path: str) -> Path | None:
        """Resolve a skill resource's relative path to an absolute filesystem path.

        Returns None if the skill's repository doesn't support path
        resolution, if the path does not name an existing file, or if the path
        would leave the skill directory (``..`` segments or an absolute path).
        """
        skill_dir = self.get_skill_dir(skill_name)
        if skill_dir is None:
            return None

        candidate = skill_dir / resource_path

        # Containment check: compare the lexically normalised paths so that
        # "../other-skill/x" or "/etc/passwd" cannot escape the skill root.
        # Symlinks are deliberately not resolved here.
        root = Path(os.path.normpath(skill_dir))
        if not Path(os.path.normpath(candidate)).is_relative_to(root):
            return None

        return candidate if candidate.is_file() else None

    def _load_skills_from_paths(self) -> None:
        """Register every skill found under each configured path.

        A skill found under a later path replaces an earlier skill of the
        same name.
        """
        for path in self._config.paths:
            repo = FileSystemSkillRepository(path)
            for skill in repo.get_skills():
                # Bind name and repo as defaults so each loader keeps its own
                # values instead of the loop's last ones.
                skill.set_resource_loader(
                    lambda name=skill.name, r=repo: r.get_resources(name)
                )
                self._skills[skill.name] = skill
                self._repos[skill.name] = repo
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, List

import yaml

from flink_agents.runtime.skill.agent_skill import AgentSkill

# ---------------------------------------------------------------------------
# flink_agents/runtime/skill/skill_parser.py
# ---------------------------------------------------------------------------


@dataclass
class ParsedMarkdown:
    """Result of parsing markdown with frontmatter.

    Attributes:
    ----------
    metadata : Dict[str, Any]
        YAML mapping extracted from the frontmatter (empty if there is none).
    content : str
        Markdown content (without frontmatter).
    """

    metadata: Dict[str, Any]
    content: str


class MarkdownSkillParser:
    """Utility for parsing Markdown files with YAML frontmatter."""

    # Pattern to match frontmatter: starts with ---, ends with ---
    # Group 1: frontmatter content (non-greedy)
    # Group 2: remaining content
    FRONTMATTER_PATTERN = re.compile(
        r"^---\s*[\r\n]+(.*?)[\r\n]*---(?:\s*[\r\n]+)?(.*)", re.DOTALL
    )

    @classmethod
    def parse(cls, markdown: str) -> ParsedMarkdown:
        """Parse markdown content with YAML frontmatter.

        Args:
            markdown: Markdown content.

        Returns:
            ParsedMarkdown containing metadata and content. The metadata is
            an empty dict when there is no frontmatter or it holds no keys.

        Raises:
            ValueError: If the YAML syntax is invalid or the frontmatter is
                not a mapping.
        """
        if not markdown:
            return ParsedMarkdown(metadata={}, content="")

        matcher = cls.FRONTMATTER_PATTERN.match(markdown)
        if not matcher:
            # No frontmatter found, treat entire content as markdown
            return ParsedMarkdown(metadata={}, content=markdown)

        yaml_content = matcher.group(1).strip()
        markdown_content = matcher.group(2)
        if not yaml_content:
            return ParsedMarkdown(metadata={}, content=markdown_content)

        try:
            metadata = yaml.safe_load(yaml_content)
        except ValueError:
            raise
        except Exception as e:
            msg = f"Invalid YAML frontmatter syntax: {e}"
            raise ValueError(msg) from e

        if metadata is None:
            # Frontmatter that contains only comments loads as None.
            metadata = {}
        elif not isinstance(metadata, dict):
            # A scalar or list would otherwise fail later with AttributeError.
            msg = (
                "Invalid YAML frontmatter: expected a mapping, "
                f"got {type(metadata).__name__}"
            )
            raise ValueError(msg)

        return ParsedMarkdown(metadata=metadata, content=markdown_content)


class SkillParser:
    """Parser for creating AgentSkill from SKILL.md content."""

    @classmethod
    def parse_skill(
        cls,
        skill_md_content: str,
    ) -> "AgentSkill":
        """Parse SKILL.md content and create an AgentSkill.

        Args:
            skill_md_content: The raw content of SKILL.md file.

        Returns:
            The parsed AgentSkill.

        Raises:
            ValueError: If a required field (name, description) or the
                markdown body is missing.
        """
        parsed = MarkdownSkillParser.parse(skill_md_content)
        metadata = parsed.metadata

        raw_name = metadata.get("name")
        raw_description = metadata.get("description")

        # YAML loads an unquoted numeric value such as `name: 2048` as a
        # number, so convert to text before stripping.
        name = str(raw_name).strip() if raw_name else ""
        description = str(raw_description).strip() if raw_description else ""

        if not name:
            msg = "The SKILL.md must have a YAML frontmatter including 'name' field."
            raise ValueError(msg)
        if not description:
            msg = "The SKILL.md must have a YAML frontmatter including 'description' field."
            raise ValueError(msg)

        if not parsed.content:
            msg = "The SKILL.md must have a markdown content after YAML frontmatter."
            raise ValueError(msg)

        return AgentSkill(
            name=name,
            description=description,
            license=metadata.get("license"),
            compatibility=metadata.get("compatibility"),
            metadata=metadata.get("metadata"),
            content=parsed.content,
        )


# ---------------------------------------------------------------------------
# flink_agents/runtime/skill/skill_prompt_provider.py
# ---------------------------------------------------------------------------


class SkillPromptProvider:
    """System prompt templates for skill discovery and activation.

    The discovery prompt is assembled as SKILL_DISCOVERY_PROMPT, then one
    AVAILABLE_SKILL_TEMPLATE per skill, then AVAILABLE_SKILLS_TAG_END.
    """

    # System prompt template for skills discovery
    SKILL_DISCOVERY_PROMPT = """## Available Skills

<usage>
Skills provide specialized capabilities and domain knowledge. Use them when they match your current task.

Load a skill with `load_skill(name="<skill-name>")` to get its full instructions.
Individual resources (scripts, references, assets) can be loaded with a `path` argument.

The loaded content includes the skill's base directory and the absolute paths of its resources.
</usage>

<available_skills>
"""

    # System prompt template for available skills
    AVAILABLE_SKILL_TEMPLATE = """
<skill>
<name>{name}</name>
<description>{description}</description>
</skill>
"""

    AVAILABLE_SKILLS_TAG_END = """
</available_skills>
"""


# ---------------------------------------------------------------------------
# flink_agents/runtime/skill/skill_repository.py
# ---------------------------------------------------------------------------


@dataclass
class SkillRepositoryInfo:
    """Information about a skill repository.

    Attributes:
    ----------
    repo_type : str
        The type of repository (e.g., "filesystem", "classpath", "url").
    location : str
        The location of the repository (e.g., path, URL).
    writeable : bool
        Whether the repository supports write operations.
    """

    repo_type: str
    location: str
    writeable: bool


class SkillRepository(ABC):
    """Abstract interface for skill repositories.

    A SkillRepository is responsible for loading skills from a specific
    source (filesystem, classpath, URL, etc.).

    Each skill is stored in its own subdirectory containing a SKILL.md file
    and optional resource files:

        baseDir/
        ├── skill-name-1/
        │   ├── SKILL.md          # Required: Entry file with YAML frontmatter
        │   ├── references/       # Optional: Reference documentation
        │   ├── examples/         # Optional: Example files
        │   └── scripts/          # Optional: Script files
        └── skill-name-2/
            └── SKILL.md
    """

    @abstractmethod
    def get_skill(self, name: str) -> AgentSkill | None:
        """Get a skill by name.

        Args:
            name: The skill name.

        Returns:
            The skill, or None if not found.
        """

    @abstractmethod
    def get_skills(self) -> List[AgentSkill]:
        """Get all skills in this repository.

        Returns:
            List of all skills.
        """

    @abstractmethod
    def get_resources(self, name: str) -> Dict[str, str]:
        """Get resources for the specified skill."""
class LoadSkillArgs(BaseModel):
    """Arguments for LoadSkillTool.

    Attributes:
    ----------
    name : str
        Name of the skill to load.
    path : str | None
        Resource path inside the skill. ``None`` and ``"SKILL.md"`` both
        select the skill's main SKILL.md content.
    """

    name: str = Field(
        ..., description="The name of the skill to load (e.g., 'pdf-processing')."
    )
    path: str | None = Field(
        default="SKILL.md",
        description=(
            "Optional path to a specific resource within the skill. "
            "If not provided, returns the full SKILL.md content."
        ),
    )


class LoadSkillTool(Tool):
    """Tool for loading skill content and resources.

    Accesses the SkillManager through the runtime ResourceContext
    (not the public ResourceContext interface).
    """

    metadata: ToolMetadata = Field(exclude=True)

    def __init__(self, **kwargs: Any) -> None:
        """Initialize the load skill tool with its fixed name and schema."""
        super().__init__(
            metadata=ToolMetadata(
                name="load_skill",
                description=(
                    "Load a skill's content or a specific resource. "
                    "Use this to access skill instructions and resources. "
                ),
                args_schema=LoadSkillArgs,
            ),
            **kwargs,
        )

    @classmethod
    def tool_type(cls) -> ToolType:
        """Return tool type of class."""
        return ToolType.FUNCTION

    def call(self, *args: Any, **kwargs: Any) -> str:
        """Load a skill's SKILL.md content or one of its resources.

        Args:
            *args: Optional positional ``name`` followed by optional ``path``.
            **kwargs: The same fields passed by keyword.

        Returns:
            The requested content, or a human-readable message when the skill
            manager, the skill or the resource is not available.

        Raises:
            TypeError: If a field is given both positionally and by keyword.
        """
        # Map positionals onto the schema fields in declaration order so that
        # a positional ``path`` is honoured instead of being silently dropped.
        positional = dict(zip(("name", "path"), args))
        parsed_args = LoadSkillArgs(**positional, **kwargs)

        skill_name = parsed_args.name
        resource_path = parsed_args.path
        logger.debug("Loading skill resource %s for %s.", resource_path, skill_name)

        manager = self._get_skill_manager()
        if manager is None:
            return "Skill manager not available. No skills have been registered."

        try:
            skill = manager.get_skill(skill_name)
        except ValueError:
            available = manager.get_all_skill_names()
            available_str = (
                ", ".join(available) if available else "No skills available."
            )
            return f"Skill '{skill_name}' not found. Available skills: {available_str}"

        if resource_path is None or resource_path == "SKILL.md":
            skill_dir = manager.get_skill_dir(skill_name)
            if skill_dir is None:
                # No base directory known for this skill: return the raw content.
                return skill.content

            # List resources by full path so the model can reference them
            # without having to join paths itself.
            files_section = "\n".join(
                f"<file>{skill_dir / rel}</file>"
                for rel in sorted(skill.get_resource_paths())
            )
            return (
                f'<skill_content name="{skill_name}">\n'
                f"# Skill: {skill_name}\n\n"
                f"{skill.content.strip()}\n\n"
                f"Base directory for this skill: {skill_dir}\n"
                f"Relative paths in this skill are relative to this base directory.\n"
                f"<skill_files>\n"
                f"{files_section}\n"
                f"</skill_files>\n"
                f"</skill_content>"
            )

        content = skill.get_resource(resource_path)
        if content is None:
            available = sorted(skill.get_resource_paths())
            return f"Resource '{resource_path}' not found in skill '{skill_name}', Available resources: {available}"
        return content

    def _get_skill_manager(self) -> SkillManager | None:
        """Return the SkillManager of the runtime context, or None.

        Only ``ResourceContextImpl`` exposes ``get_skill_manager``; any other
        context yields ``None``.
        """
        # Imported locally, as in the original code (presumably to avoid an
        # import cycle with the runtime package -- confirm).
        from flink_agents.runtime.resource_context import ResourceContextImpl

        ctx = self.resource_context
        if isinstance(ctx, ResourceContextImpl):
            return ctx.get_skill_manager()
        return None
+ +- github + - source: https://clawhub.ai/steipete/github + - license: MIT-0 +- nano-banana-pro + - source: https://clawhub.ai/steipete/nano-banana-pro + - license: MIT-0 diff --git a/python/flink_agents/runtime/skill/tests/resources/skill_discovery_prompt.txt b/python/flink_agents/runtime/skill/tests/resources/skill_discovery_prompt.txt new file mode 100644 index 00000000..11d2e6ac --- /dev/null +++ b/python/flink_agents/runtime/skill/tests/resources/skill_discovery_prompt.txt @@ -0,0 +1,24 @@ +## Available Skills + +<usage> +Skills provide specialized capabilities and domain knowledge. Use them when they match your current task. + +Load a skill with `load_skill(name="<skill-name>")` to get its full instructions. +Individual resources (scripts, references, assets) can be loaded with a `path` argument. + +The loaded content includes the skill's base directory and the absolute paths of its resources. +</usage> + +<available_skills> + +<skill> +<name>github</name> +<description>Interact with GitHub using the `gh` CLI. Use `gh issue`, `gh pr`, `gh run`, and `gh api` for issues, PRs, CI runs, and advanced queries.</description> +</skill> + +<skill> +<name>nano-banana-pro</name> +<description>Generate/edit images with Nano Banana Pro (Gemini 3 Pro Image). Use for image create/modify requests incl. edits. Supports text-to-image + image-to-image; 1K/2K/4K; use --input-image.</description> +</skill> + +</available_skills> diff --git a/python/flink_agents/runtime/skill/tests/resources/skills/github/SKILL.md b/python/flink_agents/runtime/skill/tests/resources/skills/github/SKILL.md new file mode 100644 index 00000000..2c9356ce --- /dev/null +++ b/python/flink_agents/runtime/skill/tests/resources/skills/github/SKILL.md @@ -0,0 +1,47 @@ +--- +name: github +description: "Interact with GitHub using the `gh` CLI. Use `gh issue`, `gh pr`, `gh run`, and `gh api` for issues, PRs, CI runs, and advanced queries." +--- + +# GitHub Skill + +Use the `gh` CLI to interact with GitHub. 
Always specify `--repo owner/repo` when not in a git directory, or use URLs directly. + +## Pull Requests + +Check CI status on a PR: +```bash +gh pr checks 55 --repo owner/repo +``` + +List recent workflow runs: +```bash +gh run list --repo owner/repo --limit 10 +``` + +View a run and see which steps failed: +```bash +gh run view <run-id> --repo owner/repo +``` + +View logs for failed steps only: +```bash +gh run view <run-id> --repo owner/repo --log-failed +``` + +## API for Advanced Queries + +The `gh api` command is useful for accessing data not available through other subcommands. + +Get PR with specific fields: +```bash +gh api repos/owner/repo/pulls/55 --jq '.title, .state, .user.login' +``` + +## JSON Output + +Most commands support `--json` for structured output. You can use `--jq` to filter: + +```bash +gh issue list --repo owner/repo --json number,title --jq '.[] | "\(.number): \(.title)"' +``` \ No newline at end of file diff --git a/python/flink_agents/runtime/skill/tests/resources/skills/nano-banana-pro/SKILL.md b/python/flink_agents/runtime/skill/tests/resources/skills/nano-banana-pro/SKILL.md new file mode 100644 index 00000000..711ee3ff --- /dev/null +++ b/python/flink_agents/runtime/skill/tests/resources/skills/nano-banana-pro/SKILL.md @@ -0,0 +1,130 @@ +--- +name: nano-banana-pro +description: Generate/edit images with Nano Banana Pro (Gemini 3 Pro Image). Use for image create/modify requests incl. edits. Supports text-to-image + image-to-image; 1K/2K/4K; use --input-image. +--- + +# Nano Banana Pro Image Generation & Editing + +Generate new images or edit existing ones using Google's Nano Banana Pro API (Gemini 3 Pro Image). 
+ +## Usage + +Run the script using absolute path (do NOT cd to skill directory first): + +**Generate new image:** +```bash +uv run ~/.codex/skills/nano-banana-pro/scripts/generate_image.py --prompt "your image description" --filename "output-name.png" [--resolution 1K|2K|4K] [--api-key KEY] +``` + +**Edit existing image:** +```bash +uv run ~/.codex/skills/nano-banana-pro/scripts/generate_image.py --prompt "editing instructions" --filename "output-name.png" --input-image "path/to/input.png" [--resolution 1K|2K|4K] [--api-key KEY] +``` + +**Important:** Always run from the user's current working directory so images are saved where the user is working, not in the skill directory. + +## Default Workflow (draft → iterate → final) + +Goal: fast iteration without burning time on 4K until the prompt is correct. + +- Draft (1K): quick feedback loop + - `uv run ~/.codex/skills/nano-banana-pro/scripts/generate_image.py --prompt "<draft prompt>" --filename "yyyy-mm-dd-hh-mm-ss-draft.png" --resolution 1K` +- Iterate: adjust prompt in small diffs; keep filename new per run + - If editing: keep the same `--input-image` for every iteration until you’re happy. +- Final (4K): only when prompt is locked + - `uv run ~/.codex/skills/nano-banana-pro/scripts/generate_image.py --prompt "<final prompt>" --filename "yyyy-mm-dd-hh-mm-ss-final.png" --resolution 4K` + +## Resolution Options + +The Gemini 3 Pro Image API supports three resolutions (uppercase K required): + +- **1K** (default) - ~1024px resolution +- **2K** - ~2048px resolution +- **4K** - ~4096px resolution + +Map user requests to API parameters: +- No mention of resolution → `1K` +- "low resolution", "1080", "1080p", "1K" → `1K` +- "2K", "2048", "normal", "medium resolution" → `2K` +- "high resolution", "high-res", "hi-res", "4K", "ultra" → `4K` + +## API Key + +The script checks for API key in this order: +1. `--api-key` argument (use if user provided key in chat) +2. 
`GEMINI_API_KEY` environment variable + +If neither is available, the script exits with an error message. + +## Preflight + Common Failures (fast fixes) + +- Preflight: + - `command -v uv` (must exist) + - `test -n \"$GEMINI_API_KEY\"` (or pass `--api-key`) + - If editing: `test -f \"path/to/input.png\"` + +- Common failures: + - `Error: No API key provided.` → set `GEMINI_API_KEY` or pass `--api-key` + - `Error loading input image:` → wrong path / unreadable file; verify `--input-image` points to a real image + - “quota/permission/403” style API errors → wrong key, no access, or quota exceeded; try a different key/account + +## Filename Generation + +Generate filenames with the pattern: `yyyy-mm-dd-hh-mm-ss-name.png` + +**Format:** `{timestamp}-{descriptive-name}.png` +- Timestamp: Current date/time in format `yyyy-mm-dd-hh-mm-ss` (24-hour format) +- Name: Descriptive lowercase text with hyphens +- Keep the descriptive part concise (1-5 words typically) +- Use context from user's prompt or conversation +- If unclear, use random identifier (e.g., `x9k2`, `a7b3`) + +Examples: +- Prompt "A serene Japanese garden" → `2025-11-23-14-23-05-japanese-garden.png` +- Prompt "sunset over mountains" → `2025-11-23-15-30-12-sunset-mountains.png` +- Prompt "create an image of a robot" → `2025-11-23-16-45-33-robot.png` +- Unclear context → `2025-11-23-17-12-48-x9k2.png` + +## Image Editing + +When the user wants to modify an existing image: +1. Check if they provide an image path or reference an image in the current directory +2. Use `--input-image` parameter with the path to the image +3. The prompt should contain editing instructions (e.g., "make the sky more dramatic", "remove the person", "change to cartoon style") +4. Common editing tasks: add/remove elements, change style, adjust colors, blur background, etc. + +## Prompt Handling + +**For generation:** Pass user's image description as-is to `--prompt`. Only rework if clearly insufficient. 
+ +**For editing:** Pass editing instructions in `--prompt` (e.g., "add a rainbow in the sky", "make it look like a watercolor painting") + +Preserve user's creative intent in both cases. + +## Prompt Templates (high hit-rate) + +Use templates when the user is vague or when edits must be precise. + +- Generation template: + - “Create an image of: <subject>. Style: <style>. Composition: <camera/shot>. Lighting: <lighting>. Background: <background>. Color palette: <palette>. Avoid: <list>.” + +- Editing template (preserve everything else): + - “Change ONLY: <single change>. Keep identical: subject, composition/crop, pose, lighting, color palette, background, text, and overall style. Do not add new objects. If text exists, keep it unchanged.” + +## Output + +- Saves PNG to current directory (or specified path if filename includes directory) +- Script outputs the full path to the generated image +- **Do not read the image back** - just inform the user of the saved path + +## Examples + +**Generate new image:** +```bash +uv run ~/.codex/skills/nano-banana-pro/scripts/generate_image.py --prompt "A serene Japanese garden with cherry blossoms" --filename "2025-11-23-14-23-05-japanese-garden.png" --resolution 4K +``` + +**Edit existing image:** +```bash +uv run ~/.codex/skills/nano-banana-pro/scripts/generate_image.py --prompt "make the sky more dramatic with storm clouds" --filename "2025-11-23-14-25-30-dramatic-sky.png" --input-image "original-photo.jpg" --resolution 2K +``` diff --git a/python/flink_agents/runtime/skill/tests/resources/skills/nano-banana-pro/_meta.json b/python/flink_agents/runtime/skill/tests/resources/skills/nano-banana-pro/_meta.json new file mode 100644 index 00000000..9d100c1d --- /dev/null +++ b/python/flink_agents/runtime/skill/tests/resources/skills/nano-banana-pro/_meta.json @@ -0,0 +1,6 @@ +{ + "ownerId": "kn70pywhg0fyz996kpa8xj89s57yhv26", + "slug": "nano-banana-pro", + "version": "1.0.1", + "publishedAt": 1767651987917 +} \ No newline at end 
of file diff --git a/python/flink_agents/runtime/skill/tests/resources/skills/nano-banana-pro/scripts/generate_image.py b/python/flink_agents/runtime/skill/tests/resources/skills/nano-banana-pro/scripts/generate_image.py new file mode 100644 index 00000000..84176c3e --- /dev/null +++ b/python/flink_agents/runtime/skill/tests/resources/skills/nano-banana-pro/scripts/generate_image.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "google-genai>=1.0.0", +# "pillow>=10.0.0", +# ] +# /// +"""Generate images using Google's Nano Banana Pro (Gemini 3 Pro Image) API. + +Usage: + uv run generate_image.py --prompt "your image description" --filename "output.png" [--resolution 1K|2K|4K] [--api-key KEY] +""" + +import argparse +import os +import sys +from pathlib import Path + + +def get_api_key(provided_key: str | None) -> str | None: + """Get API key from argument first, then environment.""" + if provided_key: + return provided_key + return os.environ.get("GEMINI_API_KEY") + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)" + ) + parser.add_argument( + "--prompt", "-p", required=True, help="Image description/prompt" + ) + parser.add_argument( + "--filename", + "-f", + required=True, + help="Output filename (e.g., sunset-mountains.png)", + ) + parser.add_argument( + "--input-image", "-i", help="Optional input image path for editing/modification" + ) + parser.add_argument( + "--resolution", + "-r", + choices=["1K", "2K", "4K"], + default="1K", + help="Output resolution: 1K (default), 2K, or 4K", + ) + parser.add_argument( + "--api-key", "-k", help="Gemini API key (overrides GEMINI_API_KEY env var)" + ) + + args = parser.parse_args() + + # Get API key + api_key = get_api_key(args.api_key) + if not api_key: + print("Error: No API key provided.", file=sys.stderr) + print("Please either:", file=sys.stderr) + print(" 1. 
Provide --api-key argument", file=sys.stderr) + print(" 2. Set GEMINI_API_KEY environment variable", file=sys.stderr) + sys.exit(1) + + # Import here after checking API key to avoid slow import on error + from google import genai + from google.genai import types + from PIL import Image as PILImage + + # Initialise client + client = genai.Client(api_key=api_key) + + # Set up output path + output_path = Path(args.filename) + output_path.parent.mkdir(parents=True, exist_ok=True) + + # Load input image if provided + input_image = None + output_resolution = args.resolution + if args.input_image: + try: + input_image = PILImage.open(args.input_image) + print(f"Loaded input image: {args.input_image}") + + # Auto-detect resolution if not explicitly set by user + if args.resolution == "1K": # Default value + # Map input image size to resolution + width, height = input_image.size + max_dim = max(width, height) + if max_dim >= 3000: + output_resolution = "4K" + elif max_dim >= 1500: + output_resolution = "2K" + else: + output_resolution = "1K" + print( + f"Auto-detected resolution: {output_resolution} (from input {width}x{height})" + ) + except Exception as e: + print(f"Error loading input image: {e}", file=sys.stderr) + sys.exit(1) + + # Build contents (image first if editing, prompt only if generating) + if input_image: + contents = [input_image, args.prompt] + print(f"Editing image with resolution {output_resolution}...") + else: + contents = args.prompt + print(f"Generating image with resolution {output_resolution}...") + + try: + response = client.models.generate_content( + model="gemini-3-pro-image-preview", + contents=contents, + config=types.GenerateContentConfig( + response_modalities=["TEXT", "IMAGE"], + image_config=types.ImageConfig(image_size=output_resolution), + ), + ) + + # Process response and convert to PNG + image_saved = False + for part in response.parts: + if part.text is not None: + print(f"Model response: {part.text}") + elif part.inline_data is not 
None: + # Convert inline data to PIL Image and save as PNG + from io import BytesIO + + # inline_data.data is already bytes, not base64 + image_data = part.inline_data.data + if isinstance(image_data, str): + # If it's a string, it might be base64 + import base64 + + image_data = base64.b64decode(image_data) + + image = PILImage.open(BytesIO(image_data)) + + # Ensure RGB mode for PNG (convert RGBA to RGB with white background if needed) + if image.mode == "RGBA": + rgb_image = PILImage.new("RGB", image.size, (255, 255, 255)) + rgb_image.paste(image, mask=image.split()[3]) + rgb_image.save(str(output_path), "PNG") + elif image.mode == "RGB": + image.save(str(output_path), "PNG") + else: + image.convert("RGB").save(str(output_path), "PNG") + image_saved = True + + if image_saved: + full_path = output_path.resolve() + print(f"\nImage saved: {full_path}") + else: + print("Error: No image was generated in the response.", file=sys.stderr) + sys.exit(1) + + except Exception as e: + print(f"Error generating image: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python/flink_agents/runtime/skill/tests/test_load_skill.py b/python/flink_agents/runtime/skill/tests/test_load_skill.py new file mode 100644 index 00000000..76db7583 --- /dev/null +++ b/python/flink_agents/runtime/skill/tests/test_load_skill.py @@ -0,0 +1,112 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
@pytest.fixture
def manager() -> SkillManager:
    """SkillManager loaded with the skills under ``resources/skills``."""
    return SkillManager(Skills.from_local_dir(str(skills_dir)))


@pytest.fixture
def tool(manager: SkillManager) -> LoadSkillTool:
    """LoadSkillTool wired to a real SkillManager via a mocked runtime context."""
    from flink_agents.runtime.resource_context import ResourceContextImpl

    # spec= makes the mock pass the isinstance check inside the tool.
    runtime_ctx = MagicMock(spec=ResourceContextImpl)
    runtime_ctx.get_skill_manager.return_value = manager
    return LoadSkillTool(resource_context=runtime_ctx)


class TestLoadSkillTool:
    """Tests for LoadSkillTool.call."""

    # -- load SKILL.md content -----------------------------------------------

    def test_load_skill_default_path(self, tool: LoadSkillTool) -> None:
        """Default path (SKILL.md) returns the skill content with base path and file list."""
        result = tool.call(name="nano-banana-pro")
        assert "<skill_content" in result
        # Base directory is included.
        expected_dir = (skills_dir / "nano-banana-pro").resolve()
        assert f"Base directory for this skill: {expected_dir}" in result
        # Absolute paths to resources are listed. Build the expected path with
        # pathlib so the separator matches the platform.
        assert "<skill_files>" in result
        assert str(expected_dir / "scripts" / "generate_image.py") in result

    def test_load_skill_explicit_skill_md(self, tool: LoadSkillTool) -> None:
        """Explicitly passing path='SKILL.md' returns the same content."""
        result = tool.call(name="github", path="SKILL.md")
        assert "gh" in result

    def test_load_skill_path_none(self, tool: LoadSkillTool) -> None:
        """path=None returns the skill content."""
        result = tool.call(name="github", path=None)
        assert "gh" in result

    # -- load specific resource ----------------------------------------------

    def test_load_resource(self, tool: LoadSkillTool) -> None:
        """Loading a specific resource returns its content."""
        result = tool.call(name="nano-banana-pro", path="scripts/generate_image.py")
        assert "get_api_key" in result

    def test_load_resource_not_found(self, tool: LoadSkillTool) -> None:
        """Loading a nonexistent resource returns an error with available list."""
        result = tool.call(name="nano-banana-pro", path="nonexistent.txt")
        assert "not found" in result.lower()
        assert "scripts/generate_image.py" in result

    # -- skill not found -----------------------------------------------------

    def test_skill_not_found(self, tool: LoadSkillTool) -> None:
        """A nonexistent skill returns an error listing available skills."""
        result = tool.call(name="nonexistent-skill")
        assert "not found" in result.lower()
        assert "github" in result
        assert "nano-banana-pro" in result

    # -- no skill manager ----------------------------------------------------

    def test_no_skill_manager(self) -> None:
        """When no SkillManager is available, return error message."""
        # A plain ResourceContext mock is not a ResourceContextImpl, so the
        # tool cannot reach a SkillManager.
        mock_ctx = MagicMock(spec=ResourceContext)
        tool = LoadSkillTool(resource_context=mock_ctx)
        result = tool.call(name="github")
        assert "not available" in result

    # -- positional args -----------------------------------------------------

    def test_call_with_positional_args(self, tool: LoadSkillTool) -> None:
        """call() should accept positional arg for skill name."""
        result = tool.call("github")
        assert "gh" in result
class TestSkillManager:
    """Tests for SkillManager class."""

    @pytest.fixture
    def skills_dir(self) -> Path:
        """Return the directory holding the bundled test skills."""
        return base_dir / "resources" / "skills"

    def test_add_skills_from_path(self, skills_dir: Path) -> None:
        """Skills found in a local directory are registered with their metadata."""
        manager = SkillManager(Skills.from_local_dir(str(skills_dir)))
        skill = manager.get_skill("github")
        assert skill.name == "github"
        assert skill.description == (
            "Interact with GitHub using the `gh` CLI. "
            "Use `gh issue`, `gh pr`, `gh run`, and `gh api` "
            "for issues, PRs, CI runs, and advanced queries."
        )

    def test_generate_discovery_prompt(self, skills_dir: Path) -> None:
        """The discovery prompt matches the expected text file exactly."""
        manager = SkillManager(Skills.from_local_dir(str(skills_dir)))

        prompt = manager.generate_discovery_prompt("github", "nano-banana-pro")
        expected_file = base_dir / "resources" / "skill_discovery_prompt.txt"
        # Explicit encoding keeps the comparison independent of the locale.
        content = expected_file.read_text(encoding="utf-8")
        assert prompt == content

    def test_get_skill(self, skills_dir: Path) -> None:
        """Each bundled skill can be fetched by name with its description."""
        manager = SkillManager(Skills.from_local_dir(str(skills_dir)))

        skill = manager.get_skill("github")
        assert skill is not None
        assert skill.name == "github"
        assert (
            skill.description
            == "Interact with GitHub using the `gh` CLI. Use `gh issue`, `gh pr`, `gh run`, and `gh api` for issues, PRs, CI runs, and advanced queries."
        )

        skill2 = manager.get_skill("nano-banana-pro")
        assert skill2 is not None
        assert skill2.name == "nano-banana-pro"
        assert (
            skill2.description
            == "Generate/edit images with Nano Banana Pro (Gemini 3 Pro Image). Use for image create/modify requests incl. edits. Supports text-to-image + image-to-image; 1K/2K/4K; use --input-image."
        )

    def test_load_skill_resource(self, skills_dir: Path) -> None:
        """A known resource returns its content; an unknown one returns None."""
        manager = SkillManager(Skills.from_local_dir(str(skills_dir)))

        skill = manager.get_skill("nano-banana-pro")
        content = skill.get_resource("scripts/generate_image.py")
        assert content is not None
        assert "get_api_key" in content

        nonexistent = skill.get_resource("nonexistent")
        assert nonexistent is None
class TestMarkdownSkillParser:
    """Tests for MarkdownSkillParser class."""

    def test_parse_with_frontmatter(self) -> None:
        """Test parsing markdown with frontmatter."""
        markdown = """---
name: my-skill
description: A test skill
---
# My Skill

This is the skill content.
"""
        parsed = MarkdownSkillParser.parse(markdown)
        assert parsed.metadata["name"] == "my-skill"
        assert parsed.metadata["description"] == "A test skill"
        assert parsed.content == "# My Skill\n\nThis is the skill content.\n"

    def test_parse_without_frontmatter(self) -> None:
        """Test parsing markdown without frontmatter."""
        markdown = "# My Skill\n\nThis is just markdown."
        parsed = MarkdownSkillParser.parse(markdown)
        assert parsed.metadata == {}
        assert markdown in parsed.content

    def test_parse_empty(self) -> None:
        """Test parsing empty content."""
        parsed = MarkdownSkillParser.parse("")
        assert parsed.metadata == {}
        assert parsed.content == ""

    def test_parse_with_optional_fields(self) -> None:
        """Test parsing with optional frontmatter fields."""
        markdown = """---
name: pdf-processing
description: PDF skill
license: Apache-2.0
compatibility: Requires python3
metadata:
  author: example-org
  version: "1.0"
allowed-tools: Bash(git:*) Read
---
Content here.
"""
        parsed = MarkdownSkillParser.parse(markdown)
        assert parsed.metadata["name"] == "pdf-processing"
        assert parsed.metadata["license"] == "Apache-2.0"
        assert parsed.metadata["compatibility"] == "Requires python3"
        assert parsed.metadata["metadata"] == {
            "author": "example-org",
            "version": "1.0",
        }
        assert parsed.metadata["allowed-tools"] == "Bash(git:*) Read"
        assert parsed.content == "Content here.\n"

    def test_parse_quoted_values(self) -> None:
        """Test parsing quoted YAML values."""
        markdown = """---
name: test
description: "A description with: colons"
---
Content
"""
        parsed = MarkdownSkillParser.parse(markdown)
        assert parsed.metadata["description"] == "A description with: colons"

    def test_parse_skill(self) -> None:
        """Test parsing the bundled github SKILL.md file."""
        skill_file = base_dir / "resources" / "skills" / "github" / "SKILL.md"
        # Explicit encoding keeps the test independent of the locale.
        markdown = skill_file.read_text(encoding="utf-8")
        parsed = MarkdownSkillParser.parse(markdown)
        assert parsed.metadata["name"] == "github"
        assert (
            parsed.metadata["description"]
            == "Interact with GitHub using the `gh` CLI. Use `gh issue`, "
            "`gh pr`, `gh run`, and `gh api` for issues, PRs, CI runs, "
            "and advanced queries."
        )


class TestSkillParser:
    """Tests for SkillParser class."""

    def test_parse_skill(self) -> None:
        """Test parsing a complete skill."""
        skill_md = """---
name: my-skill
description: A test skill
license: Apache-2.0
---
# My Skill

Instructions here.
"""
        skill = SkillParser.parse_skill(skill_md)

        assert skill.name == "my-skill"
        assert skill.description == "A test skill"
        assert skill.license == "Apache-2.0"
        assert "Instructions here" in skill.content

    def test_parse_skill_missing_name(self) -> None:
        """Test parsing skill without name."""
        skill_md = """---
description: A test skill
---
Content
"""
        with pytest.raises(ValueError):
            SkillParser.parse_skill(skill_md)

    def test_parse_skill_missing_description(self) -> None:
        """Test parsing skill without description."""
        skill_md = """---
name: test
---
Content
"""
        with pytest.raises(ValueError):
            SkillParser.parse_skill(skill_md)

    def test_parse_skill_missing_frontmatter(self) -> None:
        """Test parsing skill markdown that has no frontmatter at all."""
        skill_md = "# My Skill\n\nThis is just markdown."
        with pytest.raises(ValueError):
            SkillParser.parse_skill(skill_md)

    def test_parse_skill_missing_content(self) -> None:
        """Test parsing skill that has frontmatter but no body."""
        skill_md = """---
name: my-skill
description: A test skill
license: Apache-2.0
---
"""
        with pytest.raises(ValueError):
            SkillParser.parse_skill(skill_md)
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ +"""Unit tests for Skill Repository components.""" + +from pathlib import Path + +import pytest + +from flink_agents.runtime.skill.repository.filesystem_repository import ( + FileSystemSkillRepository, +) + + +class TestFileSystemSkillRepository: + """Tests for FileSystemSkillRepository class.""" + + @pytest.fixture + def skills_dir(self) -> Path: + """Get the skills' directory.""" + base_dir = Path(__file__).parent + return base_dir / "resources" / "skills" + + def test_create_repository(self, skills_dir: Path) -> None: + """Test creating a repository.""" + repo = FileSystemSkillRepository(skills_dir) + # Path is resolved, so compare resolved paths + assert repo.base_dir == skills_dir.resolve() + + def test_create_repository_invalid_path(self) -> None: + """Test creating repository with invalid path.""" + with pytest.raises(ValueError): + FileSystemSkillRepository("/nonexistent/path") + + def test_get_all_skill_names(self, skills_dir: Path) -> None: + """Test getting all skill names.""" + repo = FileSystemSkillRepository(skills_dir) + names = repo._get_all_skill_names() + assert len(names) == 2 + assert "github" in names + assert "nano-banana-pro" in names + + def test_get_skill(self, skills_dir: Path) -> None: + """Test getting a specific skill.""" + repo = FileSystemSkillRepository(skills_dir) + skill = repo.get_skill("github") + + assert skill is not None + assert skill.name == "github" + assert ( + skill.description + == "Interact with GitHub using the `gh` CLI. 
Use `gh issue`, `gh pr`, `gh run`, and `gh api` for issues, PRs, CI runs, and advanced queries." + ) + assert "## JSON Output" in skill.content + + def test_get_resources(self, skills_dir: Path) -> None: + """Test getting a skill with resources.""" + repo = FileSystemSkillRepository(skills_dir) + skill = repo.get_skill("nano-banana-pro") + + assert skill is not None + skill.resources = repo.get_resources("nano-banana-pro") + assert "_meta.json" in skill.resources + + resource = skill.get_resource("scripts/generate_image.py") + assert resource is not None + assert "get_api_key" in resource + + def test_get_nonexistent_skill(self, skills_dir: Path) -> None: + """Test getting a nonexistent skill.""" + repo = FileSystemSkillRepository(skills_dir) + skill = repo.get_skill("nonexistent") + assert skill is None + + def test_get_all_skills(self, skills_dir: Path) -> None: + """Test getting all skills.""" + repo = FileSystemSkillRepository(skills_dir) + skills = repo.get_skills() + assert len(skills) == 2 + names = {s.name for s in skills} + assert "github" in names + assert "nano-banana-pro" in names diff --git a/tools/.rat-excludes b/tools/.rat-excludes index bea5f93f..ff986781 100644 --- a/tools/.rat-excludes +++ b/tools/.rat-excludes @@ -16,4 +16,5 @@ PULL_REQUEST_TEMPLATE.md .pytest_cache/* .ruff_cache/* .*\.egg-info/* -licenses/* \ No newline at end of file +licenses/* +skills/* \ No newline at end of file
