155 lines
5.4 KiB
Python
155 lines
5.4 KiB
Python
# -*- coding: utf-8 -*-
|
||
import os
|
||
import fire
|
||
from typing import List
|
||
|
||
from agentscope.model import DashScopeChatModel
|
||
from agentscope.formatter import DashScopeChatFormatter
|
||
from agentscope.memory import InMemoryMemory
|
||
from agentscope.agent import UserAgent
|
||
from agentscope.tool import Toolkit
|
||
|
||
from agent_factory import create_agent
|
||
from prompts import DJ_SYS_PROMPT, DJ_DEV_SYS_PROMPT, ROUTER_SYS_PROMPT, MCP_SYS_PROMPT
|
||
from tools import dj_toolkit, dj_dev_toolkit, mcp_tools, get_mcp_toolkit, agents2toolkit
|
||
|
||
# Shared, module-level building blocks reused by the agents created in main().

# Options common to both DashScope chat models. The API key is read from the
# environment; a missing DASHSCOPE_API_KEY raises KeyError at import time.
_COMMON_MODEL_OPTIONS = dict(
    api_key=os.environ["DASHSCOPE_API_KEY"],
    stream=True,
    enable_thinking=False,
)

# Default chat model.
model = DashScopeChatModel(model_name="qwen-max", **_COMMON_MODEL_OPTIONS)

# Model handed to the operator-development agent.
dev_model = DashScopeChatModel(
    model_name="qwen3-coder-480b-a35b-instruct",
    **_COMMON_MODEL_OPTIONS,
)

# One formatter and one memory instance, passed to the specialised agents.
formatter = DashScopeChatFormatter()
memory = InMemoryMemory()

# Agent representing the human in the chat loop.
user = UserAgent("User")
|
||
|
||
|
||
async def main(
    use_studio: bool = False,
    available_agents: List[str] = ["dj", "dj_dev"],
    retrieval_mode: str = "auto",
):
    """
    Main function for running the agent.

    Builds the requested specialised agents, wraps them as tools of a router
    agent, and then loops: read a user message, stop if it is ``exit``,
    otherwise hand it to the router.

    :param use_studio: Whether to use agentscope studio. Only the boolean
        ``True`` enables it.
    :param available_agents: List of available agents. Options: dj, dj_dev,
        dj_mcp. A single string is also accepted and is split on commas
        (e.g. ``"dj"`` or ``"dj,dj_mcp"``). Unknown names are reported and
        skipped.
    :param retrieval_mode: Retrieval mode for operators. Options: auto,
        vector, llm
    """
    # A bare string would otherwise be iterated character by character in the
    # loop below (so no agent is created) and matched by substring in the
    # "dj" check. Normalise it to a list of names first. The default list is
    # only read here, never mutated.
    if isinstance(available_agents, str):
        agent_names = [
            name.strip()
            for name in available_agents.split(",")
            if name.strip()
        ]
    else:
        agent_names = list(available_agents)

    if "dj" in agent_names:
        # Set global retrieval mode for tools to use
        os.environ["RETRIEVAL_MODE"] = retrieval_mode
        print(f"Using retrieval mode: {retrieval_mode}")

    agents = []
    for agent_name in agent_names:
        if agent_name == "dj":
            # Create agents using unified create_agent function
            dj_agent = create_agent(
                "datajuicer_agent",
                DJ_SYS_PROMPT,
                dj_toolkit,
                (
                    "A professional data preprocessing AI assistant with the following core capabilities: \n"
                    "Tool Matching \n"
                    "- Query and validate suitable DataJuicer operators; \n"
                    "Configuration Generation \n"
                    "- Create YAML configuration files and preview data; \n"
                    "Task Execution - Run data processing pipelines and output results"
                ),
                model,
                formatter,
                memory,
            )
            agents.append(dj_agent)
        elif agent_name == "dj_dev":
            # DJ Development Agent for operator development
            dj_dev_agent = create_agent(
                "dj_dev_agent",
                DJ_DEV_SYS_PROMPT,
                dj_dev_toolkit,
                (
                    "An expert DataJuicer development assistant specializing in creating new DataJuicer operators. \n"
                    "Core capabilities: \n"
                    "Reference Retrieval - fetch base classes and examples; \n"
                    "Environment Configuration - handle DATA_JUICER_PATH setup. if user provides a DataJuicer path requiring setup/update, please call this agent;\n; "
                    "Code Generation - write complete, convention-compliant operator code"
                ),
                dev_model,
                formatter,
                memory,
            )
            agents.append(dj_dev_agent)
        elif agent_name == "dj_mcp":
            # Start from the MCP toolkit and add the extra local tools to it.
            mcp_toolkit, _ = await get_mcp_toolkit()
            for tool in mcp_tools:
                mcp_toolkit.register_tool_function(tool)

            mcp_agent = create_agent(
                "mcp_datajuicer_agent",
                MCP_SYS_PROMPT,
                mcp_toolkit,
                (
                    "DataJuicer MCP Agent powered by Recipe Flow MCP server. \n"
                    "Core capabilities: \n"
                    "- Filter operators by tags/categories using MCP protocol; \n"
                    "- Real-time data processing pipeline execution. \n"
                ),
                model,
                formatter,
                memory,
            )
            agents.append(mcp_agent)
        else:
            # Previously an unrecognised name was dropped without any notice.
            print(
                f"Warning: unknown agent '{agent_name}' ignored. "
                "Options: dj, dj_dev, dj_mcp"
            )

    # Router agent - uses agents2toolkit to dynamically generate tools from
    # all agents created above
    router_agent = create_agent(
        "Router",
        ROUTER_SYS_PROMPT,
        agents2toolkit(agents),
        "A router agent that intelligently routes tasks to specialized DataJuicer agents",
        model,
        formatter,
        InMemoryMemory(),  # Router uses its own memory instance
    )

    # Strict identity check kept on purpose: only a real boolean True enables
    # studio, so a non-empty string value does not switch it on by accident.
    if use_studio is True:
        import agentscope

        agentscope.init(
            studio_url="http://localhost:3000",
            project="data_agent",
        )

    msg = None
    while True:
        msg = await user(msg)
        if msg.get_text_content() == "exit":
            break
        # Router agent handles the entire task with automatic multi-step routing
        msg = await router_agent(msg)
|
||
|
||
|
||
if __name__ == "__main__":
    # Sample requests that can be typed into the chat once it is running.
    #
    # Data processing example task:
    # project_root = os.path.abspath(os.path.dirname(__file__))
    # task = f"The data is stored at {project_root}/data/demo-dataset-images.jsonl. Filter out samples whose text field is shorter than 5 characters and samples whose image size is below 100Kb, and save the output under ./outputs."
    #
    # DJ Development example task:
    # task = "I want to develop a new DataJuicer filter operator that filters out audio files without human voice"
    #
    # MCP Agent will be automatically selected for advanced processing tasks

    # Expose main() as the command-line entry point; its parameters become
    # command-line flags.
    fire.Fire(component=main)
|