# -*- coding: utf-8 -*-
"""Command-line entry point for the DataJuicer multi-agent assistant.

The script builds a set of specialized agents (selected via
``available_agents``), wraps them as tools for a router agent, and then runs
an interactive loop in which the router answers the user's messages until the
user types ``exit``.
"""
import os
from typing import List, Sequence, Union

import fire
from agentscope.model import DashScopeChatModel
from agentscope.formatter import DashScopeChatFormatter
from agentscope.memory import InMemoryMemory
from agentscope.agent import UserAgent
from agentscope.tool import Toolkit

from agent_factory import create_agent
from prompts import (
    DJ_SYS_PROMPT,
    DJ_DEV_SYS_PROMPT,
    ROUTER_SYS_PROMPT,
    MCP_SYS_PROMPT,
)
from tools import (
    dj_toolkit,
    dj_dev_toolkit,
    mcp_tools,
    get_mcp_toolkit,
    agents2toolkit,
)

# Create shared configuration.
# NOTE: reading os.environ[...] here raises KeyError at import time when
# DASHSCOPE_API_KEY is not set.
model = DashScopeChatModel(
    model_name="qwen-max",
    api_key=os.environ["DASHSCOPE_API_KEY"],
    stream=True,
    enable_thinking=False,
)

# Separate model used only by the operator-development agent.
dev_model = DashScopeChatModel(
    model_name="qwen3-coder-480b-a35b-instruct",
    api_key=os.environ["DASHSCOPE_API_KEY"],
    stream=True,
    enable_thinking=False,
)

formatter = DashScopeChatFormatter()
# One memory instance shared by all specialized agents (the router gets its
# own instance inside main()).
memory = InMemoryMemory()

user = UserAgent("User")


def _normalize_agent_names(
    available_agents: Union[str, Sequence[str]],
) -> List[str]:
    """Return the requested agent names as a clean list of strings.

    A bare string is accepted because a CLI invocation such as
    ``--available_agents=dj`` delivers a single string rather than a list;
    iterating over it directly would yield individual characters. Comma
    separated names inside a string are split, surrounding whitespace is
    stripped, and empty entries are dropped.

    :param available_agents: A single name, a comma separated string of
        names, or a sequence of names.
    :return: List of non-empty agent names in their original order.
    """
    if isinstance(available_agents, str):
        available_agents = available_agents.split(",")
    stripped = (str(name).strip() for name in available_agents)
    return [name for name in stripped if name]


async def main(
    use_studio: bool = False,
    available_agents: Sequence[str] = ("dj", "dj_dev"),
    retrieval_mode: str = "auto",
):
    """
    Main function for running the agent.

    Builds the requested specialized agents, exposes them to a router agent
    as tools, and runs an interactive chat loop that ends when the user's
    message text is exactly ``exit``.

    :param use_studio: Whether to use agentscope studio.
    :param available_agents: List of available agents. A single name or a
        comma separated string is also accepted.
        Options: dj, dj_dev, dj_mcp
    :param retrieval_mode: Retrieval mode for operators.
        Options: auto, vector, llm
    """
    agent_names = _normalize_agent_names(available_agents)

    if "dj" in agent_names:
        # Set global retrieval mode for tools to use
        os.environ["RETRIEVAL_MODE"] = retrieval_mode
        print(f"Using retrieval mode: {retrieval_mode}")

    agents = []
    for agent_name in agent_names:
        if agent_name == "dj":
            # Create agents using unified create_agent function
            dj_agent = create_agent(
                "datajuicer_agent",
                DJ_SYS_PROMPT,
                dj_toolkit,
                (
                    "A professional data preprocessing AI assistant with the following core capabilities: \n"
                    "Tool Matching \n"
                    "- Query and validate suitable DataJuicer operators; \n"
                    "Configuration Generation \n"
                    "- Create YAML configuration files and preview data; \n"
                    "Task Execution - Run data processing pipelines and output results"
                ),
                model,
                formatter,
                memory,
            )
            agents.append(dj_agent)
        elif agent_name == "dj_dev":
            # DJ Development Agent for operator development
            dj_dev_agent = create_agent(
                "dj_dev_agent",
                DJ_DEV_SYS_PROMPT,
                dj_dev_toolkit,
                (
                    "An expert DataJuicer development assistant specializing in creating new DataJuicer operators. \n"
                    "Core capabilities: \n"
                    "Reference Retrieval - fetch base classes and examples; \n"
                    "Environment Configuration - handle DATA_JUICER_PATH setup. if user provides a DataJuicer path requiring setup/update, please call this agent;\n; "
                    "Code Generation - write complete, convention-compliant operator code"
                ),
                dev_model,
                formatter,
                memory,
            )
            agents.append(dj_dev_agent)
        elif agent_name == "dj_mcp":
            # The second element returned by get_mcp_toolkit() is not needed
            # here and is discarded.
            mcp_toolkit, _ = await get_mcp_toolkit()
            for tool in mcp_tools:
                mcp_toolkit.register_tool_function(tool)

            mcp_agent = create_agent(
                "mcp_datajuicer_agent",
                MCP_SYS_PROMPT,
                mcp_toolkit,
                (
                    "DataJuicer MCP Agent powered by Recipe Flow MCP server. \n"
                    "Core capabilities: \n"
                    "- Filter operators by tags/categories using MCP protocol; \n"
                    "- Real-time data processing pipeline execution. \n"
                ),
                model,
                formatter,
                memory,
            )
            agents.append(mcp_agent)
        else:
            # Previously an unrecognized name was skipped without any hint,
            # leaving the router with fewer (possibly zero) tools.
            print(
                f"Warning: unknown agent '{agent_name}' ignored. "
                "Options: dj, dj_dev, dj_mcp"
            )

    # Router agent - uses agents2toolkit to dynamically generate tools from
    # all agents
    router_agent = create_agent(
        "Router",
        ROUTER_SYS_PROMPT,
        agents2toolkit(agents),
        "A router agent that intelligently routes tasks to specialized DataJuicer agents",
        model,
        formatter,
        InMemoryMemory(),  # Router uses its own memory instance
    )

    if use_studio is True:
        # Imported lazily so the package-level import only happens when
        # studio integration is requested.
        import agentscope

        agentscope.init(
            studio_url="http://localhost:3000",
            project="data_agent",
        )

    msg = None
    while True:
        msg = await user(msg)
        if msg.get_text_content() == "exit":
            break
        # Router agent handles the entire task with automatic multi-step
        # routing
        msg = await router_agent(msg)


if __name__ == "__main__":
    # Example tasks
    # project_root = os.path.abspath(os.path.dirname(__file__))
    # task = (
    #     f"The data is stored at {project_root}/data/demo-dataset-images.jsonl. "
    #     "Filter out samples whose text field is shorter than 5 characters "
    #     "and samples whose image size is below 100 KB, then save the output "
    #     "under the ./outputs path."
    # )

    # # DJ Development example task:
    # task = (
    #     "I want to develop a new DataJuicer filter operator that filters "
    #     "out audio files containing no human voice."
    # )

    # # MCP Agent will be automatically selected for advanced processing tasks

    fire.Fire(main)