init

2025-10-17 21:40:45 +08:00
commit 7d0451131f
155 changed files with 14873 additions and 0 deletions
--- a/tests/agent_deep_research_test.py
+++ b/tests/agent_deep_research_test.py
@@ -0,0 +1,287 @@
+# -*- coding: utf-8 -*-
+import os
+import shutil
+import tempfile
+from unittest.mock import Mock, patch
+
+import pytest
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.mcp import StdIOStatefulClient
+from agentscope.memory import InMemoryMemory
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+
+from deep_research.agent_deep_research.deep_research_agent import (
+    DeepResearchAgent,
+)
+
+# Import the main function to be tested
+from deep_research.agent_deep_research.main import main
+
+
+@pytest.fixture
+def mock_env_vars(monkeypatch):
+    """Set placeholder Tavily/DashScope API keys and return them as a dict"""
+    monkeypatch.setenv("TAVILY_API_KEY", "test_tavily_key")
+    monkeypatch.setenv("DASHSCOPE_API_KEY", "test_dashscope_key")
+    return {
+        "TAVILY_API_KEY": "test_tavily_key",
+        "DASHSCOPE_API_KEY": "test_dashscope_key",
+    }
+
+
+@pytest.fixture
+def temp_working_dir():
+    """Yield a fresh temporary directory and delete it after the test"""
+    temp_dir = tempfile.mkdtemp()
+    yield temp_dir
+    shutil.rmtree(temp_dir)  # Teardown: runs after the test has finished
+
+
+@pytest.fixture
+def mock_tavily_client():
+    """Create a mocked Tavily MCP client with awaitable connect/close"""
+    client = Mock(spec=StdIOStatefulClient)
+    client.name = "tavily_mcp"
+    client.connect = AsyncMock()  # AsyncMock is this module's helper class
+    client.close = AsyncMock()
+    return client
+
+
+@pytest.fixture
+def mock_formatter():
+    """Create a Mock constrained to the DashScopeChatFormatter interface"""
+    return Mock(spec=DashScopeChatFormatter)
+
+
+@pytest.fixture
+def mock_memory():
+    """Create a Mock constrained to the InMemoryMemory interface"""
+    return Mock(spec=InMemoryMemory)
+
+
+@pytest.fixture
+def mock_model():
+    """Create a mocked chat model whose awaited ``call`` yields a stub reply"""
+    model = Mock(spec=DashScopeChatModel)
+    model.call = AsyncMock(return_value=Mock(content="test response"))
+    return model
+
+
+@pytest.fixture
+def mock_agent(mock_model, mock_formatter, mock_memory, mock_tavily_client):
+    """Create a mocked DeepResearchAgent wired to the other mock fixtures"""
+    agent = Mock(spec=DeepResearchAgent)
+    agent.return_value = agent  # Make the mock instance return itself
+    agent.model = mock_model
+    agent.formatter = mock_formatter
+    agent.memory = mock_memory
+    agent.search_mcp_client = mock_tavily_client
+    return agent
+
+
+class AsyncMock(Mock):
+    """Mock whose call is a coroutine, so ``await mock(...)`` works"""
+
+    async def __call__(self, *args, **kwargs):
+        return super().__call__(*args, **kwargs)  # Delegates to Mock's call
+
+
+class TestDeepResearchAgent:
+    """Test suite for Deep Research Agent functionality"""
+
+    def test_agent_initialization(
+        self,
+        mock_model,
+        mock_tavily_client,
+        temp_working_dir,
+    ):
+        """Test agent initialization with valid parameters"""
+        agent = DeepResearchAgent(
+            name="Friday",
+            sys_prompt="You are a helpful assistant named Friday.",
+            model=mock_model,
+            formatter=DashScopeChatFormatter(),  # Real formatter, not a mock
+            memory=InMemoryMemory(),  # Real in-memory store, not a mock
+            search_mcp_client=mock_tavily_client,
+            tmp_file_storage_dir=temp_working_dir,
+        )
+
+        assert agent.name == "Friday"
+        assert agent.sys_prompt == "You are a helpful assistant named Friday."
+        assert agent.tmp_file_storage_dir == temp_working_dir
+        assert os.path.exists(temp_working_dir)
+
+    @pytest.mark.asyncio
+    async def test_main_function_success(
+        self,
+        mock_env_vars,
+        mock_tavily_client,
+        mock_model,
+        temp_working_dir,
+    ):
+        """Test main function with successful execution"""
+        # Replace the client class used by main with the mocked client
+        with patch(
+            "deep_research.agent_deep_research.main.StdIOStatefulClient",
+            return_value=mock_tavily_client,
+        ):
+            # Replace the agent class used by main with an autospec'd mock
+            with patch(
+                "deep_research.agent_deep_research.main.DeepResearchAgent",
+                autospec=True,
+            ) as mock_agent_class:
+                mock_agent_instance = Mock()
+                mock_agent_instance.return_value = mock_agent_instance
+                mock_agent_instance.__call__ = AsyncMock(
+                    return_value=Msg("Friday", "Test response", "assistant"),
+                )
+                mock_agent_class.return_value = mock_agent_instance
+
+                # Mock os.makedirs so it is recorded instead of executed
+                with patch("os.makedirs") as mock_makedirs:
+                    # Run the main function with a test query
+                    test_query = "Test research question"
+                    msg = Msg("Bob", test_query, "user")
+
+                    await main(test_query)
+
+                    # Verify init calls (review: how does main get this dir?)
+                    mock_makedirs.assert_called_once_with(
+                        temp_working_dir,
+                        exist_ok=True,
+                    )
+                    mock_agent_class.assert_called_once()
+
+                    # Verify the agent call (review: is __call__ really used?)
+                    mock_agent_instance.__call__.assert_called_once_with(msg)
+
+    @pytest.mark.asyncio
+    async def test_main_function_with_missing_env_vars(self):
+        """Test main function handles missing environment variables"""
+        # Clear the whole environment so every API key is missing
+        with patch.dict(os.environ, clear=True):
+            with pytest.raises(Exception):
+                await main("Test query")
+
+    @pytest.mark.asyncio
+    async def test_main_function_connection_failure(
+        self,
+        mock_env_vars,
+        temp_working_dir,
+    ):
+        """Test main function handles connection failures"""
+        # Make the patched client's connect() raise when it is awaited
+        with patch(
+            "deep_research.agent_deep_research.main.StdIOStatefulClient",
+        ) as mock_client:
+            mock_client_instance = Mock()
+            mock_client_instance.connect = AsyncMock(
+                side_effect=Exception("Connection failed"),
+            )
+            mock_client.return_value = mock_client_instance
+
+            # Run the main function and expect exception
+            with pytest.raises(Exception) as exc_info:
+                await main("Test query")
+
+            assert "Connection failed" in str(exc_info.value)
+
+    @pytest.mark.asyncio
+    async def test_agent_cleanup(
+        self,
+        mock_env_vars,
+        mock_tavily_client,
+    ):
+        """Test proper cleanup of resources"""
+        with patch(
+            "deep_research.agent_deep_research.main.StdIOStatefulClient",
+            return_value=mock_tavily_client,
+        ):
+            # Run main function (the agent class itself is not patched here)
+            await main("Test query")
+
+            # Verify client close was called
+            mock_tavily_client.close.assert_called_once()
+
+    def test_working_directory_creation(self, temp_working_dir):
+        """Test working directory is created correctly"""
+        test_dir = os.path.join(temp_working_dir, "test_subdir")
+
+        # Test directory creation
+        os.makedirs(test_dir, exist_ok=True)
+        assert os.path.exists(test_dir)
+
+        # Test exist_ok=True behavior
+        os.makedirs(test_dir, exist_ok=True)  # Should not raise error
+
+
+class TestErrorHandling:
+    """Test suite for error handling scenarios"""
+
+    @pytest.mark.asyncio
+    async def test_model_failure(self, mock_env_vars, mock_tavily_client):
+        """Expect main() to raise when the mocked agent call fails"""
+        with patch(
+            "deep_research.agent_deep_research.main.StdIOStatefulClient",
+            return_value=mock_tavily_client,
+        ):
+            with patch(
+                "deep_research.agent_deep_research.main.DeepResearchAgent",
+            ) as mock_agent_class:
+                mock_agent = Mock()  # Stand-in for the constructed agent
+                mock_agent.__call__ = AsyncMock(
+                    side_effect=Exception("Model error"),
+                )
+                mock_agent_class.return_value = mock_agent
+
+                with pytest.raises(Exception) as exc_info:
+                    await main("Test query")
+
+                assert "Model error" in str(exc_info.value)
+
+    @pytest.mark.asyncio
+    async def test_filesystem_errors(self, mock_env_vars, mock_tavily_client):
+        """Expect main() to raise PermissionError when os.makedirs fails"""
+        # Point AGENT_OPERATION_DIR at a path that does not exist
+        invalid_dir = "/invalid/path/that/does/not/exist"
+
+        with patch.dict(os.environ, {"AGENT_OPERATION_DIR": invalid_dir}):
+            with patch(
+                "os.makedirs",
+                side_effect=PermissionError("Permission denied"),
+            ):
+                with pytest.raises(PermissionError):
+                    await main("Test query")
+
+    @pytest.mark.asyncio
+    async def test_logging_output(
+        self,
+        mock_env_vars,
+        mock_tavily_client,
+        caplog,
+    ):
+        """Test logging output is generated correctly"""
+        with patch(
+            "deep_research.agent_deep_research.main.StdIOStatefulClient",
+            return_value=mock_tavily_client,
+        ):
+            with patch(
+                "deep_research.agent_deep_research.main.DeepResearchAgent",
+            ) as mock_agent_class:
+                mock_agent = Mock()  # Stand-in for the constructed agent
+                mock_agent.__call__ = AsyncMock(
+                    return_value=Msg("Friday", "Test response", "assistant"),
+                )
+                mock_agent_class.return_value = mock_agent
+
+                await main("Test query")
+
+                # Verify at least one DEBUG-level record was captured
+                assert any(
+                    "DEBUG" in record.levelname for record in caplog.records
+                )
+
+
+if __name__ == "__main__":  # Allow running this test file directly
+    pytest.main(["-v", __file__])
--- a/tests/browser_agent_test.py
+++ b/tests/browser_agent_test.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+import os
+from unittest.mock import patch
+
+import pytest
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.mcp import StdIOStatefulClient
+from agentscope.memory import InMemoryMemory
+from agentscope.model import DashScopeChatModel
+from agentscope.tool import Toolkit
+
+from browser_use.agent_browser.browser_agent import BrowserAgent
+
+
+class TestBrowserAgentSingleton:
+    _instance = None
+
+    @classmethod
+    def get_instance(cls) -> BrowserAgent:
+        """Singleton access method"""
+        if cls._instance is None:
+            cls._instance = BrowserAgent(
+                name="BrowserBot",
+                model=DashScopeChatModel(
+                    api_key=os.environ.get("DASHSCOPE_API_KEY"),
+                    model_name="qwen-max",
+                    stream=True,
+                ),
+                formatter=DashScopeChatFormatter(),
+                memory=InMemoryMemory(),
+                toolkit=Toolkit(),
+                max_iters=50,
+                start_url="https://www.google.com",
+            )
+        return cls._instance
+
+    def test_singleton_pattern(self) -> None:
+        """Test that only one instance of BrowserAgent is created"""
+        instance1 = TestBrowserAgentSingleton.get_instance()
+        instance2 = TestBrowserAgentSingleton.get_instance()
+
+        assert (
+            instance1 is instance2
+        ), "BrowserAgent instances are not the same"
+
+    def test_instance_properties(self) -> None:
+        """Test browser agent instance properties"""
+        instance = TestBrowserAgentSingleton.get_instance()
+
+        assert instance.name == "BrowserBot"
+        assert isinstance(instance.model, DashScopeChatModel)
+        assert isinstance(instance.formatter, DashScopeChatFormatter)
+        assert isinstance(instance.memory, InMemoryMemory)
+        assert isinstance(instance.toolkit, Toolkit)
+        assert instance.max_iters == 50
+        assert instance.start_url == "https://www.google.com"
+
+    @pytest.mark.asyncio
+    async def test_browser_connection(self, monkeypatch) -> None:
+        """Test browser connection functionality"""
+
+        # Mock async methods
+        async def mock_connect():
+            return True
+
+        async def mock_close():
+            return True
+
+        # Patch the StdIOStatefulClient
+        with patch("agentscope.mcp.StdIOStatefulClient.connect", mock_connect):
+            with patch("agentscope.mcp.StdIOStatefulClient.close", mock_close):
+                instance = TestBrowserAgentSingleton.get_instance()
+
+                # Test connection
+                connected = await instance.toolkit._mcp_clients[0].connect()
+                assert connected is True
+
+                # Test cleanup
+                closed = await instance.toolkit._mcp_clients[0].close()
+                assert closed is True
+
+
+if __name__ == "__main__":  # Allow running this test file directly
+    pytest.main(["-v", __file__])
--- a/tests/evaluation_test.py
+++ b/tests/evaluation_test.py
@@ -0,0 +1,271 @@
+# -*- coding: utf-8 -*-
+# tests/evaluation_test.py
+import asyncio
+
+import pytest
+import os
+from unittest.mock import Mock, patch, AsyncMock
+from typing import List, Dict, Any, Tuple, Callable
+
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.agent import ReActAgent
+from agentscope.evaluate import Task, ACEPhone, SolutionOutput, ACEBenchmark
+from agentscope.tool import Toolkit
+
+# Import the main module from the correct path
+from ..evaluation.ace_bench import main as ace_main
+
+
+class TestReActAgentSolution:
+    """Test suite for the ReAct agent solution function"""
+
+    @pytest.fixture
+    def mock_task(self) -> Task:
+        """Create a mock ACEBench task"""
+        task = Mock(spec=Task)
+        task.input = "Test input query"
+        task.metadata = {
+            "tools": self._create_mock_tools(),
+            "phone": Mock(spec=ACEPhone),
+        }
+        return task
+
+    @pytest.fixture
+    def mock_pre_hook(self) -> Mock:
+        """Create a mock pre-hook function"""
+        return Mock()
+
+    def _create_mock_tools(self) -> List[Tuple[Callable, Dict[str, Any]]]:
+        """Create mock tool functions with schemas"""
+
+        def mock_tool(*args, **kwargs):
+            return "tool_response"
+
+        tool_schema = {
+            "name": "mock_tool",
+            "description": "A mock tool for testing",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "param1": {"type": "string"},
+                    "param2": {"type": "number"},
+                },
+                "required": ["param1"],
+            },
+        }
+
+        return [(mock_tool, tool_schema)]
+
+    @pytest.mark.asyncio
+    async def test_agent_initialization(
+        self,
+        mock_task: Task,
+        mock_pre_hook: Mock,
+    ) -> None:
+        """Test ReAct agent initialization with valid configuration"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Run the solution function
+            await ace_main.react_agent_solution(mock_task, mock_pre_hook)
+
+            # Verify agent creation
+            assert mock_task.metadata["tools"] is not None
+            assert len(mock_task.metadata["tools"]) > 0
+
+    @pytest.mark.asyncio
+    async def test_tool_registration(
+        self,
+        mock_task: Task,
+        mock_pre_hook: Mock,
+    ) -> None:
+        """Test tool registration in the toolkit"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            with patch(
+                "evaluation.ace_bench.main.Toolkit",
+            ) as mock_toolkit_class:
+                mock_toolkit = Mock(spec=Toolkit)
+                mock_toolkit_class.return_value = mock_toolkit
+
+                # Run the solution function
+                await ace_main.react_agent_solution(mock_task, mock_pre_hook)
+
+                # Verify tool registration calls
+                tools = mock_task.metadata["tools"]
+                assert mock_toolkit.register_tool_function.call_count == len(
+                    tools,
+                )
+
+                # Verify all tools were registered
+                for tool, schema in tools:
+                    mock_toolkit.register_tool_function.assert_any_call(
+                        tool,
+                        json_schema=schema,
+                    )
+
+    @pytest.mark.asyncio
+    async def test_agent_interaction(
+        self,
+        mock_task: Task,
+        mock_pre_hook: Mock,
+    ) -> None:
+        """Test agent interaction with input messages"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            with patch(
+                "evaluation.ace_bench.main.ReActAgent",
+            ) as mock_agent_class:
+                mock_agent = Mock(spec=ReActAgent)
+                mock_agent_class.return_value = mock_agent
+
+                # Set up async response
+                mock_agent.__call__ = AsyncMock()
+
+                # Create input message
+                msg_input = Msg("user", mock_task.input, role="user")
+
+                # Run the solution function
+                await ace_main.react_agent_solution(mock_task, mock_pre_hook)
+
+                # Verify agent interaction
+                mock_agent.print.assert_called_once_with(msg_input)
+                mock_agent.__call__.assert_called_once_with(msg_input)
+
+    @pytest.mark.asyncio
+    async def test_solution_output(
+        self,
+        mock_task: Task,
+        mock_pre_hook: Mock,
+    ) -> None:
+        """Test solution output format and content"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Mock memory and phone responses
+            mock_memory = AsyncMock()
+            mock_memory.get_memory.return_value = [
+                Msg(
+                    "assistant",
+                    "Test response",
+                    role="assistant",
+                    content=[
+                        {
+                            "type": "tool_use",
+                            "content": {
+                                "name": "mock_tool",
+                                "arguments": {"param1": "test", "param2": 42},
+                            },
+                        },
+                    ],
+                ),
+            ]
+
+            mock_phone = Mock(spec=ACEPhone)
+            mock_phone.get_current_state.return_value = {"status": "completed"}
+
+            # Patch the phone in task metadata
+            mock_task.metadata["phone"] = mock_phone
+
+            # Patch the agent's memory property
+            with patch.object(ReActAgent, "memory", mock_memory):
+                # Run the solution function
+                solution = await ace_main.react_agent_solution(
+                    mock_task,
+                    mock_pre_hook,
+                )
+
+                # Verify solution output
+                assert isinstance(solution, SolutionOutput)
+                assert solution.success is True
+                assert solution.output == {"status": "completed"}
+                assert len(solution.trajectory) == 1
+                assert solution.trajectory[0]["name"] == "mock_tool"
+
+    @pytest.mark.asyncio
+    async def test_error_handling(
+        self,
+        mock_task: Task,
+        mock_pre_hook: Mock,
+    ) -> None:
+        """Test error handling in the solution function"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Mock a failure case
+            with patch(
+                "evaluation.ace_bench.main.Toolkit.register_tool_function",
+                side_effect=Exception("Registration error"),
+            ):
+                with pytest.raises(Exception) as exc_info:
+                    await ace_main.react_agent_solution(
+                        mock_task,
+                        mock_pre_hook,
+                    )
+
+                assert "Registration error" in str(exc_info.value)
+
+
+class TestMainFunction:
+    """Test suite for the main function"""
+
+    @pytest.fixture
+    def mock_args(self) -> Mock:
+        """Create mock command-line arguments"""
+        args = Mock()
+        args.data_dir = "/test/data"
+        args.result_dir = "/test/results"
+        args.n_workers = 2
+        return args
+
+    def test_directory_validation(self, mock_args: Mock) -> None:
+        """Test directory validation in main function"""
+        with patch(
+            "evaluation.ace_bench.main.ArgumentParser.parse_args",
+            return_value=mock_args,
+        ):
+            with patch("os.makedirs") as mock_makedirs:
+                # Run main function
+                asyncio.run(ace_main.main())
+
+                # Verify directory creation
+                mock_makedirs.assert_any_call("/test/data", exist_ok=True)
+                mock_makedirs.assert_any_call("/test/results", exist_ok=True)
+
+    @pytest.mark.asyncio
+    async def test_evaluator_initialization(self, mock_args: Mock) -> None:
+        """Test evaluator initialization"""
+        with patch(
+            "evaluation.ace_bench.main.ArgumentParser.parse_args",
+            return_value=mock_args,
+        ):
+            with patch(
+                "evaluation.ace_bench.main.RayEvaluator",
+            ) as mock_evaluator_class:
+                mock_evaluator = Mock()
+                mock_evaluator_class.return_value = mock_evaluator
+
+                # Run main function
+                await ace_main.main()
+
+                # Verify evaluator initialization
+                mock_evaluator_class.assert_called_once()
+                call_args = mock_evaluator_class.call_args[1]
+                assert call_args["n_workers"] == 2
+                assert isinstance(call_args["benchmark"], ACEBenchmark)
+                assert call_args["benchmark"].data_dir == "/test/data"
+
+    @pytest.mark.asyncio
+    async def test_evaluation_execution(self, mock_args: Mock) -> None:
+        """Test evaluation execution"""
+        with patch(
+            "evaluation.ace_bench.main.ArgumentParser.parse_args",
+            return_value=mock_args,
+        ):
+            with patch(
+                "evaluation.ace_bench.main.RayEvaluator",
+            ) as mock_evaluator_class:
+                mock_evaluator = Mock()
+                mock_evaluator.run = AsyncMock()
+                mock_evaluator_class.return_value = mock_evaluator
+
+                # Run main function
+                await ace_main.main()
+
+                # Verify evaluation execution
+                mock_evaluator.run.assert_called_once_with(
+                    ace_main.react_agent_solution,
+                )
--- a/tests/functionality_agent_plan_test.py
+++ b/tests/functionality_agent_plan_test.py
@@ -0,0 +1,206 @@
+# -*- coding: utf-8 -*-
+# tests/functionality_agent_plan_test.py
+import os
+import pytest
+import asyncio
+from unittest.mock import AsyncMock, Mock, patch
+from agentscope.agent import ReActAgent, UserAgent
+from agentscope.model import DashScopeChatModel
+from agentscope.tool import Toolkit
+from agentscope.message import Msg
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.plan import PlanNotebook
+from agentscope.tool import (
+    execute_shell_command,
+    execute_python_code,
+    write_text_file,
+    insert_text_file,
+    view_text_file,
+)
+
+from browser_use.functionality.plan.main_agent_managed_plan import main
+
+
+class TestMainFunctionality:
+    """Test suite for the main.py functionality"""
+
+    @pytest.fixture
+    def mock_toolkit(self):
+        """Create a mocked Toolkit instance"""
+        return Mock(spec=Toolkit)
+
+    @pytest.fixture
+    def mock_model(self):
+        """Create a mocked DashScopeChatModel"""
+        model = Mock(spec=DashScopeChatModel)
+        model.call = AsyncMock(return_value=Mock(content="test response"))
+        return model
+
+    @pytest.fixture
+    def mock_formatter(self):
+        """Create a mocked DashScopeChatFormatter"""
+        return Mock(spec=DashScopeChatFormatter)
+
+    @pytest.fixture
+    def mock_plan_notebook(self):
+        """Create a mocked PlanNotebook"""
+        return Mock(spec=PlanNotebook)
+
+    @pytest.fixture
+    def mock_agent(
+        self,
+        mock_model,
+        mock_formatter,
+        mock_toolkit,
+        mock_plan_notebook,
+    ):
+        """Create a mocked ReActAgent instance"""
+        agent = Mock(spec=ReActAgent)
+        agent.model = mock_model
+        agent.formatter = mock_formatter
+        agent.toolkit = mock_toolkit
+        agent.plan_notebook = mock_plan_notebook
+        agent.__call__ = AsyncMock(
+            return_value=Msg("assistant", "test response", role="assistant"),
+        )
+        return agent
+
+    @pytest.fixture
+    def mock_user(self):
+        """Create a mocked UserAgent instance"""
+        user = Mock(spec=UserAgent)
+        user.__call__ = AsyncMock(
+            return_value=Msg("user", "exit", role="user"),
+        )
+        return user
+
+    def test_toolkit_initialization(self):
+        """Test toolkit initialization and tool registration"""
+        toolkit = Toolkit()
+        # Register all required tools
+        toolkit.register_tool_function(execute_shell_command)
+        toolkit.register_tool_function(execute_python_code)
+        toolkit.register_tool_function(write_text_file)
+        toolkit.register_tool_function(insert_text_file)
+        toolkit.register_tool_function(view_text_file)
+
+        # ✅ 通过 hasattr 和 callable 验证工具是否注册成功
+        assert hasattr(toolkit, "execute_shell_command")
+        assert hasattr(toolkit, "execute_python_code")
+        assert hasattr(toolkit, "write_text_file")
+        assert hasattr(toolkit, "insert_text_file")
+        assert hasattr(toolkit, "view_text_file")
+
+        assert callable(toolkit.execute_shell_command)
+        assert callable(toolkit.execute_python_code)
+        assert callable(toolkit.write_text_file)
+        assert callable(toolkit.insert_text_file)
+        assert callable(toolkit.view_text_file)
+
+    @pytest.mark.asyncio
+    async def test_agent_initialization(
+        self,
+        mock_model,
+        mock_formatter,
+        mock_toolkit,
+        mock_plan_notebook,
+    ):
+        """Test ReActAgent initialization"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            agent = ReActAgent(
+                name="Friday",
+                sys_prompt="You're a helpful assistant named Friday.",
+                model=mock_model,
+                formatter=mock_formatter,
+                toolkit=mock_toolkit,
+                enable_meta_tool=True,
+                plan_notebook=mock_plan_notebook,
+            )
+
+            assert agent.name == "Friday"
+            assert (
+                agent.sys_prompt == "You're a helpful assistant named Friday."
+            )
+            assert agent.model == mock_model
+            assert agent.formatter == mock_formatter
+            assert agent.toolkit == mock_toolkit
+            assert agent.enable_meta_tool is True
+            assert agent.plan_notebook == mock_plan_notebook
+
+    @pytest.mark.asyncio
+    async def test_message_loop_exits_on_exit(self, mock_agent, mock_user):
+        """Test the message loop exits when user sends 'exit'"""
+        with patch("main.asyncio.sleep") as mock_sleep, patch.dict(
+            os.environ,
+            {"DASHSCOPE_API_KEY": "test_key"},
+        ):
+            # 避免无限循环
+            mock_sleep.side_effect = asyncio.TimeoutError()
+
+            # 替换 main.py 中的 agent 和 user
+            with patch("main.ReActAgent", return_value=mock_agent), patch(
+                "main.UserAgent",
+                return_value=mock_user,
+            ):
+                try:
+                    await main()
+                except asyncio.TimeoutError:
+                    pass  # 期望的退出方式
+
+                # ✅ 验证 agent 和 user 被正确调用
+                mock_agent.__call__.assert_awaited_once()
+                mock_user.__call__.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_full_message_flow(self, mock_agent, mock_user):
+        """Test the complete message flow between agent and user"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # 模拟 agent 返回的响应
+            mock_agent.__call__ = AsyncMock(
+                side_effect=[
+                    Msg("assistant", "response 1", role="assistant"),
+                    Msg("assistant", "response 2", role="assistant"),
+                ],
+            )
+
+            # 模拟 user 返回的响应
+            mock_user.__call__ = AsyncMock(
+                side_effect=[
+                    Msg("user", "first message", role="user"),
+                    Msg("user", "exit", role="user"),
+                ],
+            )
+
+            # 替换 main.py 中的 agent 和 user
+            with patch("main.ReActAgent", return_value=mock_agent), patch(
+                "main.UserAgent",
+                return_value=mock_user,
+            ):
+                try:
+                    await main()
+                except asyncio.TimeoutError:
+                    pass  # 期望的退出方式
+
+                # ✅ 验证消息流程
+                assert mock_agent.__call__.await_count == 2
+                assert mock_user.__call__.await_count == 2
+
+                # ✅ 验证最终消息是 "exit"
+                final_msg = mock_user.__call__.call_args_list[-1][0][0]
+                assert final_msg.get_text_content() == "exit"
+
+    @pytest.mark.asyncio
+    async def test_main_runs_without_error(self, mock_agent, mock_user):
+        """Test the main function runs without raising exceptions"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}), patch(
+            "main.ReActAgent",
+            return_value=mock_agent,
+        ), patch("main.UserAgent", return_value=mock_user), patch(
+            "main.asyncio.sleep",
+            AsyncMock(),
+        ):
+            # 使用 asyncio.run(main()) 来启动测试
+            try:
+                await main()
+            except Exception as e:
+                pytest.fail(f"main() raised an unexpected exception: {e}")
--- a/tests/functionality_long_term_memory.py
+++ b/tests/functionality_long_term_memory.py
--- a/tests/functionality_mcp_test.py
+++ b/tests/functionality_mcp_test.py
@@ -0,0 +1,255 @@
+# -*- coding: utf-8 -*-
+import os
+
+"""Tests for the MCP example: clients, toolkit, agent and main()."""
+from unittest.mock import AsyncMock, Mock, patch
+
+import pytest
+from agentscope.agent import ReActAgent
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.mcp import HttpStatefulClient, HttpStatelessClient
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.tool import Toolkit
+from browser_use.functionality.mcp import main
+from pydantic import BaseModel, Field
+
+
+class NumberResult(BaseModel):
+    """Structured-output schema holding a single integer answer."""
+
+    result: int = Field(..., description="The result of the calculation")
+
+
+class TestMCPReActAgent:
+    """Test suite for the MCP ReAct agent example (mock-based)."""
+
+    @pytest.fixture
+    def mock_toolkit(self) -> Toolkit:
+        """Return a ``Mock`` restricted to the ``Toolkit`` interface."""
+        return Mock(spec=Toolkit)
+
+    @pytest.fixture
+    def mock_stateful_client(self) -> HttpStatefulClient:
+        """Return a stateful-client mock with awaitable lifecycle methods."""
+        client = Mock(spec=HttpStatefulClient)
+        client.connect = AsyncMock()
+        client.close = AsyncMock()
+        client.get_callable_function = AsyncMock()
+        return client
+
+    @pytest.fixture
+    def mock_stateless_client(self) -> HttpStatelessClient:
+        """Return a ``Mock`` restricted to ``HttpStatelessClient``."""
+        client = Mock(spec=HttpStatelessClient)
+        return client
+
+    @pytest.fixture
+    def mock_model(self) -> DashScopeChatModel:
+        """Return a model mock whose ``call`` yields a canned response."""
+        model = Mock(spec=DashScopeChatModel)
+        model.call = AsyncMock(return_value=Mock(content="test response"))
+        return model
+
+    @pytest.fixture
+    def mock_formatter(self) -> DashScopeChatFormatter:
+        """Return a ``Mock`` restricted to ``DashScopeChatFormatter``."""
+        return Mock(spec=DashScopeChatFormatter)
+
+    @pytest.fixture
+    def mock_agent(
+        self,
+        mock_model: DashScopeChatModel,
+        mock_formatter: DashScopeChatFormatter,
+        mock_toolkit: Toolkit,
+    ) -> Mock:
+        """Return an awaitable agent double carrying a structured result."""
+        agent = AsyncMock(spec=ReActAgent)
+        agent.model = mock_model
+        agent.formatter = mock_formatter
+        agent.toolkit = mock_toolkit
+        # ``await agent(...)`` resolves ``__call__`` on the type, so the
+        # result is configured on the mock itself, not on ``__call__``.
+        agent.return_value = Mock(
+            metadata={"result": 123456},
+        )
+        return agent
+
+    @pytest.mark.asyncio
+    async def test_mcp_client_initialization(self) -> None:
+        """Check that both client types keep their constructor arguments."""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Test stateful client creation
+            stateful_client = HttpStatefulClient(
+                name="add_client",
+                transport="sse",
+                url="http://localhost:8080",
+            )
+            assert stateful_client.name == "add_client"
+            assert stateful_client.transport == "sse"
+            assert stateful_client.url == "http://localhost:8080"
+
+            # Test stateless client creation
+            stateless_client = HttpStatelessClient(
+                name="multiply_client",
+                transport="streamable_http",
+                url="http://localhost:8081",
+            )
+            assert stateless_client.name == "multiply_client"
+            assert stateless_client.transport == "streamable_http"
+            assert stateless_client.url == "http://localhost:8081"
+
+    @pytest.mark.asyncio
+    async def test_toolkit_registration(
+        self,
+        mock_toolkit: Toolkit,
+        mock_stateful_client: HttpStatefulClient,
+        mock_stateless_client: HttpStatelessClient,
+    ) -> None:
+        """Register both client mocks and count the registration calls."""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Make the registration hook awaitable
+            mock_toolkit.register_mcp_client = AsyncMock()
+
+            # Verify registration of both clients
+            await mock_toolkit.register_mcp_client(mock_stateful_client)
+            await mock_toolkit.register_mcp_client(mock_stateless_client)
+
+            assert mock_toolkit.register_mcp_client.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_agent_initialization(
+        self,
+        mock_model: DashScopeChatModel,
+        mock_formatter: DashScopeChatFormatter,
+        mock_toolkit: Toolkit,
+    ) -> None:
+        """Build a ReActAgent from mocks and check the stored attributes."""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            agent = ReActAgent(
+                name="Jarvis",
+                sys_prompt="You're a helpful assistant named Jarvis.",
+                model=mock_model,
+                formatter=mock_formatter,
+                toolkit=mock_toolkit,
+            )
+
+            assert agent.name == "Jarvis"
+            assert (
+                agent.sys_prompt == "You're a helpful assistant named Jarvis."
+            )
+            assert agent.model == mock_model
+            assert agent.formatter == mock_formatter
+            assert agent.toolkit == mock_toolkit
+
+    @pytest.mark.asyncio
+    async def test_structured_output(
+        self,
+        mock_agent: ReActAgent,
+    ) -> None:
+        """Await the agent double and read the structured ``metadata``."""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Create test message
+            test_msg = Msg(
+                "user",
+                "Calculate 2345 multiplied by 3456, then add 4567 to the result,"
+                " what is the final outcome?",
+                "user",
+            )
+
+            # Run agent with structured model
+            result = await mock_agent(test_msg, structured_model=NumberResult)
+
+            # Verify structured output
+            assert isinstance(result, Mock)
+            assert result.metadata["result"] == 123456
+
+    @pytest.mark.asyncio
+    async def test_manual_tool_call(
+        self,
+        mock_stateful_client: HttpStatefulClient,
+    ) -> None:
+        """Fetch the ``add`` tool from the client mock and invoke it."""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Mock callable function
+            mock_callable = AsyncMock(return_value=Mock(content="15"))
+            mock_stateful_client.get_callable_function = AsyncMock(
+                return_value=mock_callable,
+            )
+            # Request the wrapped tool so the call matches the assertion below
+            tool_function = await mock_stateful_client.get_callable_function(
+                "add",
+                wrap_tool_result=True,
+            )
+            response = await tool_function(a=5, b=10)
+
+            # Verify tool call
+            mock_stateful_client.get_callable_function.assert_called_once_with(
+                "add",
+                wrap_tool_result=True,
+            )
+            mock_callable.assert_called_once_with(a=5, b=10)
+            assert response.content == "15"
+
+    @pytest.mark.asyncio
+    async def test_client_lifecycle(
+        self,
+        mock_stateful_client: HttpStatefulClient,
+    ) -> None:
+        """Await ``connect`` and ``close`` once each on the client mock."""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Test connection
+            await mock_stateful_client.connect()
+            mock_stateful_client.connect.assert_awaited_once()
+
+            # Test cleanup
+            await mock_stateful_client.close()
+            mock_stateful_client.close.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_full_integration_flow(
+        self,
+        mock_stateful_client: HttpStatefulClient,
+        mock_stateless_client: HttpStatelessClient,
+        mock_toolkit: Toolkit,
+        mock_model: DashScopeChatModel,
+        mock_formatter: DashScopeChatFormatter,
+    ) -> None:
+        """Run main() against mocked clients, toolkit and agent."""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            mock_toolkit.register_mcp_client = AsyncMock()
+            # ``await agent(...)`` resolves ``__call__`` on the type, so the
+            # agent double must itself be awaitable.
+            mock_agent = AsyncMock(
+                return_value=Mock(
+                    metadata={"result": 8101807},
+                ),
+            )
+            mock_agent_class = Mock(return_value=mock_agent)
+
+            # NOTE(review): assumes main looks these names up as module
+            # globals when it builds its collaborators -- confirm.
+            with patch.multiple(
+                main,
+                Toolkit=Mock(return_value=mock_toolkit),
+                HttpStatefulClient=Mock(return_value=mock_stateful_client),
+                HttpStatelessClient=Mock(return_value=mock_stateless_client),
+                ReActAgent=mock_agent_class,
+            ):
+                # Run the main function
+                await main.main()
+
+                # Verify full flow
+                mock_stateful_client.connect.assert_awaited_once()
+                mock_toolkit.register_mcp_client.assert_any_call(
+                    mock_stateful_client,
+                )
+                mock_toolkit.register_mcp_client.assert_any_call(
+                    mock_stateless_client,
+                )
+                mock_agent_class.assert_called_once()
+                mock_agent.assert_awaited_once()
+
+
+if __name__ == "__main__":
+    raise SystemExit(pytest.main(["-v", __file__]))  # propagate exit code
--- a/tests/functionality_plan_test.py
+++ b/tests/functionality_plan_test.py
@@ -0,0 +1,247 @@
+# -*- coding: utf-8 -*-
+
+# Tests for the manual plan example (main_manual_plan)
+import os
+import pytest
+import asyncio
+from unittest.mock import AsyncMock, Mock, patch
+from agentscope.agent import ReActAgent, UserAgent
+from agentscope.model import DashScopeChatModel
+from agentscope.tool import Toolkit
+from agentscope.message import Msg
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.plan import PlanNotebook, SubTask
+from agentscope.tool import (
+    execute_shell_command,
+    execute_python_code,
+    write_text_file,
+    insert_text_file,
+    view_text_file,
+)
+
+# Import main() and the module-level plan_notebook from the example
+from browser_use.functionality.plan.main_manual_plan import main, plan_notebook
+
+
+class TestManualPlanExample:
+    """Test suite for the manual plan example (``main_manual_plan``)."""
+
+    @pytest.fixture
+    def mock_toolkit(self):
+        """Return a ``Mock`` restricted to the ``Toolkit`` interface."""
+        return Mock(spec=Toolkit)
+
+    @pytest.fixture
+    def mock_model(self):
+        """Return a model mock whose ``call`` yields a canned ``Msg``."""
+        model = Mock(spec=DashScopeChatModel)
+        model.call = AsyncMock(
+            return_value=Msg("assistant", "test response", role="assistant"),
+        )
+        return model
+
+    @pytest.fixture
+    def mock_formatter(self):
+        """Return a ``Mock`` restricted to ``DashScopeChatFormatter``."""
+        return Mock(spec=DashScopeChatFormatter)
+
+    @pytest.fixture
+    def mock_plan_notebook(self):
+        """Return a ``Mock`` restricted to the ``PlanNotebook`` interface."""
+        return Mock(spec=PlanNotebook)
+
+    @pytest.fixture
+    def mock_agent(
+        self,
+        mock_model,
+        mock_formatter,
+        mock_toolkit,
+        mock_plan_notebook,
+    ):
+        """Return an awaitable ReActAgent double that replies with a Msg."""
+        agent = AsyncMock(spec=ReActAgent)
+        agent.model = mock_model
+        agent.formatter = mock_formatter
+        agent.toolkit = mock_toolkit
+        agent.plan_notebook = mock_plan_notebook
+        # Set on the mock itself: an instance-level ``__call__`` is ignored
+        reply = Msg("assistant", "test response", role="assistant")
+        agent.return_value = reply
+        return agent
+
+    @pytest.fixture
+    def mock_user(self):
+        """Return an awaitable UserAgent double that always says "exit"."""
+        user = AsyncMock(spec=UserAgent)
+        # Set on the mock itself: an instance-level ``__call__`` is ignored.
+        # The canned reply is always "exit".
+        user.return_value = Msg("user", "exit", role="user")
+        return user
+
+    def test_plan_creation(self):
+        """Check the module-level plan's name and its four subtasks."""
+        assert plan_notebook.current_plan is not None
+        assert (
+            plan_notebook.current_plan.name
+            == "Comprehensive Report on AgentScope"
+        )
+        assert len(plan_notebook.current_plan.subtasks) == 4
+
+        # Subtask names, in plan order
+        subtask_names = [
+            subtask.name for subtask in plan_notebook.current_plan.subtasks
+        ]
+        expected_names = [
+            "Clone the repository",
+            "View the documentation",
+            "Study the code",
+            "Summarize the findings",
+        ]
+        assert subtask_names == expected_names
+
+        # Subtask descriptions, in plan order
+        subtask_descriptions = [
+            subtask.description
+            for subtask in plan_notebook.current_plan.subtasks
+        ]
+        expected_descriptions = [
+            "Clone the AgentScope GitHub repository from agentscope-ai/agentscope, and ensure it's the latest version.",
+            "View the documentation of AgentScope in the repository.",
+            "Study the code of AgentScope, focusing on the core modules and their interactions.",
+            "Summarize the findings from the documentation and code study, and write a comprehensive report in markdown format.",
+        ]
+        assert subtask_descriptions == expected_descriptions
+
+    def test_toolkit_initialization(self):
+        """Register the example's tools and confirm the toolkit lists them."""
+        toolkit = Toolkit()
+        # Register all required tools
+        toolkit.register_tool_function(execute_shell_command)
+        toolkit.register_tool_function(execute_python_code)
+        toolkit.register_tool_function(write_text_file)
+        toolkit.register_tool_function(insert_text_file)
+        toolkit.register_tool_function(view_text_file)
+
+        # Tools are not attributes of the toolkit; read the registered
+        # names back from the JSON schemas it advertises instead.
+        registered = {
+            schema["function"]["name"]
+            for schema in toolkit.get_json_schemas()
+        }
+
+        assert "execute_shell_command" in registered
+        assert "execute_python_code" in registered
+        assert "write_text_file" in registered
+        assert "insert_text_file" in registered
+        assert "view_text_file" in registered
+
+    @pytest.mark.asyncio
+    async def test_agent_initialization(
+        self,
+        mock_model,
+        mock_formatter,
+        mock_toolkit,
+        mock_plan_notebook,
+    ):
+        """Build a ReActAgent from mocks and check the stored attributes."""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            agent = ReActAgent(
+                name="Friday",
+                sys_prompt="You're a helpful assistant named Friday.",
+                model=mock_model,
+                formatter=mock_formatter,
+                toolkit=mock_toolkit,
+                plan_notebook=mock_plan_notebook,
+            )
+
+            assert agent.name == "Friday"
+            assert (
+                agent.sys_prompt == "You're a helpful assistant named Friday."
+            )
+            assert agent.model == mock_model
+            assert agent.formatter == mock_formatter
+            assert agent.toolkit == mock_toolkit
+            assert agent.plan_notebook == mock_plan_notebook
+
+    @pytest.mark.asyncio
+    async def test_message_loop_exits_on_exit(self, mock_agent, mock_user):
+        """Check main() stops after the user's first reply is "exit"."""
+        with patch(
+            "asyncio.sleep",
+        ) as mock_sleep, patch.dict(
+            os.environ,
+            {"DASHSCOPE_API_KEY": "test_key"},
+        ):
+            # Safety net against an endless loop: any sleep raises
+            mock_sleep.side_effect = asyncio.TimeoutError()
+
+            # Swap in the doubles where the example module looks them up
+            with patch(
+                f"{main.__module__}.ReActAgent",
+                return_value=mock_agent,
+            ), patch(f"{main.__module__}.UserAgent", return_value=mock_user):
+                try:
+                    await main()
+                except asyncio.TimeoutError:
+                    pass  # raised by the patched sleep; acceptable exit
+
+                # The agent and the user are each awaited exactly once
+                mock_agent.assert_awaited_once()
+                mock_user.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_full_message_flow(self, mock_agent, mock_user):
+        """Drive two agent/user rounds through main() and check each turn."""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Scripted agent replies, handed out one per await
+            agent_replies = [
+                Msg("assistant", "response 1", role="assistant"),
+                Msg("assistant", "response 2", role="assistant"),
+            ]
+            # Set on the mock itself: an instance ``__call__`` is ignored
+            mock_agent.side_effect = agent_replies
+
+            # Scripted user replies, handed out one per await
+            user_replies = [
+                Msg("user", "first message", role="user"),
+                Msg("user", "exit", role="user"),
+            ]
+            # The second reply is expected to end the chat loop in main()
+            mock_user.side_effect = user_replies
+
+            # Swap in the doubles where the example module looks them up
+            with patch(
+                f"{main.__module__}.ReActAgent",
+                return_value=mock_agent,
+            ), patch(f"{main.__module__}.UserAgent", return_value=mock_user):
+                try:
+                    await main()
+                except asyncio.TimeoutError:
+                    pass  # tolerated, mirroring the exit-on-"exit" test
+
+                # Both sides took exactly two turns
+                assert mock_agent.await_count == 2
+                assert mock_user.await_count == 2
+
+                # The last input handed to the user is the agent's final reply
+                final_msg = mock_user.await_args_list[-1].args[0]
+                assert final_msg.get_text_content() == "response 2"
+
+    @pytest.mark.asyncio
+    async def test_main_runs_without_error(self, mock_agent, mock_user):
+        """Run main() with patched agents; any exception fails the test."""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}), patch(
+            f"{main.__module__}.ReActAgent",
+            return_value=mock_agent,
+        ), patch(
+            f"{main.__module__}.UserAgent",
+            return_value=mock_user,
+        ), patch(
+            "asyncio.sleep",
+            AsyncMock(),
+        ):
+            # main() is awaited directly inside this coroutine
+            try:
+                await main()
+            except Exception as e:
+                pytest.fail(f"main() raised an unexpected exception: {e}")
--- a/tests/functionality_session_with_sqlite_test.py
+++ b/tests/functionality_session_with_sqlite_test.py
--- a/tests/functionality_structured_output_test.py
+++ b/tests/functionality_structured_output_test.py
--- a/tests/game_test.py
+++ b/tests/game_test.py
--- a/tests/meta_planner_agent_test.py
+++ b/tests/meta_planner_agent_test.py
--- a/tests/react_agent_test.py
+++ b/tests/react_agent_test.py