init

2025-10-17 21:40:45 +08:00
commit 7d0451131f
155 changed files with 14873 additions and 0 deletions
--- a/tests/evaluation_test.py
+++ b/tests/evaluation_test.py
@@ -0,0 +1,271 @@
+# -*- coding: utf-8 -*-
+# tests/evaluation_test.py
+import asyncio
+
+import pytest
+import os
+from unittest.mock import Mock, patch, AsyncMock
+from typing import List, Dict, Any, Tuple, Callable
+
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.agent import ReActAgent
+from agentscope.evaluate import Task, ACEPhone, SolutionOutput, ACEBenchmark
+from agentscope.tool import Toolkit
+
+# Import the main module from the correct path
+from ..evaluation.ace_bench import main as ace_main
+
+
+class TestReActAgentSolution:
+    """Test suite for the ReAct agent solution function"""
+
+    @pytest.fixture
+    def mock_task(self) -> Task:
+        """Create a mock ACEBench task"""
+        task = Mock(spec=Task)
+        task.input = "Test input query"
+        task.metadata = {
+            "tools": self._create_mock_tools(),
+            "phone": Mock(spec=ACEPhone),
+        }
+        return task
+
+    @pytest.fixture
+    def mock_pre_hook(self) -> Mock:
+        """Create a mock pre-hook function"""
+        return Mock()
+
+    def _create_mock_tools(self) -> List[Tuple[Callable, Dict[str, Any]]]:
+        """Create mock tool functions with schemas"""
+
+        def mock_tool(*args, **kwargs):
+            return "tool_response"
+
+        tool_schema = {
+            "name": "mock_tool",
+            "description": "A mock tool for testing",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "param1": {"type": "string"},
+                    "param2": {"type": "number"},
+                },
+                "required": ["param1"],
+            },
+        }
+
+        return [(mock_tool, tool_schema)]
+
+    @pytest.mark.asyncio
+    async def test_agent_initialization(
+        self,
+        mock_task: Task,
+        mock_pre_hook: Mock,
+    ) -> None:
+        """Test ReAct agent initialization with valid configuration"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Run the solution function
+            await ace_main.react_agent_solution(mock_task, mock_pre_hook)
+
+            # Verify agent creation
+            assert mock_task.metadata["tools"] is not None
+            assert len(mock_task.metadata["tools"]) > 0
+
+    @pytest.mark.asyncio
+    async def test_tool_registration(
+        self,
+        mock_task: Task,
+        mock_pre_hook: Mock,
+    ) -> None:
+        """Test tool registration in the toolkit"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            with patch(
+                "evaluation.ace_bench.main.Toolkit",
+            ) as mock_toolkit_class:
+                mock_toolkit = Mock(spec=Toolkit)
+                mock_toolkit_class.return_value = mock_toolkit
+
+                # Run the solution function
+                await ace_main.react_agent_solution(mock_task, mock_pre_hook)
+
+                # Verify tool registration calls
+                tools = mock_task.metadata["tools"]
+                assert mock_toolkit.register_tool_function.call_count == len(
+                    tools,
+                )
+
+                # Verify all tools were registered
+                for tool, schema in tools:
+                    mock_toolkit.register_tool_function.assert_any_call(
+                        tool,
+                        json_schema=schema,
+                    )
+
+    @pytest.mark.asyncio
+    async def test_agent_interaction(
+        self,
+        mock_task: Task,
+        mock_pre_hook: Mock,
+    ) -> None:
+        """Test agent interaction with input messages"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            with patch(
+                "evaluation.ace_bench.main.ReActAgent",
+            ) as mock_agent_class:
+                mock_agent = Mock(spec=ReActAgent)
+                mock_agent_class.return_value = mock_agent
+
+                # Set up async response
+                mock_agent.__call__ = AsyncMock()
+
+                # Create input message
+                msg_input = Msg("user", mock_task.input, role="user")
+
+                # Run the solution function
+                await ace_main.react_agent_solution(mock_task, mock_pre_hook)
+
+                # Verify agent interaction
+                mock_agent.print.assert_called_once_with(msg_input)
+                mock_agent.__call__.assert_called_once_with(msg_input)
+
+    @pytest.mark.asyncio
+    async def test_solution_output(
+        self,
+        mock_task: Task,
+        mock_pre_hook: Mock,
+    ) -> None:
+        """Test solution output format and content"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Mock memory and phone responses
+            mock_memory = AsyncMock()
+            mock_memory.get_memory.return_value = [
+                Msg(
+                    "assistant",
+                    "Test response",
+                    role="assistant",
+                    content=[
+                        {
+                            "type": "tool_use",
+                            "content": {
+                                "name": "mock_tool",
+                                "arguments": {"param1": "test", "param2": 42},
+                            },
+                        },
+                    ],
+                ),
+            ]
+
+            mock_phone = Mock(spec=ACEPhone)
+            mock_phone.get_current_state.return_value = {"status": "completed"}
+
+            # Patch the phone in task metadata
+            mock_task.metadata["phone"] = mock_phone
+
+            # Patch the agent's memory property
+            with patch.object(ReActAgent, "memory", mock_memory):
+                # Run the solution function
+                solution = await ace_main.react_agent_solution(
+                    mock_task,
+                    mock_pre_hook,
+                )
+
+                # Verify solution output
+                assert isinstance(solution, SolutionOutput)
+                assert solution.success is True
+                assert solution.output == {"status": "completed"}
+                assert len(solution.trajectory) == 1
+                assert solution.trajectory[0]["name"] == "mock_tool"
+
+    @pytest.mark.asyncio
+    async def test_error_handling(
+        self,
+        mock_task: Task,
+        mock_pre_hook: Mock,
+    ) -> None:
+        """Test error handling in the solution function"""
+        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
+            # Mock a failure case
+            with patch(
+                "evaluation.ace_bench.main.Toolkit.register_tool_function",
+                side_effect=Exception("Registration error"),
+            ):
+                with pytest.raises(Exception) as exc_info:
+                    await ace_main.react_agent_solution(
+                        mock_task,
+                        mock_pre_hook,
+                    )
+
+                assert "Registration error" in str(exc_info.value)
+
+
+class TestMainFunction:
+    """Test suite for the main function"""
+
+    @pytest.fixture
+    def mock_args(self) -> Mock:
+        """Create mock command-line arguments"""
+        args = Mock()
+        args.data_dir = "/test/data"
+        args.result_dir = "/test/results"
+        args.n_workers = 2
+        return args
+
+    def test_directory_validation(self, mock_args: Mock) -> None:
+        """Test directory validation in main function"""
+        with patch(
+            "evaluation.ace_bench.main.ArgumentParser.parse_args",
+            return_value=mock_args,
+        ):
+            with patch("os.makedirs") as mock_makedirs:
+                # Run main function
+                asyncio.run(ace_main.main())
+
+                # Verify directory creation
+                mock_makedirs.assert_any_call("/test/data", exist_ok=True)
+                mock_makedirs.assert_any_call("/test/results", exist_ok=True)
+
+    @pytest.mark.asyncio
+    async def test_evaluator_initialization(self, mock_args: Mock) -> None:
+        """Test evaluator initialization"""
+        with patch(
+            "evaluation.ace_bench.main.ArgumentParser.parse_args",
+            return_value=mock_args,
+        ):
+            with patch(
+                "evaluation.ace_bench.main.RayEvaluator",
+            ) as mock_evaluator_class:
+                mock_evaluator = Mock()
+                mock_evaluator_class.return_value = mock_evaluator
+
+                # Run main function
+                await ace_main.main()
+
+                # Verify evaluator initialization
+                mock_evaluator_class.assert_called_once()
+                call_args = mock_evaluator_class.call_args[1]
+                assert call_args["n_workers"] == 2
+                assert isinstance(call_args["benchmark"], ACEBenchmark)
+                assert call_args["benchmark"].data_dir == "/test/data"
+
+    @pytest.mark.asyncio
+    async def test_evaluation_execution(self, mock_args: Mock) -> None:
+        """Test evaluation execution"""
+        with patch(
+            "evaluation.ace_bench.main.ArgumentParser.parse_args",
+            return_value=mock_args,
+        ):
+            with patch(
+                "evaluation.ace_bench.main.RayEvaluator",
+            ) as mock_evaluator_class:
+                mock_evaluator = Mock()
+                mock_evaluator.run = AsyncMock()
+                mock_evaluator_class.return_value = mock_evaluator
+
+                # Run main function
+                await ace_main.main()
+
+                # Verify evaluation execution
+                mock_evaluator.run.assert_called_once_with(
+                    ace_main.react_agent_solution,
+                )