# evotraders/tests/evaluation_test.py

# -*- coding: utf-8 -*-
# tests/evaluation_test.py
import asyncio

import pytest
import os
from unittest.mock import Mock, patch, AsyncMock
from typing import List, Dict, Any, Tuple, Callable

from agentscope.message import Msg
from agentscope.model import DashScopeChatModel
from agentscope.agent import ReActAgent
from agentscope.evaluate import Task, ACEPhone, SolutionOutput, ACEBenchmark
from agentscope.tool import Toolkit

# Import the main module from the correct path
from ..evaluation.ace_bench import main as ace_main


class TestReActAgentSolution:
    """Test suite for the ReAct agent solution function.

    Collaborators are replaced on the imported ``ace_main`` module object
    itself (``patch.object`` / ``monkeypatch.setattr``) instead of through a
    dotted string target.  ``ace_main`` is imported relatively at the top of
    this file, so a string such as ``"evaluation.ace_bench.main.Toolkit"``
    may resolve to a different module object (or fail to import at all) and
    leave the code under test unpatched.
    """

    @pytest.fixture
    def mock_task(self) -> Task:
        """Create a mock ACEBench task with an input, tools and a phone."""
        task = Mock(spec=Task)
        task.input = "Test input query"
        task.metadata = {
            "tools": self._create_mock_tools(),
            "phone": Mock(spec=ACEPhone),
        }
        return task

    @pytest.fixture
    def mock_pre_hook(self) -> Mock:
        """Create a mock pre-hook function"""
        return Mock()

    @pytest.fixture
    def mock_agent_class(self, monkeypatch: pytest.MonkeyPatch) -> Mock:
        """Swap ``ace_main.ReActAgent`` for a stub class and return the stub.

        The stub's ``return_value`` is an ``AsyncMock`` spec'd on
        ``ReActAgent``, so awaiting the agent call works and the tests never
        drive a real agent with the fake API key.  ``DASHSCOPE_API_KEY`` is
        also set for the duration of the test.

        Returns:
            The stub class; ``stub.return_value`` is the agent instance that
            the solution function receives.
        """
        monkeypatch.setenv("DASHSCOPE_API_KEY", "test_key")

        agent = AsyncMock(spec=ReActAgent)
        # ``memory`` is attached explicitly so each test controls what
        # ``get_memory`` yields; by default the agent remembers nothing.
        agent.memory = AsyncMock()
        agent.memory.get_memory.return_value = []

        agent_class = Mock(return_value=agent)
        monkeypatch.setattr(ace_main, "ReActAgent", agent_class)
        return agent_class

    def _create_mock_tools(self) -> List[Tuple[Callable, Dict[str, Any]]]:
        """Create mock tool functions paired with their JSON schemas."""

        def mock_tool(*args, **kwargs):
            return "tool_response"

        tool_schema = {
            "name": "mock_tool",
            "description": "A mock tool for testing",
            "parameters": {
                "type": "object",
                "properties": {
                    "param1": {"type": "string"},
                    "param2": {"type": "number"},
                },
                "required": ["param1"],
            },
        }

        return [(mock_tool, tool_schema)]

    @staticmethod
    def _assert_user_msg(msg: Any, expected_content: str) -> None:
        """Assert that ``msg`` is a user ``Msg`` carrying ``expected_content``.

        Fields are compared one by one because the solution builds its own
        ``Msg``; comparing against a second instance created in the test
        would only pass if ``Msg`` defined value equality.
        NOTE(review): ``Msg`` does not appear to define ``__eq__`` - confirm.
        """
        assert isinstance(msg, Msg)
        assert msg.name == "user"
        assert msg.role == "user"
        assert msg.content == expected_content

    @pytest.mark.asyncio
    async def test_agent_initialization(
        self,
        mock_task: Task,
        mock_pre_hook: Mock,
        mock_agent_class: Mock,
    ) -> None:
        """Test ReAct agent initialization with valid configuration"""
        await ace_main.react_agent_solution(mock_task, mock_pre_hook)

        # A single task must build exactly one agent.
        mock_agent_class.assert_called_once()
        assert mock_task.metadata["tools"] is not None
        assert len(mock_task.metadata["tools"]) > 0

    @pytest.mark.asyncio
    async def test_tool_registration(
        self,
        mock_task: Task,
        mock_pre_hook: Mock,
        mock_agent_class: Mock,
    ) -> None:
        """Test that every task tool is registered in the toolkit"""
        with patch.object(ace_main, "Toolkit") as mock_toolkit_class:
            mock_toolkit = Mock(spec=Toolkit)
            mock_toolkit_class.return_value = mock_toolkit

            await ace_main.react_agent_solution(mock_task, mock_pre_hook)

        tools = mock_task.metadata["tools"]
        assert mock_toolkit.register_tool_function.call_count == len(tools)

        # Each (tool, schema) pair must have been registered as given.
        for tool, schema in tools:
            mock_toolkit.register_tool_function.assert_any_call(
                tool,
                json_schema=schema,
            )

    @pytest.mark.asyncio
    async def test_agent_interaction(
        self,
        mock_task: Task,
        mock_pre_hook: Mock,
        mock_agent_class: Mock,
    ) -> None:
        """Test that the task input is printed by and sent to the agent"""
        # The agent stub is itself awaitable.  Assigning ``__call__`` on a
        # mock *instance* would have no effect, because Python looks special
        # methods up on the type, not on the instance.
        mock_agent = mock_agent_class.return_value

        await ace_main.react_agent_solution(mock_task, mock_pre_hook)

        mock_agent.print.assert_called_once()
        self._assert_user_msg(
            mock_agent.print.call_args.args[0],
            mock_task.input,
        )

        mock_agent.assert_awaited_once()
        self._assert_user_msg(
            mock_agent.await_args.args[0],
            mock_task.input,
        )

    @pytest.mark.asyncio
    async def test_solution_output(
        self,
        mock_task: Task,
        mock_pre_hook: Mock,
        mock_agent_class: Mock,
    ) -> None:
        """Test solution output format and content"""
        # Flat tool-use block: ``name`` sits at the top level so the
        # ``trajectory[0]["name"]`` assertion below can find it.
        tool_use_block = {
            "type": "tool_use",
            "id": "call_1",
            "name": "mock_tool",
            "input": {"param1": "test", "param2": 42},
        }

        # The memory is configured on the stub agent instance; the content
        # is passed once, positionally (passing it a second time by keyword
        # raises ``TypeError``).
        mock_agent = mock_agent_class.return_value
        mock_agent.memory.get_memory.return_value = [
            Msg("assistant", [tool_use_block], role="assistant"),
        ]

        mock_phone = Mock(spec=ACEPhone)
        mock_phone.get_current_state.return_value = {"status": "completed"}
        mock_task.metadata["phone"] = mock_phone

        solution = await ace_main.react_agent_solution(
            mock_task,
            mock_pre_hook,
        )

        assert isinstance(solution, SolutionOutput)
        assert solution.success is True
        assert solution.output == {"status": "completed"}
        assert len(solution.trajectory) == 1
        assert solution.trajectory[0]["name"] == "mock_tool"

    @pytest.mark.asyncio
    async def test_error_handling(
        self,
        mock_task: Task,
        mock_pre_hook: Mock,
        mock_agent_class: Mock,
    ) -> None:
        """Test that a tool registration failure propagates to the caller"""
        with patch.object(
            ace_main.Toolkit,
            "register_tool_function",
            side_effect=RuntimeError("Registration error"),
        ):
            # Match the exact type and message so an unrelated failure
            # cannot make this test pass by accident.
            with pytest.raises(RuntimeError, match="Registration error"):
                await ace_main.react_agent_solution(
                    mock_task,
                    mock_pre_hook,
                )


class TestMainFunction:
    """Test suite for the main function.

    ``RayEvaluator`` and argument parsing are replaced on the imported
    ``ace_main`` module object itself rather than through a dotted string
    target, which may not resolve to the module imported at the top of this
    file.

    NOTE(review): ``main()`` presumably still builds a real ``ACEBenchmark``
    (the ``isinstance`` check below relies on it) and possibly a storage
    object; confirm that constructing them with ``/test/...`` paths does not
    touch the network or the filesystem.
    """

    @pytest.fixture
    def mock_args(self) -> Mock:
        """Create mock command-line arguments"""
        args = Mock()
        args.data_dir = "/test/data"
        args.result_dir = "/test/results"
        args.n_workers = 2
        return args

    @pytest.fixture
    def mock_evaluator_class(
        self,
        mock_args: Mock,
        monkeypatch: pytest.MonkeyPatch,
    ) -> Mock:
        """Stub argument parsing and ``RayEvaluator``; return the stub class.

        ``parse_args`` yields ``mock_args`` and the evaluator's ``run`` is an
        ``AsyncMock`` so that ``main()`` can await it.  Without the evaluator
        stub a test would start a real evaluation run.
        """
        monkeypatch.setattr(
            ace_main.ArgumentParser,
            "parse_args",
            Mock(return_value=mock_args),
        )

        evaluator_class = Mock()
        evaluator_class.return_value.run = AsyncMock()
        monkeypatch.setattr(ace_main, "RayEvaluator", evaluator_class)
        return evaluator_class

    def test_directory_validation(
        self,
        mock_args: Mock,
        mock_evaluator_class: Mock,
    ) -> None:
        """Test directory creation in main function"""
        with patch("os.makedirs") as mock_makedirs:
            asyncio.run(ace_main.main())

        mock_makedirs.assert_any_call(mock_args.data_dir, exist_ok=True)
        mock_makedirs.assert_any_call(mock_args.result_dir, exist_ok=True)

    @pytest.mark.asyncio
    async def test_evaluator_initialization(
        self,
        mock_args: Mock,
        mock_evaluator_class: Mock,
    ) -> None:
        """Test that the evaluator is built from the parsed arguments"""
        await ace_main.main()

        mock_evaluator_class.assert_called_once()
        init_kwargs = mock_evaluator_class.call_args.kwargs
        assert init_kwargs["n_workers"] == mock_args.n_workers
        assert isinstance(init_kwargs["benchmark"], ACEBenchmark)
        assert init_kwargs["benchmark"].data_dir == mock_args.data_dir

    @pytest.mark.asyncio
    async def test_evaluation_execution(
        self,
        mock_args: Mock,
        mock_evaluator_class: Mock,
    ) -> None:
        """Test that main awaits the evaluator with the solution function"""
        await ace_main.main()

        run = mock_evaluator_class.return_value.run
        run.assert_awaited_once_with(ace_main.react_agent_solution)