feat: Add evaluation hooks, skill adaptation and team pipeline config

- Add EvaluationHook for post-execution agent evaluation
- Add SkillAdaptationHook for dynamic skill adaptation
- Add team/ directory with team coordination logic
- Add TEAM_PIPELINE.yaml for smoke_fullstack pipeline config
- Update RuntimeView, TraderView and RuntimeSettingsPanel UI
- Add runtimeApi and websocket services
- Add runtime_state.json to smoke_fullstack state

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 18:52:12 +08:00
parent f4a2b7f3af
commit 4b5ac86b83
87 changed files with 5042 additions and 744 deletions
--- a/backend/agents/base/__init__.py
+++ b/backend/agents/base/__init__.py
@@ -13,6 +13,26 @@ from .command_handler import (
    create_command_dispatcher,
 )

+# 评估钩子 (从evaluation_hook.py导入)
+from .evaluation_hook import (
+    EvaluationHook,
+    EvaluationCollector,
+    MetricType,
+    EvaluationMetric,
+    EvaluationResult,
+    parse_evaluation_hooks,
+)
+
+# 技能适配钩子 (从skill_adaptation_hook.py导入)
+from .skill_adaptation_hook import (
+    AdaptationAction,
+    AdaptationThreshold,
+    AdaptationEvent,
+    SkillAdaptationHook,
+    AdaptationManager,
+    get_adaptation_manager,
+)
+
 __all__ = [
    # 命令处理
    "AgentCommandDispatcher",
@@ -20,4 +40,18 @@ __all__ = [
    "CommandHandler",
    "CommandResult",
    "create_command_dispatcher",
+    # 评估钩子
+    "EvaluationHook",
+    "EvaluationCollector",
+    "MetricType",
+    "EvaluationMetric",
+    "EvaluationResult",
+    "parse_evaluation_hooks",
+    # 技能适配钩子
+    "AdaptationAction",
+    "AdaptationThreshold",
+    "AdaptationEvent",
+    "SkillAdaptationHook",
+    "AdaptationManager",
+    "get_adaptation_manager",
 ]
--- a/backend/agents/base/evo_agent.py
+++ b/backend/agents/base/evo_agent.py
@@ -27,6 +27,7 @@ from .hooks import (
    HookManager,
    BootstrapHook,
    MemoryCompactionHook,
+    WorkspaceWatchHook,
    HOOK_PRE_REASONING,
 )
 from ..prompts.builder import (
@@ -36,6 +37,16 @@ from ..prompts.builder import (
 from ..agent_workspace import load_agent_workspace_config
 from ..skills_manager import SkillsManager

+# Team infrastructure imports (graceful import - may not exist yet)
+try:
+    from backend.agents.team.messenger import AgentMessenger
+    from backend.agents.team.task_delegator import TaskDelegator
+    TEAM_INFRA_AVAILABLE = True
+except ImportError:
+    TEAM_INFRA_AVAILABLE = False
+    AgentMessenger = None
+    TaskDelegator = None
+
 if TYPE_CHECKING:
    from agentscope.formatter import FormatterBase
    from agentscope.model import ModelWrapperBase
@@ -152,6 +163,12 @@ class EvoAgent(ToolGuardMixin, ReActAgent):
            memory_compact_threshold=memory_compact_threshold,
        )

+        # Initialize team infrastructure if available
+        self._messenger: Optional["AgentMessenger"] = None
+        self._task_delegator: Optional["TaskDelegator"] = None
+        if TEAM_INFRA_AVAILABLE:
+            self._init_team_infrastructure()
+
        logger.info(
            "EvoAgent initialized: %s (workspace: %s)",
            agent_id,
@@ -268,6 +285,17 @@ class EvoAgent(ToolGuardMixin, ReActAgent):
            )
            logger.debug("Registered memory compaction hook")

+        # Workspace watch hook - auto-reload markdown files on change
+        workspace_watch_hook = WorkspaceWatchHook(
+            workspace_dir=self.workspace_dir,
+        )
+        self._hook_manager.register(
+            hook_type=HOOK_PRE_REASONING,
+            hook_name="workspace_watch",
+            hook=workspace_watch_hook,
+        )
+        logger.debug("Registered workspace watch hook")
+
    async def _reasoning(self, **kwargs) -> Msg:
        """Override reasoning to execute pre-reasoning hooks.

@@ -405,7 +433,78 @@ class EvoAgent(ToolGuardMixin, ReActAgent):
                )
            ]),
            "registered_hooks": self._hook_manager.list_hooks(),
+            "team_infra_available": TEAM_INFRA_AVAILABLE,
        }

+    def _init_team_infrastructure(self) -> None:
+        """Create the messenger and task delegator used for team coordination.
+
+        Does nothing when the team package could not be imported
+        (``TEAM_INFRA_AVAILABLE`` is False). Any exception raised while
+        constructing either component is logged as a warning and both
+        attributes are reset to ``None``, so a construction failure never
+        propagates to the caller.
+        """
+        if not TEAM_INFRA_AVAILABLE:
+            return
+
+        try:
+            # The messenger is assigned before the delegator is constructed;
+            # the delegator is handed this agent instance.
+            self._messenger = AgentMessenger(agent_id=self.agent_id)
+            self._task_delegator = TaskDelegator(agent=self)
+            logger.debug(
+                "Team infrastructure initialized for agent: %s",
+                self.agent_id,
+            )
+        except Exception as e:
+            logger.warning(
+                "Failed to initialize team infrastructure for %s: %s",
+                self.agent_id,
+                e,
+            )
+            # Reset both so the agent is never left half-initialized.
+            self._messenger = None
+            self._task_delegator = None
+
+    @property
+    def messenger(self) -> Optional["AgentMessenger"]:
+        """Return the messenger used for inter-agent communication.
+
+        Returns:
+            The ``AgentMessenger`` stored on this agent, or ``None`` when
+            team infrastructure is unavailable or failed to initialize.
+        """
+        return self._messenger
+
+    def delegate_task(
+        self,
+        task_type: str,
+        task_data: Dict[str, Any],
+        target_agent: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Delegate a task to a subagent using the TaskDelegator.
+
+        Args:
+            task_type: Type of task to delegate
+            task_data: Data/payload for the task
+            target_agent: Optional specific agent ID to delegate to
+
+        Returns:
+            Dict containing the delegation result
+        """
+        if not TEAM_INFRA_AVAILABLE or self._task_delegator is None:
+            return {
+                "success": False,
+                "error": "Team infrastructure not available",
+            }
+
+        try:
+            return self._task_delegator.delegate_task(
+                task_type=task_type,
+                task_data=task_data,
+                target_agent=target_agent,
+            )
+        except Exception as e:
+            logger.error(
+                "Task delegation failed for %s: %s",
+                self.agent_id,
+                e,
+            )
+            return {"success": False, "error": str(e)}
+

 __all__ = ["EvoAgent"]
--- a/backend/agents/base/hooks.py
+++ b/backend/agents/base/hooks.py
@@ -284,19 +284,120 @@ class BootstrapHook(Hook):
        return None


+class WorkspaceWatchHook(Hook):
+    """Hook for auto-reloading workspace markdown files on change.
+
+    Monitors SOUL.md, AGENTS.md, PROFILE.md, etc. and triggers
+    a prompt rebuild when any of them change. Based on CoPaw's
+    AgentConfigWatcher approach but for markdown files.
+    """
+
+    # Files to monitor (same as PromptBuilder.DEFAULT_FILES)
+    WATCHED_FILES = frozenset([
+        "SOUL.md", "AGENTS.md", "PROFILE.md", "ROLE.md",
+        "POLICY.md", "MEMORY.md", "HEARTBEAT.md", "STYLE.md",
+        "BOOTSTRAP.md",
+    ])
+
+    def __init__(
+        self,
+        workspace_dir: Path,
+        poll_interval: float = 2.0,
+    ):
+        """Initialize workspace watch hook.
+
+        Args:
+            workspace_dir: Workspace directory to monitor
+            poll_interval: How often to check for changes (seconds)
+        """
+        self.workspace_dir = Path(workspace_dir)
+        self.poll_interval = poll_interval
+        self._last_mtimes: dict[str, float] = {}
+        self._initialized = False
+
+    def _scan_mtimes(self) -> dict[str, float]:
+        """Scan watched files and return their current mtimes."""
+        mtimes = {}
+        for name in self.WATCHED_FILES:
+            path = self.workspace_dir / name
+            if path.exists():
+                mtimes[name] = path.stat().st_mtime
+        return mtimes
+
+    def _has_changes(self) -> bool:
+        """Check if any watched file has changed since last check."""
+        current = self._scan_mtimes()
+
+        if not self._initialized:
+            self._last_mtimes = current
+            self._initialized = True
+            return False
+
+        # Check for new, modified, or deleted files
+        if set(current.keys()) != set(self._last_mtimes.keys()):
+            self._last_mtimes = current
+            return True
+
+        for name, mtime in current.items():
+            if mtime != self._last_mtimes.get(name):
+                self._last_mtimes = current
+                return True
+
+        return False
+
+    async def __call__(
+        self,
+        agent: "ReActAgent",
+        kwargs: Dict[str, Any],
+    ) -> Optional[Dict[str, Any]]:
+        """Check for file changes and rebuild prompt if needed.
+
+        Args:
+            agent: The agent instance
+            kwargs: Input arguments (unused)
+
+        Returns:
+            None
+        """
+        try:
+            if self._has_changes():
+                logger.info(
+                    "Workspace files changed, triggering prompt rebuild for: %s",
+                    getattr(agent, "agent_id", "unknown"),
+                )
+                if hasattr(agent, "rebuild_sys_prompt"):
+                    agent.rebuild_sys_prompt()
+                else:
+                    logger.warning(
+                        "Agent %s has no rebuild_sys_prompt method",
+                        getattr(agent, "agent_id", "unknown"),
+                    )
+        except Exception as e:
+            logger.error("Workspace watch hook failed: %s", e, exc_info=True)
+
+        return None
+
+
 class MemoryCompactionHook(Hook):
    """Hook for automatic memory compaction when context is full.

    This hook monitors the token count of messages and triggers compaction
    when it exceeds the threshold. It preserves the system prompt and recent
    messages while summarizing older conversation history.
+
+    Based on CoPaw's memory compaction design with additional improvements:
+    - memory_compact_ratio: Ratio to compact when threshold reached
+    - memory_reserve_ratio: Always keep a reserve of tokens for recent messages
+    - enable_tool_result_compact: Compact tool results separately
+    - tool_result_compact_keep_n: Number of tool results to keep
    """

    def __init__(
        self,
        memory_manager: Any,
        memory_compact_threshold: Optional[int] = None,
-        memory_compact_reserve: Optional[int] = None,
+        memory_compact_ratio: float = 0.75,
+        memory_reserve_ratio: float = 0.1,
        enable_tool_result_compact: bool = False,
        tool_result_compact_keep_n: int = 5,
    ):
@@ -305,13 +406,15 @@ class MemoryCompactionHook(Hook):
        Args:
            memory_manager: Memory manager instance for compaction
            memory_compact_threshold: Token threshold for compaction
-            memory_compact_reserve: Reserve tokens for recent messages
+            memory_compact_ratio: Target ratio to compact to (e.g., 0.75 = compact to 75%)
+            memory_reserve_ratio: Reserve ratio to always keep free (e.g., 0.1 = 10%)
            enable_tool_result_compact: Enable tool result compaction
            tool_result_compact_keep_n: Number of tool results to keep
        """
        self.memory_manager = memory_manager
        self.memory_compact_threshold = memory_compact_threshold
-        self.memory_compact_reserve = memory_compact_reserve
+        self.memory_compact_ratio = memory_compact_ratio
+        self.memory_reserve_ratio = memory_reserve_ratio
        self.enable_tool_result_compact = enable_tool_result_compact
        self.tool_result_compact_keep_n = tool_result_compact_keep_n

@@ -382,32 +485,61 @@ class MemoryCompactionHook(Hook):
    ) -> None:
        """Compact memory by summarizing older messages.

+        Uses CoPaw-style memory management:
+        - memory_compact_ratio: Target ratio to compact to (e.g., 0.75 means compact to 75%)
+        - memory_reserve_ratio: Always keep this ratio free (e.g., 0.1 means keep 10% for recent)
+
        Args:
            agent: The agent instance
            messages: Current messages in memory
        """
-        if self.memory_compact_reserve is None:
+        if self.memory_compact_threshold is None:
            return

-        # Keep recent messages
-        keep_count = min(
-            len(messages) // 4,
-            10,  # Max 10 recent messages
-        )
-        keep_count = max(keep_count, 2)  # At least 2
+        # Estimate total tokens
+        total_tokens = self._estimate_tokens(messages)

-        messages_to_compact = messages[:-keep_count] if keep_count < len(messages) else []
+        # Calculate reserve based on ratio (CoPaw-style)
+        reserve_tokens = int(total_tokens * self.memory_reserve_ratio)
+
+        # Calculate target tokens after compaction
+        target_tokens = int(total_tokens * self.memory_compact_ratio)
+        target_tokens = max(target_tokens, total_tokens - reserve_tokens)
+
+        # Find messages to compact (older ones)
+        # Keep recent messages that fit within target
+        messages_to_compact = []
+        kept_tokens = 0
+
+        # Start from oldest, stop when we've kept enough
+        for msg in messages:
+            msg_tokens = self._estimate_tokens([msg])
+            if kept_tokens + msg_tokens > target_tokens:
+                messages_to_compact.append(msg)
+            else:
+                kept_tokens += msg_tokens

        if not messages_to_compact:
            return

+        logger.info(
+            "Compacting %d messages (%d tokens) to target %d tokens",
+            len(messages_to_compact),
+            self._estimate_tokens(messages_to_compact),
+            target_tokens,
+        )
+
        # Use memory manager to compact if available
        if hasattr(self.memory_manager, "compact_memory"):
            try:
                summary = await self.memory_manager.compact_memory(
                    messages=messages_to_compact,
                )
-                logger.info("Memory compacted: %d messages summarized", len(messages_to_compact))
+                logger.info(
+                    "Memory compacted: %d messages summarized, summary: %s",
+                    len(messages_to_compact),
+                    summary[:200] if summary else "N/A",
+                )

                # Mark messages as compressed if supported
                if hasattr(agent.memory, "update_messages_mark"):
@@ -420,6 +552,142 @@ class MemoryCompactionHook(Hook):
            except Exception as e:
                logger.error("Memory manager compaction failed: %s", e)

+        # Tool result compaction (CoPaw-style)
+        if self.enable_tool_result_compact:
+            await self._compact_tool_results(agent, messages)
+
+    async def _compact_tool_results(
+        self,
+        agent: "ReActAgent",
+        messages: List[Any],
+    ) -> None:
+        """Compact tool results by keeping only recent ones.
+
+        Based on CoPaw's tool_result_compact_keep_n pattern.
+        Tool results can be very verbose, so we keep only the N most recent ones.
+
+        Args:
+            agent: The agent instance
+            messages: Current messages in memory
+        """
+        if not hasattr(agent.memory, "content"):
+            return
+
+        # Find tool result messages (usually have "tool" role or tool_related content)
+        tool_results = []
+        for msg, _ in agent.memory.content:
+            if hasattr(msg, "role") and msg.role == "tool":
+                tool_results.append(msg)
+
+        if len(tool_results) <= self.tool_result_compact_keep_n:
+            return
+
+        # Keep only the most recent N tool results
+        excess_results = tool_results[:-self.tool_result_compact_keep_n]
+
+        logger.info(
+            "Tool result compaction: %d tool results found, keeping %d, compacting %d",
+            len(tool_results),
+            self.tool_result_compact_keep_n,
+            len(excess_results),
+        )
+
+        # Mark excess tool results as compressed if supported
+        if hasattr(agent.memory, "update_messages_mark"):
+            from agentscope.agent._react_agent import _MemoryMark
+            await agent.memory.update_messages_mark(
+                new_mark=_MemoryMark.COMPRESSED,
+                msg_ids=[msg.id for msg in excess_results],
+            )
+
+
+class HeartbeatHook(Hook):
+    """Pre-reasoning hook that injects HEARTBEAT.md content.
+
+    Reads the agent's HEARTBEAT.md file and prepends it to the
+    reasoning input, causing the agent to perform self-checks.
+
+    This enables "主动检查" (proactive monitoring) - periodic
+    market condition and position checks during trading hours.
+    """
+
+    HEARTBEAT_FILE = "HEARTBEAT.md"
+
+    def __init__(self, workspace_dir: Path):
+        """Initialize heartbeat hook.
+
+        Args:
+            workspace_dir: Working directory containing HEARTBEAT.md
+        """
+        self.workspace_dir = Path(workspace_dir)
+        self._completed_flag = self.workspace_dir / ".heartbeat_completed"
+
+    def _read_heartbeat_content(self) -> Optional[str]:
+        """Read HEARTBEAT.md if it exists and is non-empty.
+
+        Returns:
+            The HEARTBEAT.md content stripped of whitespace, or None
+            if the file is absent or empty.
+        """
+        hb_path = self.workspace_dir / self.HEARTBEAT_FILE
+        if not hb_path.exists():
+            return None
+        content = hb_path.read_text(encoding="utf-8").strip()
+        return content if content else None
+
+    async def __call__(
+        self,
+        agent: "ReActAgent",
+        kwargs: Dict[str, Any],
+    ) -> Optional[Dict[str, Any]]:
+        """Prepend heartbeat task to user message.
+
+        Args:
+            agent: The agent instance
+            kwargs: Input arguments to the _reasoning method
+
+        Returns:
+            Modified kwargs with heartbeat content prepended, or None
+            if no HEARTBEAT.md content is available.
+        """
+        try:
+            content = self._read_heartbeat_content()
+            if not content:
+                return None
+
+            logger.debug(
+                "Heartbeat: found HEARTBEAT.md for agent %s",
+                getattr(agent, "agent_id", "unknown"),
+            )
+
+            # Build heartbeat task instruction (Chinese)
+            hb_task = (
+                "# 定期主动检查\n\n"
+                f"{content}\n\n"
+                "请执行上述检查并报告结果。"
+            )
+
+            # Inject into the first user message in memory
+            if hasattr(agent, "memory") and agent.memory.content:
+                system_count = sum(
+                    1 for msg, _ in agent.memory.content if msg.role == "system"
+                )
+                for msg, _ in agent.memory.content[system_count:]:
+                    if msg.role == "user":
+                        original_content = msg.content
+                        msg.content = hb_task + "\n\n" + original_content
+                        break
+
+            logger.debug(
+                "Heartbeat task prepended for agent %s",
+                getattr(agent, "agent_id", "unknown"),
+            )
+
+        except Exception as e:
+            logger.error("Heartbeat hook failed: %s", e, exc_info=True)
+
+        return None
+

 __all__ = [
    "Hook",
@@ -428,5 +696,7 @@ __all__ = [
    "HOOK_PRE_REASONING",
    "HOOK_POST_ACTING",
    "BootstrapHook",
+    "HeartbeatHook",
    "MemoryCompactionHook",
+    "WorkspaceWatchHook",
 ]