feat: Add evaluation hooks, skill adaptation and team pipeline config

- Add EvaluationHook for post-execution agent evaluation
- Add SkillAdaptationHook for dynamic skill adaptation
- Add team/ directory with team coordination logic
- Add TEAM_PIPELINE.yaml for smoke_fullstack pipeline config
- Update RuntimeView, TraderView and RuntimeSettingsPanel UI
- Add runtimeApi and websocket services
- Add runtime_state.json to smoke_fullstack state

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-19 18:52:12 +08:00
parent f4a2b7f3af
commit 4b5ac86b83
87 changed files with 5042 additions and 744 deletions

View File

@@ -13,6 +13,26 @@ from .command_handler import (
create_command_dispatcher,
)
# 评估钩子 (从evaluation_hook.py导入)
from .evaluation_hook import (
EvaluationHook,
EvaluationCollector,
MetricType,
EvaluationMetric,
EvaluationResult,
parse_evaluation_hooks,
)
# 技能适配钩子 (从skill_adaptation_hook.py导入)
from .skill_adaptation_hook import (
AdaptationAction,
AdaptationThreshold,
AdaptationEvent,
SkillAdaptationHook,
AdaptationManager,
get_adaptation_manager,
)
__all__ = [
# 命令处理
"AgentCommandDispatcher",
@@ -20,4 +40,18 @@ __all__ = [
"CommandHandler",
"CommandResult",
"create_command_dispatcher",
# 评估钩子
"EvaluationHook",
"EvaluationCollector",
"MetricType",
"EvaluationMetric",
"EvaluationResult",
"parse_evaluation_hooks",
# 技能适配钩子
"AdaptationAction",
"AdaptationThreshold",
"AdaptationEvent",
"SkillAdaptationHook",
"AdaptationManager",
"get_adaptation_manager",
]

View File

@@ -27,6 +27,7 @@ from .hooks import (
HookManager,
BootstrapHook,
MemoryCompactionHook,
WorkspaceWatchHook,
HOOK_PRE_REASONING,
)
from ..prompts.builder import (
@@ -36,6 +37,16 @@ from ..prompts.builder import (
from ..agent_workspace import load_agent_workspace_config
from ..skills_manager import SkillsManager
# Team infrastructure imports (graceful import - may not exist yet)
try:
from backend.agents.team.messenger import AgentMessenger
from backend.agents.team.task_delegator import TaskDelegator
TEAM_INFRA_AVAILABLE = True
except ImportError:
TEAM_INFRA_AVAILABLE = False
AgentMessenger = None
TaskDelegator = None
if TYPE_CHECKING:
from agentscope.formatter import FormatterBase
from agentscope.model import ModelWrapperBase
@@ -152,6 +163,12 @@ class EvoAgent(ToolGuardMixin, ReActAgent):
memory_compact_threshold=memory_compact_threshold,
)
# Initialize team infrastructure if available
self._messenger: Optional["AgentMessenger"] = None
self._task_delegator: Optional["TaskDelegator"] = None
if TEAM_INFRA_AVAILABLE:
self._init_team_infrastructure()
logger.info(
"EvoAgent initialized: %s (workspace: %s)",
agent_id,
@@ -268,6 +285,17 @@ class EvoAgent(ToolGuardMixin, ReActAgent):
)
logger.debug("Registered memory compaction hook")
# Workspace watch hook - auto-reload markdown files on change
workspace_watch_hook = WorkspaceWatchHook(
workspace_dir=self.workspace_dir,
)
self._hook_manager.register(
hook_type=HOOK_PRE_REASONING,
hook_name="workspace_watch",
hook=workspace_watch_hook,
)
logger.debug("Registered workspace watch hook")
async def _reasoning(self, **kwargs) -> Msg:
"""Override reasoning to execute pre-reasoning hooks.
@@ -405,7 +433,78 @@ class EvoAgent(ToolGuardMixin, ReActAgent):
)
]),
"registered_hooks": self._hook_manager.list_hooks(),
"team_infra_available": TEAM_INFRA_AVAILABLE,
}
def _init_team_infrastructure(self) -> None:
    """Set up the messenger and task delegator for this agent.

    Nothing happens when the team package could not be imported
    (``TEAM_INFRA_AVAILABLE`` is false). If constructing either component
    raises, the failure is logged as a warning and both attributes are
    reset to ``None`` so the agent keeps working without team support.
    """
    if TEAM_INFRA_AVAILABLE:
        try:
            # Messenger first, then the delegator bound to this agent.
            self._messenger = AgentMessenger(agent_id=self.agent_id)
            self._task_delegator = TaskDelegator(agent=self)
            logger.debug(
                "Team infrastructure initialized for agent: %s",
                self.agent_id,
            )
        except Exception as exc:
            logger.warning(
                "Failed to initialize team infrastructure for %s: %s",
                self.agent_id,
                exc,
            )
            # Never leave a half-initialized pair behind.
            self._messenger = None
            self._task_delegator = None
@property
def messenger(self) -> Optional["AgentMessenger"]:
    """Messenger used by this agent for inter-agent communication.

    Returns:
        The value stored in ``_messenger``: an AgentMessenger instance,
        or None when no messenger has been set up.
    """
    current_messenger = self._messenger
    return current_messenger
def delegate_task(
    self,
    task_type: str,
    task_data: Dict[str, Any],
    target_agent: Optional[str] = None,
) -> Dict[str, Any]:
    """Hand a task over to a subagent through the TaskDelegator.

    Args:
        task_type: Type of task to delegate
        task_data: Data/payload for the task
        target_agent: Optional specific agent ID to delegate to

    Returns:
        Whatever the delegator returns on success. When team support is
        unavailable or the delegator raises, a dict with
        ``"success": False`` and an ``"error"`` message.
    """
    # Only look at the delegator when the team package was importable.
    delegator = self._task_delegator if TEAM_INFRA_AVAILABLE else None
    if delegator is None:
        return {
            "success": False,
            "error": "Team infrastructure not available",
        }

    try:
        outcome = delegator.delegate_task(
            task_type=task_type,
            task_data=task_data,
            target_agent=target_agent,
        )
    except Exception as exc:
        logger.error(
            "Task delegation failed for %s: %s",
            self.agent_id,
            exc,
        )
        return {"success": False, "error": str(exc)}
    return outcome
__all__ = ["EvoAgent"]

View File

@@ -284,19 +284,120 @@ class BootstrapHook(Hook):
return None
class WorkspaceWatchHook(Hook):
    """Hook that rebuilds the system prompt when workspace markdown changes.

    Each invocation compares the modification times of the files named in
    ``WATCHED_FILES`` (looked up directly inside ``workspace_dir``) with
    the snapshot from the previous scan. When a file was added, removed or
    modified, ``agent.rebuild_sys_prompt()`` is called. The very first scan
    only records a baseline and never triggers a rebuild.
    """

    # File names checked inside workspace_dir.
    # NOTE(review): intended to mirror PromptBuilder.DEFAULT_FILES - confirm
    # the two lists are kept in sync.
    WATCHED_FILES = frozenset([
        "SOUL.md", "AGENTS.md", "PROFILE.md", "ROLE.md",
        "POLICY.md", "MEMORY.md", "HEARTBEAT.md", "STYLE.md",
        "BOOTSTRAP.md",
    ])

    def __init__(
        self,
        workspace_dir: Path,
        poll_interval: float = 2.0,
    ):
        """Initialize workspace watch hook.

        Args:
            workspace_dir: Workspace directory to monitor
            poll_interval: Minimum number of seconds between two scans of
                the watched files. A value <= 0 scans on every call.
        """
        self.workspace_dir = Path(workspace_dir)
        self.poll_interval = poll_interval
        self._last_mtimes: dict[str, float] = {}
        self._initialized = False
        # Monotonic timestamp of the last scan; None until the first one.
        self._last_scan: Optional[float] = None

    def _scan_mtimes(self) -> dict[str, float]:
        """Return the current mtime of every watched file that exists."""
        mtimes: dict[str, float] = {}
        for name in self.WATCHED_FILES:
            try:
                # stat() directly instead of exists()+stat(): a file removed
                # between the two calls would otherwise raise.
                mtimes[name] = (self.workspace_dir / name).stat().st_mtime
            except OSError:
                # Missing or inaccessible files are treated as absent.
                continue
        return mtimes

    def _has_changes(self) -> bool:
        """Report whether a watched file changed since the last scan.

        Scans are throttled to one per ``poll_interval`` seconds; a
        throttled call returns False without touching the snapshot, so a
        pending change is still reported by the next unthrottled call.
        """
        import time

        now = time.monotonic()
        if (
            self._initialized
            and self.poll_interval > 0
            and self._last_scan is not None
            and now - self._last_scan < self.poll_interval
        ):
            return False
        self._last_scan = now

        current = self._scan_mtimes()

        if not self._initialized:
            # First scan: record the baseline only.
            self._last_mtimes = current
            self._initialized = True
            return False

        # Dict equality covers added, removed and modified files at once.
        if current == self._last_mtimes:
            return False

        self._last_mtimes = current
        return True

    async def __call__(
        self,
        agent: "ReActAgent",
        kwargs: Dict[str, Any],
    ) -> Optional[Dict[str, Any]]:
        """Check for file changes and rebuild prompt if needed.

        Args:
            agent: The agent instance
            kwargs: Input arguments (unused)

        Returns:
            Always None; the hook never modifies ``kwargs``. Any exception
            is logged and swallowed so the hook cannot break reasoning.
        """
        try:
            if self._has_changes():
                logger.info(
                    "Workspace files changed, triggering prompt rebuild for: %s",
                    getattr(agent, "agent_id", "unknown"),
                )
                if hasattr(agent, "rebuild_sys_prompt"):
                    agent.rebuild_sys_prompt()
                else:
                    logger.warning(
                        "Agent %s has no rebuild_sys_prompt method",
                        getattr(agent, "agent_id", "unknown"),
                    )
        except Exception as e:
            logger.error("Workspace watch hook failed: %s", e, exc_info=True)

        return None
class MemoryCompactionHook(Hook):
"""Hook for automatic memory compaction when context is full.
This hook monitors the token count of messages and triggers compaction
when it exceeds the threshold. It preserves the system prompt and recent
messages while summarizing older conversation history.
Based on CoPaw's memory compaction design with additional improvements:
- memory_compact_ratio: Ratio to compact when threshold reached
- memory_reserve_ratio: Always keep a reserve of tokens for recent messages
- enable_tool_result_compact: Compact tool results separately
- tool_result_compact_keep_n: Number of tool results to keep
"""
def __init__(
self,
memory_manager: Any,
memory_compact_threshold: Optional[int] = None,
memory_compact_reserve: Optional[int] = None,
memory_compact_ratio: float = 0.75,
memory_reserve_ratio: float = 0.1,
enable_tool_result_compact: bool = False,
tool_result_compact_keep_n: int = 5,
):
@@ -305,13 +406,15 @@ class MemoryCompactionHook(Hook):
Args:
memory_manager: Memory manager instance for compaction
memory_compact_threshold: Token threshold for compaction
memory_compact_reserve: Reserve tokens for recent messages
memory_compact_ratio: Target ratio to compact to (e.g., 0.75 = compact to 75%)
memory_reserve_ratio: Reserve ratio to always keep free (e.g., 0.1 = 10%)
enable_tool_result_compact: Enable tool result compaction
tool_result_compact_keep_n: Number of tool results to keep
"""
self.memory_manager = memory_manager
self.memory_compact_threshold = memory_compact_threshold
self.memory_compact_reserve = memory_compact_reserve
self.memory_compact_ratio = memory_compact_ratio
self.memory_reserve_ratio = memory_reserve_ratio
self.enable_tool_result_compact = enable_tool_result_compact
self.tool_result_compact_keep_n = tool_result_compact_keep_n
@@ -382,32 +485,61 @@ class MemoryCompactionHook(Hook):
) -> None:
"""Compact memory by summarizing older messages.
Uses CoPaw-style memory management:
- memory_compact_ratio: Target ratio to compact to (e.g., 0.75 means compact to 75%)
- memory_reserve_ratio: Always keep this ratio free (e.g., 0.1 means keep 10% for recent)
Args:
agent: The agent instance
messages: Current messages in memory
"""
if self.memory_compact_reserve is None:
if self.memory_compact_threshold is None:
return
# Keep recent messages
keep_count = min(
len(messages) // 4,
10, # Max 10 recent messages
)
keep_count = max(keep_count, 2) # At least 2
# Estimate total tokens
total_tokens = self._estimate_tokens(messages)
messages_to_compact = messages[:-keep_count] if keep_count < len(messages) else []
# Calculate reserve based on ratio (CoPaw-style)
reserve_tokens = int(total_tokens * self.memory_reserve_ratio)
# Calculate target tokens after compaction
target_tokens = int(total_tokens * self.memory_compact_ratio)
target_tokens = max(target_tokens, total_tokens - reserve_tokens)
# Find messages to compact (older ones)
# Keep recent messages that fit within target
messages_to_compact = []
kept_tokens = 0
# Start from oldest, stop when we've kept enough
for msg in messages:
msg_tokens = self._estimate_tokens([msg])
if kept_tokens + msg_tokens > target_tokens:
messages_to_compact.append(msg)
else:
kept_tokens += msg_tokens
if not messages_to_compact:
return
logger.info(
"Compacting %d messages (%d tokens) to target %d tokens",
len(messages_to_compact),
self._estimate_tokens(messages_to_compact),
target_tokens,
)
# Use memory manager to compact if available
if hasattr(self.memory_manager, "compact_memory"):
try:
summary = await self.memory_manager.compact_memory(
messages=messages_to_compact,
)
logger.info("Memory compacted: %d messages summarized", len(messages_to_compact))
logger.info(
"Memory compacted: %d messages summarized, summary: %s",
len(messages_to_compact),
summary[:200] if summary else "N/A",
)
# Mark messages as compressed if supported
if hasattr(agent.memory, "update_messages_mark"):
@@ -420,6 +552,142 @@ class MemoryCompactionHook(Hook):
except Exception as e:
logger.error("Memory manager compaction failed: %s", e)
# Tool result compaction (CoPaw-style)
if self.enable_tool_result_compact:
await self._compact_tool_results(agent, messages)
async def _compact_tool_results(
    self,
    agent: "ReActAgent",
    messages: List[Any],
) -> None:
    """Mark all but the most recent tool results as compressed.

    Scans ``agent.memory.content`` for messages whose ``role`` is
    ``"tool"`` and, when there are more than
    ``tool_result_compact_keep_n`` of them, marks the older ones as
    compressed via ``agent.memory.update_messages_mark`` (if the memory
    object provides that method).

    Args:
        agent: The agent instance
        messages: Current messages in memory (unused; the memory content
            is read from ``agent.memory`` directly)
    """
    if not hasattr(agent.memory, "content"):
        return

    # Memory entries are unpacked as (message, mark) pairs.
    tool_results = [
        msg
        for msg, _ in agent.memory.content
        if getattr(msg, "role", None) == "tool"
    ]

    # Clamp so a negative setting behaves like "keep none".
    keep_n = max(self.tool_result_compact_keep_n, 0)
    if len(tool_results) <= keep_n:
        return

    # Slice by explicit length: ``tool_results[:-0]`` would be empty and
    # silently compact nothing when keep_n is 0.
    excess_results = tool_results[: len(tool_results) - keep_n]

    logger.info(
        "Tool result compaction: %d tool results found, keeping %d, compacting %d",
        len(tool_results),
        keep_n,
        len(excess_results),
    )

    # Mark excess tool results as compressed if supported
    if hasattr(agent.memory, "update_messages_mark"):
        from agentscope.agent._react_agent import _MemoryMark

        await agent.memory.update_messages_mark(
            new_mark=_MemoryMark.COMPRESSED,
            msg_ids=[msg.id for msg in excess_results],
        )
class HeartbeatHook(Hook):
    """Pre-reasoning hook that injects HEARTBEAT.md content.

    Reads HEARTBEAT.md from the workspace directory and prepends it, wrapped
    in a short instruction, to the first user message found in the agent's
    memory. This is meant to make the agent perform proactive self-checks
    (for example periodic market and position checks).

    The injection is idempotent: a message that already starts with the
    current heartbeat text is left untouched, so repeated invocations do
    not stack copies of the same instruction.
    """

    HEARTBEAT_FILE = "HEARTBEAT.md"

    def __init__(self, workspace_dir: Path):
        """Initialize heartbeat hook.

        Args:
            workspace_dir: Working directory containing HEARTBEAT.md
        """
        self.workspace_dir = Path(workspace_dir)
        # Path of a marker file in the workspace; this class only stores
        # the path and does not read or write the file itself.
        self._completed_flag = self.workspace_dir / ".heartbeat_completed"

    def _read_heartbeat_content(self) -> Optional[str]:
        """Read HEARTBEAT.md if it exists and is non-empty.

        Returns:
            The HEARTBEAT.md content stripped of whitespace, or None
            if the file is absent or empty.
        """
        hb_path = self.workspace_dir / self.HEARTBEAT_FILE
        try:
            content = hb_path.read_text(encoding="utf-8").strip()
        except FileNotFoundError:
            return None
        return content or None

    @staticmethod
    def _prepend_to_first_user_message(agent: Any, hb_task: str) -> bool:
        """Prepend ``hb_task`` to the first user message in agent memory.

        Returns:
            True when a message was modified; False when there is no
            memory content, no user message, the message already starts
            with ``hb_task``, or its content is not a plain string.
        """
        memory = getattr(agent, "memory", None)
        entries = getattr(memory, "content", None)
        if not entries:
            return False

        # Skip as many leading entries as there are system messages.
        system_count = sum(1 for msg, _ in entries if msg.role == "system")
        for msg, _ in entries[system_count:]:
            if msg.role != "user":
                continue
            original_content = msg.content
            if not isinstance(original_content, str):
                # Non-string content cannot be concatenated with the
                # heartbeat text; skip instead of raising TypeError.
                logger.warning(
                    "Heartbeat: first user message has non-text content, skipping injection"
                )
                return False
            if original_content.startswith(hb_task):
                # Already injected by an earlier invocation.
                return False
            msg.content = hb_task + "\n\n" + original_content
            return True
        return False

    async def __call__(
        self,
        agent: "ReActAgent",
        kwargs: Dict[str, Any],
    ) -> Optional[Dict[str, Any]]:
        """Prepend heartbeat task to user message.

        Args:
            agent: The agent instance
            kwargs: Input arguments to the _reasoning method (not modified)

        Returns:
            Always None. The heartbeat text is injected by mutating the
            message held in the agent's memory, not by returning new
            kwargs. Exceptions are logged and swallowed.
        """
        try:
            content = self._read_heartbeat_content()
            if not content:
                return None

            agent_id = getattr(agent, "agent_id", "unknown")
            logger.debug(
                "Heartbeat: found HEARTBEAT.md for agent %s",
                agent_id,
            )

            # Build heartbeat task instruction (Chinese)
            hb_task = (
                "# 定期主动检查\n\n"
                f"{content}\n\n"
                "请执行上述检查并报告结果。"
            )

            # Only report success when a message was actually changed.
            if self._prepend_to_first_user_message(agent, hb_task):
                logger.debug(
                    "Heartbeat task prepended for agent %s",
                    agent_id,
                )

        except Exception as e:
            logger.error("Heartbeat hook failed: %s", e, exc_info=True)

        return None
__all__ = [
"Hook",
@@ -428,5 +696,7 @@ __all__ = [
"HOOK_PRE_REASONING",
"HOOK_POST_ACTING",
"BootstrapHook",
"HeartbeatHook",
"MemoryCompactionHook",
"WorkspaceWatchHook",
]