Initial commit of integrated agent system

cillin
2026-03-30 17:46:44 +08:00
commit 0fa413380c
337 changed files with 75268 additions and 0 deletions

View File

@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
"""Base agent module for 大时代.
Provides the agent base class, command handling, tool guards, and hook management.
"""
# Command handlers (imported from command_handler.py)
from .command_handler import (
AgentCommandDispatcher,
CommandContext,
CommandHandler,
CommandResult,
create_command_dispatcher,
)
# Evaluation hooks (imported from evaluation_hook.py)
from .evaluation_hook import (
EvaluationHook,
EvaluationCollector,
MetricType,
EvaluationMetric,
EvaluationResult,
parse_evaluation_hooks,
)
# Skill adaptation hooks (imported from skill_adaptation_hook.py)
from .skill_adaptation_hook import (
AdaptationAction,
AdaptationThreshold,
AdaptationEvent,
SkillAdaptationHook,
AdaptationManager,
get_adaptation_manager,
)
__all__ = [
    # Command handling
"AgentCommandDispatcher",
"CommandContext",
"CommandHandler",
"CommandResult",
"create_command_dispatcher",
    # Evaluation hooks
"EvaluationHook",
"EvaluationCollector",
"MetricType",
"EvaluationMetric",
"EvaluationResult",
"parse_evaluation_hooks",
    # Skill adaptation hooks
"AdaptationAction",
"AdaptationThreshold",
"AdaptationEvent",
"SkillAdaptationHook",
"AdaptationManager",
"get_adaptation_manager",
]
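
A minimal usage sketch for this package surface. The import path backend.agents.base is an assumption, and `agent` stands in for any EvoAgent instance:

from backend.agents.base import create_command_dispatcher  # import path assumed

dispatcher = create_command_dispatcher()

async def route(agent, query: str):
    # Dispatch only strings that start with "/" and name a known system command.
    if dispatcher.is_command(query):
        result = await dispatcher.handle(agent, query)
        return result.message
    return None  # not a command; fall through to the normal agent reply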

View File

@@ -0,0 +1,543 @@
# -*- coding: utf-8 -*-
"""Agent command handler for system commands.
This module handles system commands like /save, /compact, /skills, /reload, etc.
Modeled on the CoPaw design, it gives EvoAgent its command-handling capability.
"""
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional
if TYPE_CHECKING:
from .agent import EvoAgent
logger = logging.getLogger(__name__)
@dataclass
class CommandResult:
"""命令执行结果"""
success: bool
message: str
data: Dict[str, Any] = field(default_factory=dict)
class CommandContext:
"""命令执行上下文"""
def __init__(self, agent: "EvoAgent", raw_query: str, args: str = ""):
self.agent = agent
self.raw_query = raw_query
self.args = args
self.config_name = getattr(agent, "config_name", "default")
self.agent_id = getattr(agent, "agent_id", "unknown")
class CommandHandler(ABC):
"""命令处理器抽象基类"""
@abstractmethod
async def handle(self, ctx: CommandContext) -> CommandResult:
"""处理命令"""
pass
class SaveCommandHandler(CommandHandler):
"""处理 /save <message> 命令 - 保存内容到MEMORY.md"""
async def handle(self, ctx: CommandContext) -> CommandResult:
message = ctx.args.strip()
if not message:
return CommandResult(
success=False,
message="Usage: /save <message>\n请提供要保存的内容。"
)
try:
memory_path = self._get_memory_path(ctx)
memory_path.parent.mkdir(parents=True, exist_ok=True)
timestamp = self._get_timestamp()
entry = f"\n## {timestamp}\n\n{message}\n"
with open(memory_path, "a", encoding="utf-8") as f:
f.write(entry)
return CommandResult(
success=True,
message=f"✅ 内容已保存到 MEMORY.md\n- 路径: {memory_path}\n- 长度: {len(message)} 字符",
data={"path": str(memory_path), "length": len(message)}
)
except Exception as e:
logger.error(f"Failed to save to MEMORY.md: {e}")
return CommandResult(
success=False,
message=f"❌ 保存失败: {str(e)}"
)
def _get_memory_path(self, ctx: CommandContext) -> Path:
"""获取MEMORY.md路径"""
from backend.agents.skills_manager import SkillsManager
sm = SkillsManager()
asset_dir = sm.get_agent_asset_dir(ctx.config_name, ctx.agent_id)
return asset_dir / "MEMORY.md"
def _get_timestamp(self) -> str:
"""获取当前时间戳"""
from datetime import datetime
return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
class CompactCommandHandler(CommandHandler):
"""处理 /compact 命令 - 压缩记忆"""
async def handle(self, ctx: CommandContext) -> CommandResult:
try:
agent = ctx.agent
memory_manager = getattr(agent, "memory_manager", None)
if memory_manager is None:
return CommandResult(
success=False,
message="❌ Memory Manager 未启用\n\n- 记忆压缩功能不可用\n- 请在配置中启用 memory_manager"
)
messages = await self._get_messages(agent)
if not messages:
return CommandResult(
success=False,
message="⚠️ 没有可压缩的消息\n\n- 当前记忆为空\n- 无需执行压缩"
)
compact_content = await memory_manager.compact_memory(messages)
await self._update_compressed_summary(agent, compact_content)
return CommandResult(
success=True,
message=f"✅ 记忆压缩完成\n\n- 压缩了 {len(messages)} 条消息\n- 摘要长度: {len(compact_content)} 字符",
data={"message_count": len(messages), "summary_length": len(compact_content)}
)
except Exception as e:
logger.error(f"Failed to compact memory: {e}")
return CommandResult(
success=False,
message=f"❌ 压缩失败: {str(e)}"
)
async def _get_messages(self, agent: "EvoAgent") -> List[Any]:
"""获取Agent的记忆消息"""
memory = getattr(agent, "memory", None)
if memory is None:
return []
return await memory.get_memory() if hasattr(memory, "get_memory") else []
async def _update_compressed_summary(self, agent: "EvoAgent", content: str) -> None:
"""更新压缩摘要"""
memory = getattr(agent, "memory", None)
if memory and hasattr(memory, "update_compressed_summary"):
await memory.update_compressed_summary(content)
class SkillsListCommandHandler(CommandHandler):
"""处理 /skills list 命令 - 列出已激活技能"""
async def handle(self, ctx: CommandContext) -> CommandResult:
try:
from backend.agents.skills_manager import SkillsManager
sm = SkillsManager()
active_skills = sm.list_active_skill_metadata(ctx.config_name, ctx.agent_id)
catalog = sm.list_agent_skill_catalog(ctx.config_name, ctx.agent_id)
lines = ["📋 技能列表", ""]
if active_skills:
lines.append("✅ 已激活技能:")
for skill in active_skills:
lines.append(f"{skill.name} - {skill.description[:50]}...")
else:
lines.append("⚠️ 当前没有激活的技能")
lines.append("")
lines.append(f"📚 可用技能总数: {len(catalog)}")
lines.append("💡 使用 /skills enable <name> 启用技能")
return CommandResult(
success=True,
message="\n".join(lines),
data={
"active_count": len(active_skills),
"catalog_count": len(catalog),
"active": [s.skill_name for s in active_skills]
}
)
except Exception as e:
logger.error(f"Failed to list skills: {e}")
return CommandResult(
success=False,
message=f"❌ 获取技能列表失败: {str(e)}"
)
class SkillsEnableCommandHandler(CommandHandler):
"""处理 /skills enable <name> 命令 - 启用技能"""
async def handle(self, ctx: CommandContext) -> CommandResult:
skill_name = ctx.args.strip()
if not skill_name:
return CommandResult(
success=False,
message="Usage: /skills enable <skill_name>\n请提供技能名称。"
)
try:
from backend.agents.skills_manager import SkillsManager
sm = SkillsManager()
result = sm.update_agent_skill_overrides(
ctx.config_name,
ctx.agent_id,
enable=[skill_name]
)
return CommandResult(
success=True,
message=f"✅ 技能已启用: {skill_name}\n\n已启用技能: {', '.join(result['enabled_skills'])}",
data=result
)
except Exception as e:
logger.error(f"Failed to enable skill: {e}")
return CommandResult(
success=False,
message=f"❌ 启用技能失败: {str(e)}"
)
class SkillsDisableCommandHandler(CommandHandler):
"""处理 /skills disable <name> 命令 - 禁用技能"""
async def handle(self, ctx: CommandContext) -> CommandResult:
skill_name = ctx.args.strip()
if not skill_name:
return CommandResult(
success=False,
message="Usage: /skills disable <skill_name>\n请提供技能名称。"
)
try:
from backend.agents.skills_manager import SkillsManager
sm = SkillsManager()
result = sm.update_agent_skill_overrides(
ctx.config_name,
ctx.agent_id,
disable=[skill_name]
)
return CommandResult(
success=True,
message=f"✅ 技能已禁用: {skill_name}\n\n已禁用技能: {', '.join(result['disabled_skills'])}",
data=result
)
except Exception as e:
logger.error(f"Failed to disable skill: {e}")
return CommandResult(
success=False,
message=f"❌ 禁用技能失败: {str(e)}"
)
class SkillsInstallCommandHandler(CommandHandler):
"""处理 /skills install <name> 命令 - 安装技能"""
async def handle(self, ctx: CommandContext) -> CommandResult:
skill_name = ctx.args.strip()
if not skill_name:
return CommandResult(
success=False,
message="Usage: /skills install <skill_name>\n请提供技能名称。"
)
try:
from backend.agents.skills_manager import SkillsManager
from backend.agents.skill_loader import load_skill_from_dir
sm = SkillsManager()
            # Locate the skill source directory
source_dir = self._resolve_skill_source(sm, skill_name)
if not source_dir:
return CommandResult(
success=False,
message=f"❌ 技能未找到: {skill_name}\n\n请检查技能名称是否正确,或技能是否存在于 builtin/customized 目录。"
)
            # Load and validate the skill
skill_info = load_skill_from_dir(source_dir)
if not skill_info:
return CommandResult(
success=False,
message=f"❌ 技能加载失败: {skill_name}\n\n技能格式可能不正确。"
)
            # Install into the agent's installed directory
installed_root = sm.get_agent_installed_root(ctx.config_name, ctx.agent_id)
target_dir = installed_root / skill_name
import shutil
if target_dir.exists():
shutil.rmtree(target_dir)
shutil.copytree(source_dir, target_dir)
return CommandResult(
success=True,
message=f"✅ 技能已安装: {skill_name}\n\n- 名称: {skill_info.get('name', skill_name)}\n- 版本: {skill_info.get('version', 'unknown')}\n- 路径: {target_dir}",
data={"skill_name": skill_name, "target_dir": str(target_dir)}
)
except Exception as e:
logger.error(f"Failed to install skill: {e}")
return CommandResult(
success=False,
message=f"❌ 安装技能失败: {str(e)}"
)
def _resolve_skill_source(self, sm: "SkillsManager", skill_name: str) -> Optional[Path]:
"""解析技能源目录"""
for root in [sm.customized_root, sm.builtin_root]:
candidate = root / skill_name
if candidate.exists() and (candidate / "SKILL.md").exists():
return candidate
return None
class ReloadCommandHandler(CommandHandler):
"""处理 /reload 命令 - 重新加载配置"""
async def handle(self, ctx: CommandContext) -> CommandResult:
try:
agent = ctx.agent
            # Reload the configuration
if hasattr(agent, "reload_config"):
await agent.reload_config()
            # Reload skills
from backend.agents.skills_manager import SkillsManager
sm = SkillsManager()
            # Refresh skill synchronization
active_root = sm.get_agent_active_root(ctx.config_name, ctx.agent_id)
if active_root.exists():
                # Clear the cache to force a reload
import shutil
for item in active_root.iterdir():
if item.is_dir():
shutil.rmtree(item)
return CommandResult(
success=True,
message="✅ 配置已重新加载\n\n- Agent配置已刷新\n- 技能缓存已清除\n- 请重启对话以应用所有更改",
data={"config_name": ctx.config_name, "agent_id": ctx.agent_id}
)
except Exception as e:
logger.error(f"Failed to reload config: {e}")
return CommandResult(
success=False,
message=f"❌ 重新加载失败: {str(e)}"
)
class StatusCommandHandler(CommandHandler):
"""处理 /status 命令 - 显示Agent状态"""
async def handle(self, ctx: CommandContext) -> CommandResult:
try:
agent = ctx.agent
lines = ["📊 Agent 状态", ""]
lines.append(f"🆔 Agent ID: {ctx.agent_id}")
lines.append(f"⚙️ Config: {ctx.config_name}")
            # Model info
model = getattr(agent, "model", None)
if model:
lines.append(f"🤖 Model: {model}")
            # Memory status
memory = getattr(agent, "memory", None)
if memory:
msg_count = len(getattr(memory, "content", []))
lines.append(f"💾 Memory: {msg_count} messages")
            # Skill status
from backend.agents.skills_manager import SkillsManager
sm = SkillsManager()
active_skills = sm.list_active_skill_metadata(ctx.config_name, ctx.agent_id)
lines.append(f"🔧 Active Skills: {len(active_skills)}")
            # Tool group status
toolkit = getattr(agent, "toolkit", None)
if toolkit:
groups = getattr(toolkit, "tool_groups", {})
active_groups = [name for name, g in groups.items() if getattr(g, "active", False)]
lines.append(f"🛠️ Active Tool Groups: {', '.join(active_groups) if active_groups else 'None'}")
return CommandResult(
success=True,
message="\n".join(lines),
data={
"agent_id": ctx.agent_id,
"config_name": ctx.config_name,
"active_skills_count": len(active_skills)
}
)
except Exception as e:
logger.error(f"Failed to get status: {e}")
return CommandResult(
success=False,
message=f"❌ 获取状态失败: {str(e)}"
)
class HelpCommandHandler(CommandHandler):
"""处理 /help 命令 - 显示帮助"""
async def handle(self, ctx: CommandContext) -> CommandResult:
help_text = """📖 EvoAgent 命令帮助
可用命令:
/save <message> - 保存内容到 MEMORY.md
/compact - 压缩记忆
/skills list - 列出已激活技能
/skills enable <name> - 启用技能
/skills disable <name>- 禁用技能
/skills install <name>- 安装技能
/reload - 重新加载配置
/status - 显示Agent状态
/help - 显示此帮助信息
提示:
• 所有命令以 / 开头
• 命令不区分大小写
• 使用 Tab 键可自动补全命令
"""
return CommandResult(success=True, message=help_text)
class AgentCommandDispatcher:
"""Agent命令分发器
参考CoPaw的CommandHandler设计为EvoAgent提供统一的命令处理入口。
"""
    # Supported system commands
SYSTEM_COMMANDS = frozenset({
"save", "compact",
"skills", "reload",
"status", "help"
})
def __init__(self):
self._handlers: Dict[str, CommandHandler] = {}
self._subcommands: Dict[str, Dict[str, CommandHandler]] = {}
self._register_default_handlers()
def _register_default_handlers(self) -> None:
"""注册默认命令处理器"""
self._handlers["save"] = SaveCommandHandler()
self._handlers["compact"] = CompactCommandHandler()
self._handlers["reload"] = ReloadCommandHandler()
self._handlers["status"] = StatusCommandHandler()
self._handlers["help"] = HelpCommandHandler()
        # Subcommands: /skills list/enable/disable/install
self._subcommands["skills"] = {
"list": SkillsListCommandHandler(),
"enable": SkillsEnableCommandHandler(),
"disable": SkillsDisableCommandHandler(),
"install": SkillsInstallCommandHandler(),
}
def is_command(self, query: str | None) -> bool:
"""检查是否为命令
Args:
query: 用户输入字符串
Returns:
True 如果是系统命令
"""
if not isinstance(query, str) or not query.startswith("/"):
return False
parts = query.strip().lstrip("/").split()
if not parts:
return False
cmd = parts[0].lower()
        # Check the main command
if cmd in self.SYSTEM_COMMANDS:
return True
return False
async def handle(self, agent: "EvoAgent", query: str) -> CommandResult:
"""处理命令
Args:
agent: EvoAgent实例
query: 命令字符串
Returns:
命令执行结果
"""
if not self.is_command(query):
return CommandResult(
success=False,
message=f"未知命令: {query}\n使用 /help 查看可用命令。"
)
        # Parse the command and arguments
parts = query.strip().lstrip("/").split(maxsplit=1)
cmd = parts[0].lower()
args = parts[1] if len(parts) > 1 else ""
logger.info(f"Processing command: {cmd}, args: {args}")
        # Handle subcommands (e.g., /skills list)
if cmd in self._subcommands:
sub_parts = args.split(maxsplit=1)
sub_cmd = sub_parts[0].lower() if sub_parts else ""
sub_args = sub_parts[1] if len(sub_parts) > 1 else ""
handlers = self._subcommands[cmd]
handler = handlers.get(sub_cmd)
if handler is None:
available = ", ".join(handlers.keys())
return CommandResult(
success=False,
message=f"未知子命令: {sub_cmd}\n可用子命令: {available}"
)
ctx = CommandContext(agent, query, sub_args)
return await handler.handle(ctx)
        # Handle main commands
handler = self._handlers.get(cmd)
if handler is None:
return CommandResult(
success=False,
message=f"命令未实现: {cmd}"
)
ctx = CommandContext(agent, query, args)
return await handler.handle(ctx)
# Convenience function
def create_command_dispatcher() -> AgentCommandDispatcher:
"""创建命令分发器实例"""
return AgentCommandDispatcher()
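
Extending the dispatcher follows the same pattern as the built-in handlers. A sketch with a hypothetical /ping handler; note that SYSTEM_COMMANDS is a frozenset and this commit exposes no public registration API, so the sketch pokes the private table directly and rebinds the class attribute:

class PingCommandHandler(CommandHandler):
    """Handle /ping - hypothetical liveness check."""

    async def handle(self, ctx: CommandContext) -> CommandResult:
        return CommandResult(success=True, message="pong", data={"agent_id": ctx.agent_id})

dispatcher = create_command_dispatcher()
dispatcher._handlers["ping"] = PingCommandHandler()  # no public registration API in this commit
# is_command() consults SYSTEM_COMMANDS, so the new command must be added there too.
AgentCommandDispatcher.SYSTEM_COMMANDS = AgentCommandDispatcher.SYSTEM_COMMANDS | {"ping"}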

View File

@@ -0,0 +1,452 @@
# -*- coding: utf-8 -*-
"""Evaluation hooks system for skills.
Provides evaluation metric collection and storage for skill performance tracking.
Based on the evaluation hooks design in SKILL_TEMPLATE.md.
"""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
class MetricType(Enum):
"""Types of evaluation metrics."""
HIT_RATE = "hit_rate" # 信号命中率
RISK_VIOLATION = "risk_violation" # 风控违例率
POSITION_DEVIATION = "position_deviation" # 仓位偏离率
PnL_ATTRIBUTION = "pnl_attribution" # P&L 归因一致性
SIGNAL_CONSISTENCY = "signal_consistency" # 信号一致性
DECISION_LATENCY = "decision_latency" # 决策延迟
TOOL_USAGE = "tool_usage" # 工具使用率
CUSTOM = "custom" # 自定义指标
@dataclass
class EvaluationMetric:
"""A single evaluation metric."""
name: str
metric_type: MetricType
value: float
timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
metadata: Dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> Dict[str, Any]:
return {
"name": self.name,
"metric_type": self.metric_type.value,
"value": self.value,
"timestamp": self.timestamp,
"metadata": self.metadata,
}
@dataclass
class EvaluationResult:
"""Evaluation result for a skill execution."""
skill_name: str
run_id: str
agent_id: str
metrics: List[EvaluationMetric] = field(default_factory=list)
inputs: Dict[str, Any] = field(default_factory=dict)
outputs: Dict[str, Any] = field(default_factory=dict)
decision: Optional[str] = None
success: bool = True
error_message: Optional[str] = None
started_at: Optional[str] = None
completed_at: Optional[str] = field(default_factory=lambda: datetime.now().isoformat())
def to_dict(self) -> Dict[str, Any]:
return {
"skill_name": self.skill_name,
"run_id": self.run_id,
"agent_id": self.agent_id,
"metrics": [m.to_dict() for m in self.metrics],
"inputs": self.inputs,
"outputs": self.outputs,
"decision": self.decision,
"success": self.success,
"error_message": self.error_message,
"started_at": self.started_at,
"completed_at": self.completed_at,
}
class EvaluationHook:
"""Hook for collecting skill evaluation metrics.
This hook collects and stores evaluation metrics after skill execution
for later analysis and memory/reflection stages.
"""
def __init__(
self,
storage_dir: Path,
run_id: str,
agent_id: str,
):
"""Initialize evaluation hook.
Args:
storage_dir: Directory to store evaluation results
run_id: Current run identifier
agent_id: Current agent identifier
"""
self.storage_dir = Path(storage_dir)
self.run_id = run_id
self.agent_id = agent_id
self._current_evaluation: Optional[EvaluationResult] = None
def start_evaluation(
self,
skill_name: str,
inputs: Dict[str, Any],
) -> None:
"""Start a new evaluation session.
Args:
skill_name: Name of the skill being evaluated
inputs: Input parameters for the skill
"""
self._current_evaluation = EvaluationResult(
skill_name=skill_name,
run_id=self.run_id,
agent_id=self.agent_id,
inputs=inputs,
started_at=datetime.now().isoformat(),
)
logger.debug(f"Started evaluation for skill: {skill_name}")
def add_metric(
self,
name: str,
metric_type: MetricType,
value: float,
metadata: Optional[Dict[str, Any]] = None,
) -> None:
"""Add an evaluation metric.
Args:
name: Metric name
metric_type: Type of metric
value: Metric value
metadata: Additional metadata
"""
if self._current_evaluation is None:
logger.warning("No active evaluation session, ignoring metric")
return
metric = EvaluationMetric(
name=name,
metric_type=metric_type,
value=value,
metadata=metadata or {},
)
self._current_evaluation.metrics.append(metric)
logger.debug(f"Added metric: {name} = {value}")
def add_metrics(self, metrics: List[EvaluationMetric]) -> None:
"""Add multiple evaluation metrics at once.
Args:
metrics: List of metrics to add
"""
if self._current_evaluation is None:
logger.warning("No active evaluation session, ignoring metrics")
return
self._current_evaluation.metrics.extend(metrics)
def record_outputs(self, outputs: Dict[str, Any]) -> None:
"""Record skill outputs.
Args:
outputs: Output from skill execution
"""
if self._current_evaluation is None:
logger.warning("No active evaluation session, ignoring outputs")
return
self._current_evaluation.outputs = outputs
def record_decision(self, decision: str) -> None:
"""Record the final decision.
Args:
decision: Final decision made by the skill
"""
if self._current_evaluation is None:
logger.warning("No active evaluation session, ignoring decision")
return
self._current_evaluation.decision = decision
def complete_evaluation(
self,
success: bool = True,
error_message: Optional[str] = None,
) -> Optional[EvaluationResult]:
"""Complete the evaluation session and persist results.
Args:
success: Whether the skill execution was successful
error_message: Error message if failed
Returns:
The completed evaluation result, or None if no active evaluation
"""
if self._current_evaluation is None:
logger.warning("No active evaluation to complete")
return None
self._current_evaluation.success = success
self._current_evaluation.error_message = error_message
self._current_evaluation.completed_at = datetime.now().isoformat()
# Persist to storage
result = self._persist_evaluation(self._current_evaluation)
self._current_evaluation = None
logger.debug(f"Completed evaluation for skill: {result.skill_name}")
return result
def _persist_evaluation(self, evaluation: EvaluationResult) -> EvaluationResult:
"""Persist evaluation result to storage.
Args:
evaluation: Evaluation result to persist
Returns:
The persisted evaluation
"""
# Create run-specific directory
run_dir = self.storage_dir / self.run_id
run_dir.mkdir(parents=True, exist_ok=True)
# Create agent-specific subdirectory
agent_dir = run_dir / self.agent_id
agent_dir.mkdir(parents=True, exist_ok=True)
# Generate filename with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
filename = f"{evaluation.skill_name}_{timestamp}.json"
filepath = agent_dir / filename
# Write evaluation result
try:
with open(filepath, "w", encoding="utf-8") as f:
json.dump(evaluation.to_dict(), f, ensure_ascii=False, indent=2)
logger.info(f"Persisted evaluation to: {filepath}")
except Exception as e:
logger.error(f"Failed to persist evaluation: {e}")
return evaluation
def cancel_evaluation(self) -> None:
"""Cancel the current evaluation session without saving."""
if self._current_evaluation is not None:
logger.debug(f"Cancelled evaluation for: {self._current_evaluation.skill_name}")
self._current_evaluation = None
class EvaluationCollector:
"""Collector for aggregating evaluation metrics across runs.
Provides methods to query and analyze evaluation results.
"""
def __init__(self, storage_dir: Path):
"""Initialize evaluation collector.
Args:
storage_dir: Root directory containing evaluation results
"""
self.storage_dir = Path(storage_dir)
def get_run_evaluations(
self,
run_id: str,
agent_id: Optional[str] = None,
) -> List[EvaluationResult]:
"""Get all evaluations for a run.
Args:
run_id: Run identifier
agent_id: Optional agent identifier to filter by
Returns:
List of evaluation results
"""
run_dir = self.storage_dir / run_id
if not run_dir.exists():
return []
evaluations = []
agent_dirs = [run_dir / agent_id] if agent_id else run_dir.iterdir()
for agent_dir in agent_dirs:
if not agent_dir.is_dir():
continue
for eval_file in agent_dir.glob("*.json"):
try:
with open(eval_file, "r", encoding="utf-8") as f:
data = json.load(f)
evaluations.append(self._parse_evaluation(data))
except Exception as e:
logger.warning(f"Failed to load evaluation {eval_file}: {e}")
return evaluations
def get_skill_metrics(
self,
skill_name: str,
run_ids: Optional[List[str]] = None,
) -> List[EvaluationMetric]:
"""Get all metrics for a specific skill.
Args:
skill_name: Name of the skill
run_ids: Optional list of run IDs to filter by
Returns:
List of metrics for the skill
"""
metrics = []
if run_ids is None:
run_ids = [d.name for d in self.storage_dir.iterdir() if d.is_dir()]
for run_id in run_ids:
evaluations = self.get_run_evaluations(run_id)
for eval_result in evaluations:
if eval_result.skill_name == skill_name:
metrics.extend(eval_result.metrics)
return metrics
def calculate_skill_stats(
self,
skill_name: str,
metric_type: MetricType,
run_ids: Optional[List[str]] = None,
) -> Dict[str, float]:
"""Calculate statistics for a specific metric type.
Args:
skill_name: Name of the skill
metric_type: Type of metric to calculate
run_ids: Optional list of run IDs to filter by
Returns:
Dictionary with min, max, avg, count statistics
"""
metrics = self.get_skill_metrics(skill_name, run_ids)
filtered = [m for m in metrics if m.metric_type == metric_type]
if not filtered:
return {"count": 0}
values = [m.value for m in filtered]
return {
"count": len(values),
"min": min(values),
"max": max(values),
"avg": sum(values) / len(values),
}
def _parse_evaluation(self, data: Dict[str, Any]) -> EvaluationResult:
"""Parse evaluation data into EvaluationResult.
Args:
data: Raw evaluation data
Returns:
Parsed EvaluationResult
"""
metrics = []
for m in data.get("metrics", []):
metrics.append(EvaluationMetric(
name=m["name"],
metric_type=MetricType(m["metric_type"]),
value=m["value"],
timestamp=m.get("timestamp", ""),
metadata=m.get("metadata", {}),
))
return EvaluationResult(
skill_name=data["skill_name"],
run_id=data["run_id"],
agent_id=data["agent_id"],
metrics=metrics,
inputs=data.get("inputs", {}),
outputs=data.get("outputs", {}),
decision=data.get("decision"),
success=data.get("success", True),
error_message=data.get("error_message"),
started_at=data.get("started_at"),
completed_at=data.get("completed_at"),
)
def parse_evaluation_hooks(skill_dir: Path) -> Dict[str, Any]:
"""Parse evaluation hooks from SKILL.md.
Extracts the Optional: Evaluation hooks section from skill documentation.
Args:
skill_dir: Skill directory path
Returns:
Dictionary containing evaluation hook definitions
"""
skill_md = skill_dir / "SKILL.md"
if not skill_md.exists():
return {}
try:
content = skill_md.read_text(encoding="utf-8")
# Extract evaluation hooks section
if "## Optional: Evaluation hooks" in content:
start = content.find("## Optional: Evaluation hooks")
# Find the next ## section or end of file
next_section = content.find("\n## ", start + 1)
if next_section == -1:
eval_section = content[start:]
else:
eval_section = content[start:next_section]
# Parse metrics from the section
metrics = []
for metric_type in MetricType:
if metric_type.value.replace("_", " ") in eval_section.lower():
metrics.append(metric_type.value)
return {
"supported_metrics": metrics,
"section_content": eval_section.strip(),
}
    except Exception as e:
        logger.warning(f"Failed to parse evaluation hooks: {e}")
    # Also covers the case where no evaluation-hooks section is present.
    return {}
__all__ = [
"MetricType",
"EvaluationMetric",
"EvaluationResult",
"EvaluationHook",
"EvaluationCollector",
"parse_evaluation_hooks",
]
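
A minimal end-to-end sketch of the hook and collector above; the storage path, run/agent IDs, skill name, and metric values are all illustrative:

from pathlib import Path

hook = EvaluationHook(storage_dir=Path("runs/evals"), run_id="run_001", agent_id="analyst")
hook.start_evaluation("momentum_signal", inputs={"symbol": "AAPL"})
hook.add_metric("hit_rate", MetricType.HIT_RATE, 0.62)
hook.record_decision("BUY")
hook.complete_evaluation(success=True)  # persists runs/evals/run_001/analyst/momentum_signal_<ts>.json

collector = EvaluationCollector(Path("runs/evals"))
stats = collector.calculate_skill_stats("momentum_signal", MetricType.HIT_RATE)
# -> {"count": 1, "min": 0.62, "max": 0.62, "avg": 0.62}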

View File

@@ -0,0 +1,510 @@
# -*- coding: utf-8 -*-
"""EvoAgent - Core agent implementation for 大时代.
This module provides the main EvoAgent class built on AgentScope's ReActAgent,
with integrated tools, skills, and memory management based on CoPaw design.
Key features:
- Workspace-driven configuration from Markdown files
- Dynamic skill loading from skills/active directories
- Tool-guard security interception
- Hook system for extensibility
- Runtime skill and prompt reloading
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Type, TYPE_CHECKING
from agentscope.agent import ReActAgent
from agentscope.memory import InMemoryMemory
from agentscope.message import Msg
from agentscope.tool import Toolkit
from .tool_guard import ToolGuardMixin
from .hooks import (
HookManager,
BootstrapHook,
MemoryCompactionHook,
WorkspaceWatchHook,
HOOK_PRE_REASONING,
)
from ..prompts.builder import (
PromptBuilder,
build_system_prompt_from_workspace,
)
from ..agent_workspace import load_agent_workspace_config
from ..skills_manager import SkillsManager
# Team infrastructure imports (graceful import - may not exist yet)
try:
from backend.agents.team.messenger import AgentMessenger
from backend.agents.team.task_delegator import TaskDelegator
TEAM_INFRA_AVAILABLE = True
except ImportError:
TEAM_INFRA_AVAILABLE = False
AgentMessenger = None
TaskDelegator = None
if TYPE_CHECKING:
from agentscope.formatter import FormatterBase
from agentscope.model import ModelWrapperBase
logger = logging.getLogger(__name__)
class EvoAgent(ToolGuardMixin, ReActAgent):
"""EvoAgent with integrated tools, skills, and memory management.
This agent extends ReActAgent with:
- Workspace-driven configuration from AGENTS.md/SOUL.md/PROFILE.md/etc.
- Dynamic skill loading from skills/active directories
- Tool-guard security interception (via ToolGuardMixin)
- Hook system for extensibility (bootstrap, memory compaction)
- Runtime skill and prompt reloading
MRO note
~~~~~~~~
``ToolGuardMixin`` overrides ``_acting`` and ``_reasoning`` via
Python's MRO: EvoAgent → ToolGuardMixin → ReActAgent.
Example:
agent = EvoAgent(
agent_id="fundamentals_analyst",
config_name="smoke_fullstack",
workspace_dir=Path("runs/smoke_fullstack/agents/fundamentals_analyst"),
model=model_instance,
formatter=formatter_instance,
)
"""
def __init__(
self,
agent_id: str,
config_name: str,
workspace_dir: Path,
model: "ModelWrapperBase",
formatter: "FormatterBase",
skills_manager: Optional[SkillsManager] = None,
sys_prompt: Optional[str] = None,
max_iters: int = 10,
memory: Optional[Any] = None,
enable_tool_guard: bool = True,
enable_bootstrap_hook: bool = True,
enable_memory_compaction: bool = False,
memory_manager: Optional[Any] = None,
memory_compact_threshold: Optional[int] = None,
env_context: Optional[str] = None,
prompt_files: Optional[List[str]] = None,
):
"""Initialize EvoAgent.
Args:
agent_id: Unique identifier for this agent
config_name: Run configuration name (e.g., "smoke_fullstack")
workspace_dir: Agent workspace directory containing markdown files
model: LLM model instance
formatter: Message formatter instance
skills_manager: Optional SkillsManager instance
sys_prompt: Optional override for system prompt
max_iters: Maximum reasoning-acting iterations
memory: Optional memory instance (defaults to InMemoryMemory)
enable_tool_guard: Enable tool-guard security interception
enable_bootstrap_hook: Enable bootstrap guidance on first interaction
enable_memory_compaction: Enable automatic memory compaction
memory_manager: Optional memory manager for compaction
memory_compact_threshold: Token threshold for memory compaction
env_context: Optional environment context to prepend to system prompt
prompt_files: List of markdown files to load (defaults to standard set)
"""
self.agent_id = agent_id
self.config_name = config_name
self.workspace_dir = Path(workspace_dir)
self._skills_manager = skills_manager or SkillsManager()
self._env_context = env_context
self._prompt_files = prompt_files
# Initialize tool guard
if enable_tool_guard:
self._init_tool_guard()
# Load agent configuration from workspace
self._agent_config = self._load_agent_config()
# Build or use provided system prompt
if sys_prompt is not None:
self._sys_prompt = sys_prompt
else:
self._sys_prompt = self._build_system_prompt()
# Create toolkit with skills
toolkit = self._create_toolkit()
# Initialize hook manager
self._hook_manager = HookManager()
# Initialize parent ReActAgent
super().__init__(
name=agent_id,
model=model,
sys_prompt=self._sys_prompt,
toolkit=toolkit,
memory=memory or InMemoryMemory(),
formatter=formatter,
max_iters=max_iters,
)
# Register hooks
self._register_hooks(
enable_bootstrap=enable_bootstrap_hook,
enable_memory_compaction=enable_memory_compaction,
memory_manager=memory_manager,
memory_compact_threshold=memory_compact_threshold,
)
# Initialize team infrastructure if available
self._messenger: Optional["AgentMessenger"] = None
self._task_delegator: Optional["TaskDelegator"] = None
if TEAM_INFRA_AVAILABLE:
self._init_team_infrastructure()
logger.info(
"EvoAgent initialized: %s (workspace: %s)",
agent_id,
workspace_dir,
)
def _load_agent_config(self) -> Dict[str, Any]:
"""Load agent configuration from workspace.
Returns:
Agent configuration dictionary
"""
config_path = self.workspace_dir / "agent.yaml"
if config_path.exists():
loaded = load_agent_workspace_config(config_path)
return dict(loaded.values)
return {}
def _build_system_prompt(self) -> str:
"""Build system prompt from workspace markdown files.
Uses PromptBuilder to load and combine AGENTS.md, SOUL.md,
PROFILE.md, and other configured files.
Returns:
Complete system prompt string
"""
prompt = build_system_prompt_from_workspace(
workspace_dir=self.workspace_dir,
enabled_files=self._prompt_files,
agent_id=self.agent_id,
extra_context=self._env_context,
)
return prompt
def _create_toolkit(self) -> Toolkit:
"""Create and populate toolkit with agent skills.
Loads skills from the agent's active skills directory and
registers them with the toolkit.
Returns:
Configured Toolkit instance
"""
toolkit = Toolkit(
agent_skill_instruction=(
"<system-info>You have access to specialized skills. "
"Each skill lives in a directory and is described by SKILL.md. "
"Follow the skill instructions when they are relevant to the current task."
"</system-info>"
),
agent_skill_template="- {name} (dir: {dir}): {description}",
)
# Register skills from active directory
active_skills_dir = self._skills_manager.get_agent_active_root(
self.config_name,
self.agent_id,
)
if active_skills_dir.exists():
for skill_dir in sorted(active_skills_dir.iterdir()):
if skill_dir.is_dir() and (skill_dir / "SKILL.md").exists():
try:
toolkit.register_agent_skill(str(skill_dir))
logger.debug("Registered skill: %s", skill_dir.name)
except Exception as e:
logger.error(
"Failed to register skill '%s': %s",
skill_dir.name,
e,
)
return toolkit
def _register_hooks(
self,
enable_bootstrap: bool,
enable_memory_compaction: bool,
memory_manager: Optional[Any],
memory_compact_threshold: Optional[int],
) -> None:
"""Register agent hooks.
Args:
enable_bootstrap: Enable bootstrap hook
enable_memory_compaction: Enable memory compaction hook
memory_manager: Memory manager instance
memory_compact_threshold: Token threshold for compaction
"""
# Bootstrap hook - checks BOOTSTRAP.md on first interaction
if enable_bootstrap:
bootstrap_hook = BootstrapHook(
workspace_dir=self.workspace_dir,
language="zh",
)
self._hook_manager.register(
hook_type=HOOK_PRE_REASONING,
hook_name="bootstrap",
hook=bootstrap_hook,
)
logger.debug("Registered bootstrap hook")
# Memory compaction hook
if enable_memory_compaction and memory_manager is not None:
compaction_hook = MemoryCompactionHook(
memory_manager=memory_manager,
memory_compact_threshold=memory_compact_threshold,
)
self._hook_manager.register(
hook_type=HOOK_PRE_REASONING,
hook_name="memory_compaction",
hook=compaction_hook,
)
logger.debug("Registered memory compaction hook")
# Workspace watch hook - auto-reload markdown files on change
workspace_watch_hook = WorkspaceWatchHook(
workspace_dir=self.workspace_dir,
)
self._hook_manager.register(
hook_type=HOOK_PRE_REASONING,
hook_name="workspace_watch",
hook=workspace_watch_hook,
)
logger.debug("Registered workspace watch hook")
async def _reasoning(self, **kwargs) -> Msg:
"""Override reasoning to execute pre-reasoning hooks.
Args:
**kwargs: Arguments for reasoning
Returns:
Response message
"""
# Execute pre-reasoning hooks
kwargs = await self._hook_manager.execute(
hook_type=HOOK_PRE_REASONING,
agent=self,
kwargs=kwargs,
)
# Call parent (which may be ToolGuardMixin's _reasoning)
return await super()._reasoning(**kwargs)
def reload_skills(self, active_skill_dirs: Optional[List[Path]] = None) -> None:
"""Reload skills at runtime.
Rebuilds the toolkit with current skills from the active directory.
Args:
active_skill_dirs: Optional list of specific skill directories to load
"""
logger.info("Reloading skills for agent: %s", self.agent_id)
# Create new toolkit
new_toolkit = Toolkit(
agent_skill_instruction=(
"<system-info>You have access to specialized skills. "
"Each skill lives in a directory and is described by SKILL.md. "
"Follow the skill instructions when they are relevant to the current task."
"</system-info>"
),
agent_skill_template="- {name} (dir: {dir}): {description}",
)
# Register skills
if active_skill_dirs is None:
active_skills_dir = self._skills_manager.get_agent_active_root(
self.config_name,
self.agent_id,
)
if active_skills_dir.exists():
active_skill_dirs = [
d for d in active_skills_dir.iterdir()
if d.is_dir() and (d / "SKILL.md").exists()
]
else:
active_skill_dirs = []
for skill_dir in active_skill_dirs:
if skill_dir.exists() and (skill_dir / "SKILL.md").exists():
try:
new_toolkit.register_agent_skill(str(skill_dir))
logger.debug("Reloaded skill: %s", skill_dir.name)
except Exception as e:
logger.error(
"Failed to reload skill '%s': %s",
skill_dir.name,
e,
)
# Replace toolkit
self.toolkit = new_toolkit
logger.info("Skills reloaded for agent: %s", self.agent_id)
def rebuild_sys_prompt(self) -> None:
"""Rebuild and replace the system prompt at runtime.
Useful after updating AGENTS.md, SOUL.md, PROFILE.md, etc.
to ensure the prompt reflects the latest configuration.
Updates both self._sys_prompt and the first system-role
message stored in self.memory.content.
"""
logger.info("Rebuilding system prompt for agent: %s", self.agent_id)
# Reload agent config in case it changed
self._agent_config = self._load_agent_config()
# Rebuild prompt
self._sys_prompt = self._build_system_prompt()
# Update memory if system message exists
if hasattr(self, "memory") and self.memory.content:
for msg, _marks in self.memory.content:
if getattr(msg, "role", None) == "system":
msg.content = self._sys_prompt
logger.debug("Updated system message in memory")
break
logger.info("System prompt rebuilt for agent: %s", self.agent_id)
async def reply(
self,
msg: Msg | List[Msg] | None = None,
structured_model: Optional[Type[Any]] = None,
) -> Msg:
"""Process a message and return a response.
Args:
msg: Input message(s) from user
structured_model: Optional pydantic model for structured output
Returns:
Response message
"""
# Handle list of messages
if isinstance(msg, list):
# Process each message in sequence
for m in msg[:-1]:
await self.memory.add(m)
msg = msg[-1] if msg else None
return await super().reply(msg=msg, structured_model=structured_model)
def get_agent_info(self) -> Dict[str, Any]:
"""Get agent information.
Returns:
Dictionary with agent metadata
"""
return {
"agent_id": self.agent_id,
"config_name": self.config_name,
"workspace_dir": str(self.workspace_dir),
"skills_count": len([
s for s in self._skills_manager.list_active_skill_metadata(
self.config_name,
self.agent_id,
)
]),
"registered_hooks": self._hook_manager.list_hooks(),
"team_infra_available": TEAM_INFRA_AVAILABLE,
}
def _init_team_infrastructure(self) -> None:
"""Initialize team infrastructure components (messenger and task delegator).
This method initializes the AgentMessenger for inter-agent communication
and the TaskDelegator for subagent delegation.
"""
if not TEAM_INFRA_AVAILABLE:
return
try:
self._messenger = AgentMessenger(agent_id=self.agent_id)
self._task_delegator = TaskDelegator(agent=self)
logger.debug(
"Team infrastructure initialized for agent: %s",
self.agent_id,
)
except Exception as e:
logger.warning(
"Failed to initialize team infrastructure for %s: %s",
self.agent_id,
e,
)
self._messenger = None
self._task_delegator = None
@property
def messenger(self) -> Optional["AgentMessenger"]:
"""Get the agent's messenger for inter-agent communication.
Returns:
AgentMessenger instance if available, None otherwise
"""
return self._messenger
async def delegate_task(
self,
task_type: str,
task_data: Dict[str, Any],
target_agent: Optional[str] = None,
) -> Dict[str, Any]:
"""Delegate a task to a subagent using the TaskDelegator.
Args:
task_type: Type of task to delegate
task_data: Data/payload for the task
target_agent: Optional specific agent ID to delegate to
Returns:
Dict containing the delegation result
"""
if not TEAM_INFRA_AVAILABLE or self._task_delegator is None:
return {
"success": False,
"error": "Team infrastructure not available",
}
try:
return await self._task_delegator.delegate_task(
task_type=task_type,
task_data=task_data,
target_agent=target_agent,
)
except Exception as e:
logger.error(
"Task delegation failed for %s: %s",
self.agent_id,
e,
)
return {"success": False, "error": str(e)}
__all__ = ["EvoAgent"]

View File

@@ -0,0 +1,613 @@
# -*- coding: utf-8 -*-
"""Hook system for EvoAgent.
Provides pre_reasoning and post_acting hooks with built-in implementations:
- BootstrapHook: First-time setup guidance
- MemoryCompactionHook: Automatic memory compression
Based on CoPaw's hooks design.
"""
from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING
if TYPE_CHECKING:
from agentscope.agent import ReActAgent
logger = logging.getLogger(__name__)
# Hook types
HookType = str
HOOK_PRE_REASONING: HookType = "pre_reasoning"
HOOK_POST_ACTING: HookType = "post_acting"
class Hook(ABC):
"""Abstract base class for agent hooks."""
@abstractmethod
async def __call__(
self,
agent: "ReActAgent",
kwargs: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
"""Execute the hook.
Args:
agent: The agent instance
kwargs: Input arguments to the method being hooked
Returns:
Modified kwargs or None to use original
"""
pass
class HookManager:
"""Manages agent hooks.
Provides registration and execution of hooks for different
lifecycle events in the agent's operation.
"""
def __init__(self):
self._hooks: Dict[HookType, List[tuple[str, Hook]]] = {
HOOK_PRE_REASONING: [],
HOOK_POST_ACTING: [],
}
def register(
self,
hook_type: HookType,
hook_name: str,
hook: Hook | Callable,
) -> None:
"""Register a hook.
Args:
hook_type: Type of hook (pre_reasoning, post_acting)
hook_name: Unique name for this hook
hook: Hook instance or callable
"""
# Remove existing hook with same name
self._hooks[hook_type] = [
(name, h) for name, h in self._hooks[hook_type] if name != hook_name
]
self._hooks[hook_type].append((hook_name, hook))
logger.debug("Registered hook '%s' for type '%s'", hook_name, hook_type)
def unregister(self, hook_type: HookType, hook_name: str) -> bool:
"""Unregister a hook.
Args:
hook_type: Type of hook
hook_name: Name of the hook to remove
Returns:
True if hook was found and removed
"""
original_len = len(self._hooks[hook_type])
self._hooks[hook_type] = [
(name, h) for name, h in self._hooks[hook_type] if name != hook_name
]
removed = len(self._hooks[hook_type]) < original_len
if removed:
logger.debug("Unregistered hook '%s' from type '%s'", hook_name, hook_type)
return removed
async def execute(
self,
hook_type: HookType,
agent: "ReActAgent",
kwargs: Dict[str, Any],
) -> Dict[str, Any]:
"""Execute all hooks of a given type.
Args:
hook_type: Type of hooks to execute
agent: The agent instance
kwargs: Input arguments
Returns:
Potentially modified kwargs
"""
for name, hook in self._hooks[hook_type]:
try:
result = await hook(agent, kwargs)
if result is not None:
kwargs = result
except Exception as e:
logger.error("Hook '%s' failed: %s", name, e, exc_info=True)
return kwargs
def list_hooks(self, hook_type: Optional[HookType] = None) -> List[str]:
"""List registered hook names.
Args:
hook_type: Optional type to filter by
Returns:
List of hook names
"""
if hook_type:
return [name for name, _ in self._hooks.get(hook_type, [])]
names = []
for hooks in self._hooks.values():
names.extend([name for name, _ in hooks])
return names
class BootstrapHook(Hook):
"""Hook for bootstrap guidance on first user interaction.
This hook looks for a BOOTSTRAP.md file in the working directory
and if found, prepends guidance to the first user message to help
establish the agent's identity and user preferences.
"""
def __init__(
self,
workspace_dir: Path,
language: str = "zh",
):
"""Initialize bootstrap hook.
Args:
workspace_dir: Working directory containing BOOTSTRAP.md
language: Language code for bootstrap guidance (en/zh)
"""
self.workspace_dir = Path(workspace_dir)
self.language = language
self._completed_flag = self.workspace_dir / ".bootstrap_completed"
def _is_first_user_interaction(self, agent: "ReActAgent") -> bool:
"""Check if this is the first user interaction.
Args:
agent: The agent instance
Returns:
True if first user interaction
"""
if not hasattr(agent, "memory") or not agent.memory.content:
return True
# Count user messages (excluding system)
user_count = sum(
1 for msg, _ in agent.memory.content if msg.role == "user"
)
return user_count <= 1
def _build_bootstrap_guidance(self) -> str:
"""Build bootstrap guidance message.
Returns:
Formatted bootstrap guidance
"""
if self.language == "zh":
return (
"# 引导模式\n"
"\n"
"工作目录中存在 `BOOTSTRAP.md` — 首次设置。\n"
"\n"
"1. 阅读 BOOTSTRAP.md友好地表示初次见面"
"引导用户完成设置。\n"
"2. 按照 BOOTSTRAP.md 的指示,"
"帮助用户定义你的身份和偏好。\n"
"3. 按指南创建/更新必要文件"
"PROFILE.md、MEMORY.md 等)。\n"
"4. 完成后删除 BOOTSTRAP.md。\n"
"\n"
"如果用户希望跳过,直接回答下面的问题即可。\n"
"\n"
"---\n"
"\n"
)
return (
"# BOOTSTRAP MODE\n"
"\n"
"`BOOTSTRAP.md` exists — first-time setup.\n"
"\n"
"1. Read BOOTSTRAP.md, greet the user, "
"and guide them through setup.\n"
"2. Follow BOOTSTRAP.md instructions "
"to define identity and preferences.\n"
"3. Create/update files "
"(PROFILE.md, MEMORY.md, etc.) as described.\n"
"4. Delete BOOTSTRAP.md when done.\n"
"\n"
"If the user wants to skip, answer their "
"question directly instead.\n"
"\n"
"---\n"
"\n"
)
async def __call__(
self,
agent: "ReActAgent",
kwargs: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
"""Check and load BOOTSTRAP.md on first user interaction.
Args:
agent: The agent instance
kwargs: Input arguments to the _reasoning method
Returns:
None (hook doesn't modify kwargs)
"""
try:
bootstrap_path = self.workspace_dir / "BOOTSTRAP.md"
# Check if bootstrap has already been triggered
if self._completed_flag.exists():
return None
if not bootstrap_path.exists():
return None
if not self._is_first_user_interaction(agent):
return None
bootstrap_guidance = self._build_bootstrap_guidance()
logger.debug("Found BOOTSTRAP.md [%s], prepending guidance", self.language)
# Prepend to first user message in memory
if hasattr(agent, "memory") and agent.memory.content:
system_count = sum(
1 for msg, _ in agent.memory.content if msg.role == "system"
)
for msg, _ in agent.memory.content[system_count:]:
if msg.role == "user":
# Prepend guidance to message content
original_content = msg.content
msg.content = bootstrap_guidance + original_content
break
logger.debug("Bootstrap guidance prepended to first user message")
# Create completion flag to prevent repeated triggering
self._completed_flag.touch()
logger.debug("Created bootstrap completion flag")
except Exception as e:
logger.error("Failed to process bootstrap: %s", e, exc_info=True)
return None
class WorkspaceWatchHook(Hook):
"""Hook for auto-reloading workspace markdown files on change.
Monitors SOUL.md, AGENTS.md, PROFILE.md, etc. and triggers
a prompt rebuild when any of them change. Based on CoPaw's
AgentConfigWatcher approach but for markdown files.
"""
# Files to monitor (same as PromptBuilder.DEFAULT_FILES)
WATCHED_FILES = frozenset([
"SOUL.md", "AGENTS.md", "PROFILE.md",
"POLICY.md", "MEMORY.md",
"BOOTSTRAP.md",
])
def __init__(
self,
workspace_dir: Path,
poll_interval: float = 2.0,
):
"""Initialize workspace watch hook.
Args:
workspace_dir: Workspace directory to monitor
poll_interval: How often to check for changes (seconds)
"""
self.workspace_dir = Path(workspace_dir)
self.poll_interval = poll_interval
self._last_mtimes: dict[str, float] = {}
self._initialized = False
def _scan_mtimes(self) -> dict[str, float]:
"""Scan watched files and return their current mtimes."""
mtimes = {}
for name in self.WATCHED_FILES:
path = self.workspace_dir / name
if path.exists():
mtimes[name] = path.stat().st_mtime
return mtimes
def _has_changes(self) -> bool:
"""Check if any watched file has changed since last check."""
current = self._scan_mtimes()
if not self._initialized:
self._last_mtimes = current
self._initialized = True
return False
# Check for new, modified, or deleted files
if set(current.keys()) != set(self._last_mtimes.keys()):
self._last_mtimes = current
return True
for name, mtime in current.items():
if mtime != self._last_mtimes.get(name):
self._last_mtimes = current
return True
return False
async def __call__(
self,
agent: "ReActAgent",
kwargs: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
"""Check for file changes and rebuild prompt if needed.
Args:
agent: The agent instance
kwargs: Input arguments (unused)
Returns:
None
"""
try:
if self._has_changes():
logger.info(
"Workspace files changed, triggering prompt rebuild for: %s",
getattr(agent, "agent_id", "unknown"),
)
if hasattr(agent, "rebuild_sys_prompt"):
agent.rebuild_sys_prompt()
else:
logger.warning(
"Agent %s has no rebuild_sys_prompt method",
getattr(agent, "agent_id", "unknown"),
)
except Exception as e:
logger.error("Workspace watch hook failed: %s", e, exc_info=True)
return None
class MemoryCompactionHook(Hook):
"""Hook for automatic memory compaction when context is full.
This hook monitors the token count of messages and triggers compaction
when it exceeds the threshold. It preserves the system prompt and recent
messages while summarizing older conversation history.
Based on CoPaw's memory compaction design with additional improvements:
- memory_compact_ratio: Ratio to compact when threshold reached
- memory_reserve_ratio: Always keep a reserve of tokens for recent messages
- enable_tool_result_compact: Compact tool results separately
- tool_result_compact_keep_n: Number of tool results to keep
"""
def __init__(
self,
memory_manager: Any,
memory_compact_threshold: Optional[int] = None,
memory_compact_ratio: float = 0.75,
memory_reserve_ratio: float = 0.1,
enable_tool_result_compact: bool = False,
tool_result_compact_keep_n: int = 5,
):
"""Initialize memory compaction hook.
Args:
memory_manager: Memory manager instance for compaction
memory_compact_threshold: Token threshold for compaction
memory_compact_ratio: Target ratio to compact to (e.g., 0.75 = compact to 75%)
memory_reserve_ratio: Reserve ratio to always keep free (e.g., 0.1 = 10%)
enable_tool_result_compact: Enable tool result compaction
tool_result_compact_keep_n: Number of tool results to keep
"""
self.memory_manager = memory_manager
self.memory_compact_threshold = memory_compact_threshold
self.memory_compact_ratio = memory_compact_ratio
self.memory_reserve_ratio = memory_reserve_ratio
self.enable_tool_result_compact = enable_tool_result_compact
self.tool_result_compact_keep_n = tool_result_compact_keep_n
async def __call__(
self,
agent: "ReActAgent",
kwargs: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
"""Pre-reasoning hook to check and compact memory if needed.
Args:
agent: The agent instance
kwargs: Input arguments to the _reasoning method
Returns:
None (hook doesn't modify kwargs)
"""
try:
if not hasattr(agent, "memory") or not self.memory_manager:
return None
memory = agent.memory
# Get current token count estimate
messages = await memory.get_memory()
total_tokens = self._estimate_tokens(messages)
if self.memory_compact_threshold is None:
return None
if total_tokens < self.memory_compact_threshold:
return None
logger.info(
"Memory compaction triggered: %d tokens (threshold: %d)",
total_tokens,
self.memory_compact_threshold,
)
# Compact memory
await self._compact_memory(agent, messages)
except Exception as e:
logger.error("Failed to compact memory: %s", e, exc_info=True)
return None
def _estimate_tokens(self, messages: List[Any]) -> int:
"""Estimate token count for messages.
Args:
messages: List of messages
Returns:
Estimated token count
"""
# Simple estimation: ~4 chars per token
total_chars = sum(
len(str(getattr(msg, "content", "")))
for msg in messages
)
return total_chars // 4
async def _compact_memory(
self,
agent: "ReActAgent",
messages: List[Any],
) -> None:
"""Compact memory by summarizing older messages.
Uses CoPaw-style memory management:
- memory_compact_ratio: Target ratio to compact to (e.g., 0.75 means compact to 75%)
- memory_reserve_ratio: Always keep this ratio free (e.g., 0.1 means keep 10% for recent)
Args:
agent: The agent instance
messages: Current messages in memory
"""
if self.memory_compact_threshold is None:
return
# Estimate total tokens
total_tokens = self._estimate_tokens(messages)
# Calculate reserve based on ratio (CoPaw-style)
reserve_tokens = int(total_tokens * self.memory_reserve_ratio)
# Calculate target tokens after compaction
target_tokens = int(total_tokens * self.memory_compact_ratio)
target_tokens = max(target_tokens, total_tokens - reserve_tokens)
        # Find messages to compact (older ones), keeping the most recent
        # messages that fit within the target budget
        messages_to_compact = []
        kept_tokens = 0
        # Walk from newest to oldest; once the budget is exhausted, every
        # remaining (older) message is queued for compaction
        for msg in reversed(messages):
            msg_tokens = self._estimate_tokens([msg])
            if messages_to_compact or kept_tokens + msg_tokens > target_tokens:
                messages_to_compact.append(msg)
            else:
                kept_tokens += msg_tokens
        messages_to_compact.reverse()  # restore chronological order
if not messages_to_compact:
return
logger.info(
"Compacting %d messages (%d tokens) to target %d tokens",
len(messages_to_compact),
self._estimate_tokens(messages_to_compact),
target_tokens,
)
# Use memory manager to compact if available
if hasattr(self.memory_manager, "compact_memory"):
try:
summary = await self.memory_manager.compact_memory(
messages=messages_to_compact,
)
logger.info(
"Memory compacted: %d messages summarized, summary: %s",
len(messages_to_compact),
summary[:200] if summary else "N/A",
)
# Mark messages as compressed if supported
if hasattr(agent.memory, "update_messages_mark"):
from agentscope.agent._react_agent import _MemoryMark
await agent.memory.update_messages_mark(
new_mark=_MemoryMark.COMPRESSED,
msg_ids=[msg.id for msg in messages_to_compact],
)
except Exception as e:
logger.error("Memory manager compaction failed: %s", e)
# Tool result compaction (CoPaw-style)
if self.enable_tool_result_compact:
await self._compact_tool_results(agent, messages)
async def _compact_tool_results(
self,
agent: "ReActAgent",
messages: List[Any],
) -> None:
"""Compact tool results by keeping only recent ones.
Based on CoPaw's tool_result_compact_keep_n pattern.
Tool results can be very verbose, so we keep only the N most recent ones.
Args:
agent: The agent instance
messages: Current messages in memory
"""
if not hasattr(agent.memory, "content"):
return
# Find tool result messages (usually have "tool" role or tool_related content)
tool_results = []
for msg, _ in agent.memory.content:
if hasattr(msg, "role") and msg.role == "tool":
tool_results.append(msg)
if len(tool_results) <= self.tool_result_compact_keep_n:
return
# Keep only the most recent N tool results
excess_results = tool_results[:-self.tool_result_compact_keep_n]
logger.info(
"Tool result compaction: %d tool results found, keeping %d, compacting %d",
len(tool_results),
self.tool_result_compact_keep_n,
len(excess_results),
)
# Mark excess tool results as compressed if supported
if hasattr(agent.memory, "update_messages_mark"):
from agentscope.agent._react_agent import _MemoryMark
await agent.memory.update_messages_mark(
new_mark=_MemoryMark.COMPRESSED,
msg_ids=[msg.id for msg in excess_results],
)
__all__ = [
"Hook",
"HookManager",
"HookType",
"HOOK_PRE_REASONING",
"HOOK_POST_ACTING",
"BootstrapHook",
"MemoryCompactionHook",
"WorkspaceWatchHook",
]
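
Custom hooks plug into the same manager EvoAgent uses internally; a sketch with a hypothetical audit hook, reusing this module's logger:

class AuditHook(Hook):
    """Hypothetical hook that logs each reasoning pass for offline audit."""

    async def __call__(self, agent, kwargs):
        logger.info("pre-reasoning for %s", getattr(agent, "agent_id", "unknown"))
        return None  # leave kwargs untouched

manager = HookManager()
manager.register(HOOK_PRE_REASONING, "audit", AuditHook())
# In a pre-reasoning override:
#     kwargs = await manager.execute(HOOK_PRE_REASONING, agent, kwargs)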

View File

@@ -0,0 +1,489 @@
# -*- coding: utf-8 -*-
"""Skill adaptation hook for automatic evaluation-to-iteration闭环.
Monitors evaluation metrics against configurable thresholds and triggers
automatic skill reload or logs warnings when thresholds are breached.
"""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
from .evaluation_hook import (
EvaluationCollector,
EvaluationResult,
MetricType,
)
logger = logging.getLogger(__name__)
class AdaptationAction(Enum):
"""Actions to take when threshold is breached."""
RELOAD = "reload" # 自动重新加载技能
WARN = "warn" # 记录警告供人工审核
BOTH = "both" # 同时执行重载和警告
NONE = "none" # 不做任何操作
@dataclass
class AdaptationThreshold:
"""Threshold configuration for a metric."""
metric_type: MetricType
operator: str = "lt" # lt (less than), gt (greater than), lte, gte, eq
value: float = 0.0
    window_size: int = 10  # Sliding window size used for the moving average
    min_samples: int = 5   # Minimum number of samples before checks trigger
action: AdaptationAction = AdaptationAction.WARN
    cooldown_seconds: int = 300  # Cooldown period after a trigger, in seconds
def evaluate(self, current_value: float) -> bool:
"""Evaluate if threshold is breached."""
ops = {
"lt": lambda x, y: x < y,
"lte": lambda x, y: x <= y,
"gt": lambda x, y: x > y,
"gte": lambda x, y: x >= y,
"eq": lambda x, y: x == y,
}
op_func = ops.get(self.operator)
if op_func is None:
logger.warning(f"Unknown operator: {self.operator}")
return False
return op_func(current_value, self.value)
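
    # Example use of evaluate() with hypothetical values: a hit-rate floor of
    # 0.5 using the "lt" operator breaches once the observed value drops below it:
    #     AdaptationThreshold(MetricType.HIT_RATE, "lt", 0.5).evaluate(0.42)  # -> True
    #     AdaptationThreshold(MetricType.HIT_RATE, "lt", 0.5).evaluate(0.61)  # -> False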
def to_dict(self) -> Dict[str, Any]:
return {
"metric_type": self.metric_type.value,
"operator": self.operator,
"value": self.value,
"window_size": self.window_size,
"min_samples": self.min_samples,
"action": self.action.value,
"cooldown_seconds": self.cooldown_seconds,
}
@dataclass
class AdaptationEvent:
"""Record of an adaptation trigger event."""
timestamp: str
skill_name: str
metric_type: MetricType
threshold: AdaptationThreshold
current_value: float
avg_value: float
action_taken: AdaptationAction
details: Dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> Dict[str, Any]:
return {
"timestamp": self.timestamp,
"skill_name": self.skill_name,
"metric_type": self.metric_type.value,
"threshold": self.threshold.to_dict(),
"current_value": self.current_value,
"avg_value": self.avg_value,
"action_taken": self.action_taken.value,
"details": self.details,
}
class SkillAdaptationHook:
"""Hook for monitoring evaluation metrics and triggering skill adaptation.
This hook wraps EvaluationHook to add threshold-based adaptation logic.
When metrics breach configured thresholds, it can:
- Automatically reload skills via SkillsManager
- Log warnings for human review
- Both
"""
# Default thresholds for common metrics
DEFAULT_THRESHOLDS: List[AdaptationThreshold] = [
AdaptationThreshold(
metric_type=MetricType.HIT_RATE,
operator="lt",
value=0.5,
action=AdaptationAction.WARN,
cooldown_seconds=600,
),
AdaptationThreshold(
metric_type=MetricType.RISK_VIOLATION,
operator="gt",
value=0.1,
action=AdaptationAction.WARN,
cooldown_seconds=300,
),
AdaptationThreshold(
metric_type=MetricType.DECISION_LATENCY,
operator="gt",
            value=5000,  # milliseconds (5 seconds)
action=AdaptationAction.WARN,
cooldown_seconds=300,
),
]
def __init__(
self,
storage_dir: Path,
run_id: str,
agent_id: str,
thresholds: Optional[List[AdaptationThreshold]] = None,
collector: Optional[EvaluationCollector] = None,
):
"""Initialize skill adaptation hook.
Args:
storage_dir: Directory to store adaptation events
run_id: Current run identifier
agent_id: Current agent identifier
thresholds: Custom threshold configurations (uses defaults if None)
collector: Optional EvaluationCollector for historical data
"""
self.storage_dir = Path(storage_dir)
self.run_id = run_id
self.agent_id = agent_id
        # Copy so per-instance edits (add/remove/update) never mutate DEFAULT_THRESHOLDS
        self.thresholds = list(thresholds or self.DEFAULT_THRESHOLDS)
self.collector = collector or EvaluationCollector(storage_dir)
# Track cooldowns to prevent rapid re-triggering
self._cooldowns: Dict[str, datetime] = {}
# Store recent metrics in memory for quick access
self._recent_metrics: Dict[str, List[float]] = {}
# Pending adaptation events
self._pending_events: List[AdaptationEvent] = []
def check_threshold(
self,
skill_name: str,
metric_type: MetricType,
current_value: float,
) -> Optional[AdaptationEvent]:
"""Check if a metric breaches any threshold.
Args:
skill_name: Name of the skill
metric_type: Type of metric
current_value: Current metric value
Returns:
AdaptationEvent if threshold breached, None otherwise
"""
# Find applicable thresholds
applicable_thresholds = [
t for t in self.thresholds
if t.metric_type == metric_type
]
if not applicable_thresholds:
return None
# Check cooldown
cooldown_key = f"{skill_name}:{metric_type.value}"
now = datetime.now()
last_trigger = self._cooldowns.get(cooldown_key)
# Store current value first for avg calculation
self._store_metric(cooldown_key, current_value)
for threshold in applicable_thresholds:
if last_trigger:
elapsed = (now - last_trigger).total_seconds()
if elapsed < threshold.cooldown_seconds:
continue
# Evaluate threshold
if threshold.evaluate(current_value):
# Calculate moving average
                avg_value = self._calculate_avg(
                    skill_name, metric_type, current_value, threshold.window_size,
                )
# Check minimum samples (allow immediate trigger if min_samples <= 1)
sample_count = len(self._recent_metrics.get(cooldown_key, []))
if threshold.min_samples > 1 and sample_count < threshold.min_samples:
# Not enough samples yet
continue
# Trigger adaptation
event = AdaptationEvent(
timestamp=now.isoformat(),
skill_name=skill_name,
metric_type=metric_type,
threshold=threshold,
current_value=current_value,
avg_value=avg_value,
action_taken=threshold.action,
details={
"run_id": self.run_id,
"agent_id": self.agent_id,
},
)
# Update cooldown
self._cooldowns[cooldown_key] = now
# Persist event
self._persist_event(event)
logger.info(
f"Threshold breached for {skill_name}.{metric_type.value}: "
f"current={current_value}, avg={avg_value}, action={threshold.action.value}"
)
return event
return None
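    # Usage sketch (skill name and value are illustrative): an AdaptationEvent
    # is returned only when a threshold is breached, min_samples have
    # accumulated, and the cooldown has elapsed; otherwise None.
    #   event = hook.check_threshold("macd_skill", MetricType.HIT_RATE, 0.31)
    #   if event is not None:
    #       logger.warning("adaptation triggered: %s", event.to_dict())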
    def _calculate_avg(
        self,
        skill_name: str,
        metric_type: MetricType,
        current_value: float,
        window_size: Optional[int] = None,
    ) -> float:
        """Calculate the moving average over the last ``window_size`` samples."""
        key = f"{skill_name}:{metric_type.value}"
        values = self._recent_metrics.get(key, [])
        if window_size:
            values = values[-window_size:]
        if not values:
            return current_value
        return sum(values) / len(values)
def _store_metric(self, key: str, value: float) -> None:
"""Store metric value with sliding window."""
if key not in self._recent_metrics:
self._recent_metrics[key] = []
self._recent_metrics[key].append(value)
        # Cap stored history at 100 samples; averaging windows are applied on read
        if len(self._recent_metrics[key]) > 100:
            self._recent_metrics[key] = self._recent_metrics[key][-100:]
def _persist_event(self, event: AdaptationEvent) -> None:
"""Persist adaptation event to storage."""
run_dir = self.storage_dir / self.run_id / "adaptations"
run_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
filename = f"{event.skill_name}_{event.metric_type.value}_{timestamp}.json"
filepath = run_dir / filename
try:
with open(filepath, "w", encoding="utf-8") as f:
json.dump(event.to_dict(), f, ensure_ascii=False, indent=2)
logger.debug(f"Persisted adaptation event to: {filepath}")
except Exception as e:
logger.error(f"Failed to persist adaptation event: {e}")
# Also add to pending list
self._pending_events.append(event)
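    # Illustrative on-disk layout produced above (all values made up):
    #   {storage_dir}/{run_id}/adaptations/
    #       macd_skill_hit_rate_20260330_174644_000001.json
    #   containing AdaptationEvent.to_dict() output such as
    #   {"timestamp": "...", "skill_name": "macd_skill",
    #    "metric_type": "hit_rate", "current_value": 0.42, ...}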
def get_pending_warnings(self) -> List[AdaptationEvent]:
"""Get all pending warning events that need human review."""
return [
e for e in self._pending_events
if e.action_taken in (AdaptationAction.WARN, AdaptationAction.BOTH)
]
def clear_pending_warnings(self) -> None:
"""Clear pending warnings after they have been reviewed."""
self._pending_events = [
e for e in self._pending_events
if e.action_taken == AdaptationAction.RELOAD
]
def get_recent_events(
self,
skill_name: Optional[str] = None,
metric_type: Optional[MetricType] = None,
limit: int = 50,
) -> List[AdaptationEvent]:
"""Get recent adaptation events.
Args:
skill_name: Optional filter by skill name
metric_type: Optional filter by metric type
limit: Maximum number of events to return
Returns:
List of recent adaptation events
"""
events_dir = self.storage_dir / self.run_id / "adaptations"
if not events_dir.exists():
return []
events = []
for eval_file in sorted(events_dir.glob("*.json"), reverse=True)[:limit]:
try:
with open(eval_file, "r", encoding="utf-8") as f:
data = json.load(f)
event = self._parse_event(data)
if skill_name and event.skill_name != skill_name:
continue
if metric_type and event.metric_type != metric_type:
continue
events.append(event)
except Exception as e:
logger.warning(f"Failed to load adaptation event {eval_file}: {e}")
return events
def _parse_event(self, data: Dict[str, Any]) -> AdaptationEvent:
"""Parse adaptation event from JSON data."""
threshold_data = data.get("threshold", {})
metric_type = MetricType(threshold_data.get("metric_type", "custom"))
threshold = AdaptationThreshold(
metric_type=metric_type,
operator=threshold_data.get("operator", "lt"),
value=threshold_data.get("value", 0.0),
window_size=threshold_data.get("window_size", 10),
min_samples=threshold_data.get("min_samples", 5),
action=AdaptationAction(threshold_data.get("action", "warn")),
cooldown_seconds=threshold_data.get("cooldown_seconds", 300),
)
return AdaptationEvent(
timestamp=data.get("timestamp", ""),
skill_name=data.get("skill_name", ""),
metric_type=metric_type,
threshold=threshold,
current_value=data.get("current_value", 0.0),
avg_value=data.get("avg_value", 0.0),
action_taken=AdaptationAction(data.get("action_taken", "warn")),
details=data.get("details", {}),
)
def add_threshold(self, threshold: AdaptationThreshold) -> None:
"""Add a new threshold configuration."""
self.thresholds.append(threshold)
def remove_threshold(self, metric_type: MetricType) -> None:
"""Remove all thresholds for a specific metric type."""
self.thresholds = [
t for t in self.thresholds
if t.metric_type != metric_type
]
def update_threshold(
self,
metric_type: MetricType,
**kwargs,
) -> None:
"""Update threshold configuration for a metric type."""
for threshold in self.thresholds:
if threshold.metric_type == metric_type:
for key, value in kwargs.items():
if hasattr(threshold, key):
setattr(threshold, key, value)
def get_thresholds(self) -> List[AdaptationThreshold]:
"""Get current threshold configurations."""
return list(self.thresholds)
def is_in_cooldown(self, skill_name: str, metric_type: MetricType) -> bool:
"""Check if a skill/metric combination is in cooldown period."""
key = f"{skill_name}:{metric_type.value}"
last_trigger = self._cooldowns.get(key)
if not last_trigger:
return False
# Find the threshold for this metric type
for threshold in self.thresholds:
if threshold.metric_type == metric_type:
elapsed = (datetime.now() - last_trigger).total_seconds()
return elapsed < threshold.cooldown_seconds
return False
class AdaptationManager:
"""Manager for coordinating skill adaptation across multiple agents.
Provides centralized tracking of adaptation events and skill reloads.
"""
def __init__(self, storage_dir: Path):
"""Initialize adaptation manager.
Args:
storage_dir: Root directory for storing adaptation data
"""
self.storage_dir = Path(storage_dir)
self._hooks: Dict[str, SkillAdaptationHook] = {}
def get_hook(
self,
run_id: str,
agent_id: str,
thresholds: Optional[List[AdaptationThreshold]] = None,
) -> SkillAdaptationHook:
"""Get or create an adaptation hook for an agent.
Args:
run_id: Run identifier
agent_id: Agent identifier
thresholds: Optional custom thresholds
Returns:
SkillAdaptationHook instance
"""
key = f"{run_id}:{agent_id}"
if key not in self._hooks:
self._hooks[key] = SkillAdaptationHook(
storage_dir=self.storage_dir,
run_id=run_id,
agent_id=agent_id,
thresholds=thresholds,
)
return self._hooks[key]
def get_all_pending_warnings(self) -> List[AdaptationEvent]:
"""Get all pending warnings from all hooks."""
warnings = []
for hook in self._hooks.values():
warnings.extend(hook.get_pending_warnings())
return warnings
def get_run_adaptations(self, run_id: str) -> List[AdaptationEvent]:
"""Get all adaptation events for a run."""
events = []
for hook in self._hooks.values():
if hook.run_id == run_id:
events.extend(hook.get_recent_events())
return events
# Global manager instance
_adaptation_manager: Optional[AdaptationManager] = None
def get_adaptation_manager(storage_dir: Optional[Path] = None) -> AdaptationManager:
"""Get global adaptation manager instance.
Args:
storage_dir: Optional storage directory (required on first call)
Returns:
AdaptationManager instance
"""
global _adaptation_manager
if _adaptation_manager is None:
if storage_dir is None:
raise ValueError("storage_dir required on first initialization")
_adaptation_manager = AdaptationManager(storage_dir)
return _adaptation_manager
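# Bootstrap sketch (directory and identifiers are illustrative):
#   manager = get_adaptation_manager(Path("./data/adaptations"))
#   hook = manager.get_hook(run_id="run_20260330", agent_id="trader_1")
#   hook.check_threshold("macd_skill", MetricType.HIT_RATE, 0.42)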
__all__ = [
"AdaptationAction",
"AdaptationThreshold",
"AdaptationEvent",
"SkillAdaptationHook",
"AdaptationManager",
"get_adaptation_manager",
]

View File

@@ -0,0 +1,684 @@
# -*- coding: utf-8 -*-
"""ToolGuardMixin - Security interception for dangerous tool calls.
Provides ``_acting`` and ``_reasoning`` overrides that intercept
sensitive tool calls before execution, implementing the deny /
guard / approve flow.
Based on CoPaw's tool_guard_mixin.py design.
"""
from __future__ import annotations
import asyncio
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Callable, Dict, Iterable, List, Optional, Set
from agentscope.message import Msg
from backend.runtime.manager import get_global_runtime_manager
logger = logging.getLogger(__name__)
class SeverityLevel(str, Enum):
"""Risk severity level."""
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
CRITICAL = "critical"
class ApprovalStatus(str, Enum):
"""Approval lifecycle state."""
PENDING = "pending"
APPROVED = "approved"
DENIED = "denied"
EXPIRED = "expired"
class ToolFindingRecord:
"""Internal representation of a guard finding."""
def __init__(self, severity: SeverityLevel, message: str, field: Optional[str] = None) -> None:
self.severity = severity
self.message = message
self.field = field
def to_dict(self) -> Dict[str, Any]:
return {
"severity": self.severity.value,
"message": self.message,
"field": self.field,
}
class ApprovalRecord:
"""Stores the state of an approval request."""
def __init__(
self,
approval_id: str,
tool_name: str,
tool_input: Dict[str, Any],
agent_id: str,
workspace_id: str,
session_id: Optional[str] = None,
findings: Optional[List[ToolFindingRecord]] = None,
) -> None:
self.approval_id = approval_id
self.tool_name = tool_name
self.tool_input = tool_input
self.agent_id = agent_id
self.workspace_id = workspace_id
self.session_id = session_id
self.status = ApprovalStatus.PENDING
self.findings = findings or []
self.created_at = datetime.utcnow()
self.resolved_at: Optional[datetime] = None
self.resolved_by: Optional[str] = None
self.metadata: Dict[str, Any] = {}
        self.pending_request: Optional["ToolApprovalRequest"] = None
def to_dict(self) -> Dict[str, Any]:
return {
"approval_id": self.approval_id,
"status": self.status.value,
"tool_name": self.tool_name,
"tool_input": self.tool_input,
"agent_id": self.agent_id,
"workspace_id": self.workspace_id,
"session_id": self.session_id,
"findings": [f.to_dict() for f in self.findings],
"created_at": self.created_at.isoformat(),
"resolved_at": self.resolved_at.isoformat() if self.resolved_at else None,
"resolved_by": self.resolved_by,
}
class ToolGuardStore:
"""Simple in-memory approval store for development/testing."""
def __init__(self) -> None:
self._records: Dict[str, ApprovalRecord] = {}
self._counter = 0
def next_id(self) -> str:
self._counter += 1
return f"approval_{self._counter:06d}"
def list(
self,
status: ApprovalStatus | None = None,
workspace_id: Optional[str] = None,
agent_id: Optional[str] = None,
) -> Iterable[ApprovalRecord]:
for record in self._records.values():
if status and record.status != status:
continue
if workspace_id and record.workspace_id != workspace_id:
continue
if agent_id and record.agent_id != agent_id:
continue
yield record
def get(self, approval_id: str) -> Optional[ApprovalRecord]:
return self._records.get(approval_id)
def create_pending(
self,
tool_name: str,
tool_input: Dict[str, Any],
agent_id: str,
workspace_id: str,
session_id: Optional[str] = None,
findings: Optional[List[ToolFindingRecord]] = None,
) -> ApprovalRecord:
record = ApprovalRecord(
approval_id=self.next_id(),
tool_name=tool_name,
tool_input=tool_input,
agent_id=agent_id,
workspace_id=workspace_id,
session_id=session_id,
findings=findings,
)
self._records[record.approval_id] = record
return record
def set_status(
self,
approval_id: str,
status: ApprovalStatus,
resolved_by: Optional[str] = None,
notify_request: bool = True,
) -> ApprovalRecord:
record = self._records[approval_id]
if record.status == status:
return record
record.status = status
record.resolved_at = datetime.utcnow()
record.resolved_by = resolved_by
if notify_request and record.pending_request:
if status == ApprovalStatus.APPROVED:
record.pending_request.approve()
elif status == ApprovalStatus.DENIED:
record.pending_request.deny()
return record
def cancel(self, approval_id: str) -> None:
self._records.pop(approval_id, None)
TOOL_GUARD_STORE = ToolGuardStore()
def get_tool_guard_store() -> ToolGuardStore:
return TOOL_GUARD_STORE
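# Store lifecycle sketch (inputs are illustrative): create a pending record,
# then resolve it; set_status also notifies any attached pending_request.
#   store = get_tool_guard_store()
#   rec = store.create_pending("write_file", {"path": "notes.md"}, "agent_1", "ws_1")
#   store.set_status(rec.approval_id, ApprovalStatus.APPROVED, resolved_by="admin")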
# Default tools that require approval
DEFAULT_GUARDED_TOOLS: Set[str] = {
"execute_shell_command",
"write_file",
"edit_file",
"place_order",
"modify_position",
"delete_file",
}
# Default denied tools (cannot be approved)
DEFAULT_DENIED_TOOLS: Set[str] = {
"execute_shell_command", # Shell execution is dangerous
}
# Mark for tool guard denied messages
TOOL_GUARD_DENIED_MARK = "tool_guard_denied"
def default_findings_for_tool(tool_name: str) -> List[ToolFindingRecord]:
findings: List[ToolFindingRecord] = []
    # Cover the trading tools in DEFAULT_GUARDED_TOOLS as well as legacy names
    if tool_name in {"place_order", "modify_position", "execute_trade", "modify_portfolio"}:
findings.append(
ToolFindingRecord(
severity=SeverityLevel.HIGH,
message=f"Tool '{tool_name}' touches portfolio state",
)
)
return findings
class ToolApprovalRequest:
"""Represents a pending tool approval request."""
def __init__(
self,
approval_id: str,
tool_name: str,
tool_input: Dict[str, Any],
tool_call_id: str,
session_id: Optional[str] = None,
):
self.approval_id = approval_id
self.tool_name = tool_name
self.tool_input = tool_input
self.tool_call_id = tool_call_id
self.session_id = session_id
self.approved: Optional[bool] = None
self._event = asyncio.Event()
async def wait_for_approval(self, timeout: Optional[float] = None) -> bool:
"""Wait for approval decision.
Args:
timeout: Maximum time to wait in seconds
Returns:
True if approved, False otherwise
"""
try:
await asyncio.wait_for(self._event.wait(), timeout=timeout)
except asyncio.TimeoutError:
return False
return self.approved is True
def approve(self) -> None:
"""Approve this request."""
self.approved = True
self._event.set()
def deny(self) -> None:
"""Deny this request."""
self.approved = False
self._event.set()
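# Concurrency sketch (IDs are illustrative): one coroutine blocks on the
# decision while another resolves it via approve()/deny().
#   req = ToolApprovalRequest("approval_000001", "write_file", {}, "call_1")
#   waiter = asyncio.create_task(req.wait_for_approval(timeout=30))
#   req.approve()                 # unblocks the waiter
#   assert await waiter is True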
class ToolGuardMixin:
"""Mixin that adds tool-guard interception to a ReActAgent.
At runtime this class is combined with ReActAgent via MRO,
so ``super()._acting`` and ``super()._reasoning`` resolve to
the concrete agent methods.
Usage:
class MyAgent(ToolGuardMixin, ReActAgent):
def __init__(self, ...):
super().__init__(...)
self._init_tool_guard()
"""
def _init_tool_guard(
self,
guarded_tools: Optional[Set[str]] = None,
denied_tools: Optional[Set[str]] = None,
approval_timeout: float = 300.0,
) -> None:
"""Initialize tool guard.
Args:
guarded_tools: Set of tool names requiring approval
denied_tools: Set of tool names that are always denied
approval_timeout: Timeout for approval requests in seconds
"""
self._guarded_tools = guarded_tools or DEFAULT_GUARDED_TOOLS.copy()
self._denied_tools = denied_tools or DEFAULT_DENIED_TOOLS.copy()
self._approval_timeout = approval_timeout
self._pending_approval: Optional[ToolApprovalRequest] = None
self._approval_callback: Optional[Callable[[ToolApprovalRequest], None]] = None
self._approval_lock = asyncio.Lock()
def set_approval_callback(
self,
callback: Callable[[ToolApprovalRequest], None],
) -> None:
"""Set callback for approval requests.
Args:
callback: Function called when approval is needed
"""
self._approval_callback = callback
def _is_tool_guarded(self, tool_name: str) -> bool:
"""Check if a tool requires approval.
Args:
tool_name: Name of the tool
Returns:
True if tool requires approval
"""
return tool_name in self._guarded_tools
def _is_tool_denied(self, tool_name: str) -> bool:
"""Check if a tool is always denied.
Args:
tool_name: Name of the tool
Returns:
True if tool is denied
"""
return tool_name in self._denied_tools
def _last_tool_response_is_denied(self) -> bool:
"""Check if the last message is a guard-denied tool result."""
if not hasattr(self, "memory") or not self.memory.content:
return False
msg, marks = self.memory.content[-1]
return TOOL_GUARD_DENIED_MARK in marks and msg.role == "system"
async def _cleanup_tool_guard_denied_messages(
self,
include_denial_response: bool = True,
) -> None:
"""Remove tool-guard denied messages from memory.
Args:
include_denial_response: Also remove the assistant's denial explanation
"""
if not hasattr(self, "memory"):
return
ids_to_delete: list[str] = []
last_marked_idx = -1
for i, (msg, marks) in enumerate(self.memory.content):
if TOOL_GUARD_DENIED_MARK in marks:
ids_to_delete.append(msg.id)
last_marked_idx = i
if (
include_denial_response
and last_marked_idx >= 0
and last_marked_idx + 1 < len(self.memory.content)
):
next_msg, _ = self.memory.content[last_marked_idx + 1]
if next_msg.role == "assistant":
ids_to_delete.append(next_msg.id)
if ids_to_delete:
removed = await self.memory.delete(ids_to_delete)
logger.info("Tool guard: cleaned up %d denied message(s)", removed)
async def _request_guard_approval(
self,
tool_name: str,
tool_input: Dict[str, Any],
tool_call_id: str,
) -> bool:
"""Request approval for a guarded tool call.
This method creates a ToolApprovalRequest and waits for
external approval via approve_guard_call() or deny_guard_call().
Args:
tool_name: Name of the tool
tool_input: Tool input parameters
tool_call_id: ID of the tool call
Returns:
True if approved, False otherwise
"""
async with self._approval_lock:
record = TOOL_GUARD_STORE.create_pending(
tool_name=tool_name,
tool_input=tool_input,
agent_id=getattr(self, "agent_id", "unknown"),
workspace_id=getattr(self, "workspace_id", "default"),
session_id=getattr(self, "session_id", None),
findings=default_findings_for_tool(tool_name),
)
manager = get_global_runtime_manager()
if manager:
manager.register_pending_approval(
record.approval_id,
{
"tool_name": record.tool_name,
"agent_id": record.agent_id,
"workspace_id": record.workspace_id,
"session_id": record.session_id,
"tool_input": record.tool_input,
},
)
self._pending_approval = ToolApprovalRequest(
approval_id=record.approval_id,
tool_name=tool_name,
tool_input=tool_input,
tool_call_id=tool_call_id,
session_id=getattr(self, "session_id", None),
)
record.pending_request = self._pending_approval
# Notify via callback if set
if self._approval_callback:
self._approval_callback(self._pending_approval)
# Wait for approval (lock is released during wait, re-acquired after)
approval_request = self._pending_approval
# Wait for approval outside the lock to allow concurrent approval
approved = await approval_request.wait_for_approval(
timeout=self._approval_timeout
)
async with self._approval_lock:
if approval_request:
status = (
ApprovalStatus.APPROVED
if approval_request.approved is True
else ApprovalStatus.DENIED
if approval_request.approved is False
else ApprovalStatus.EXPIRED
)
TOOL_GUARD_STORE.set_status(
approval_request.approval_id,
status,
resolved_by="agent",
notify_request=False,
)
manager = get_global_runtime_manager()
if manager:
manager.resolve_pending_approval(
approval_request.approval_id,
resolved_by="agent",
status=status.value,
)
# Only clear if this is still the same request
if self._pending_approval is approval_request:
self._pending_approval = None
return approved
async def approve_guard_call(self, request_id: Optional[str] = None) -> bool:
"""Approve a pending guard request.
This method is called externally to approve a tool call
that is waiting for approval.
Args:
request_id: Optional request ID to verify (not yet implemented)
Returns:
True if a request was approved, False if no pending request
"""
async with self._approval_lock:
if self._pending_approval is None:
logger.warning("No pending approval request to approve")
return False
TOOL_GUARD_STORE.set_status(
self._pending_approval.approval_id,
ApprovalStatus.APPROVED,
resolved_by="agent",
notify_request=False,
)
manager = get_global_runtime_manager()
if manager:
manager.resolve_pending_approval(
self._pending_approval.approval_id,
resolved_by="agent",
status=ApprovalStatus.APPROVED.value,
)
self._pending_approval.approve()
logger.info("Approved tool call: %s", self._pending_approval.tool_name)
return True
async def deny_guard_call(self, request_id: Optional[str] = None) -> bool:
"""Deny a pending guard request.
This method is called externally to deny a tool call
that is waiting for approval.
Args:
request_id: Optional request ID to verify (not yet implemented)
Returns:
True if a request was denied, False if no pending request
"""
async with self._approval_lock:
if self._pending_approval is None:
logger.warning("No pending approval request to deny")
return False
TOOL_GUARD_STORE.set_status(
self._pending_approval.approval_id,
ApprovalStatus.DENIED,
resolved_by="agent",
notify_request=False,
)
manager = get_global_runtime_manager()
if manager:
manager.resolve_pending_approval(
self._pending_approval.approval_id,
resolved_by="agent",
status=ApprovalStatus.DENIED.value,
)
self._pending_approval.deny()
logger.info("Denied tool call: %s", self._pending_approval.tool_name)
return True
async def _acting(self, tool_call) -> dict | None:
"""Intercept sensitive tool calls before execution.
1. If tool is in denied_tools, auto-deny unconditionally.
2. Check for a one-shot pre-approval.
3. If tool is in the guarded scope, request approval.
4. Otherwise, delegate to parent _acting.
Args:
tool_call: Tool call from the model
Returns:
Tool result dict or None
"""
tool_name: str = tool_call.get("name", "")
tool_input: dict = tool_call.get("input", {})
tool_call_id: str = tool_call.get("id", "")
# Check if tool is denied
if tool_name and self._is_tool_denied(tool_name):
logger.warning("Tool '%s' is in the denied set, auto-denying", tool_name)
return await self._acting_auto_denied(tool_call, tool_name)
# Check if tool is guarded
if tool_name and self._is_tool_guarded(tool_name):
approved = await self._request_guard_approval(
tool_name=tool_name,
tool_input=tool_input,
tool_call_id=tool_call_id,
)
if not approved:
return await self._acting_with_denial(tool_call, tool_name)
# Call parent _acting
return await super()._acting(tool_call) # type: ignore[misc]
async def _acting_auto_denied(
self,
tool_call: Dict[str, Any],
tool_name: str,
) -> dict | None:
"""Auto-deny a tool call without offering approval.
Args:
tool_call: Tool call from the model
tool_name: Name of the denied tool
Returns:
Denial result
"""
from agentscope.message import ToolResultBlock
denied_text = (
f"⛔ **Tool Blocked / 工具已拦截**\n\n"
f"- Tool / 工具: `{tool_name}`\n"
f"- Reason / 原因: This tool is blocked for security reasons\n\n"
f"This tool is blocked and cannot be approved.\n"
f"该工具已被禁止,无法批准执行。"
)
tool_res_msg = Msg(
"system",
[
ToolResultBlock(
type="tool_result",
id=tool_call.get("id", ""),
name=tool_name,
output=[{"type": "text", "text": denied_text}],
),
],
"system",
)
await self.print(tool_res_msg, True)
await self.memory.add(tool_res_msg)
return None
async def _acting_with_denial(
self,
tool_call: Dict[str, Any],
tool_name: str,
) -> dict | None:
"""Deny the tool call after approval was rejected.
Args:
tool_call: Tool call from the model
tool_name: Name of the tool
Returns:
Denial result
"""
from agentscope.message import ToolResultBlock
params_text = json.dumps(
tool_call.get("input", {}),
ensure_ascii=False,
indent=2,
)
denied_text = (
f"⚠️ **Tool Call Denied / 工具调用被拒绝**\n\n"
f"- Tool / 工具: `{tool_name}`\n"
f"- Parameters / 参数:\n"
f"```json\n{params_text}\n```\n\n"
f"The tool call was denied by the user or timed out.\n"
f"工具调用被用户拒绝或已超时。"
)
tool_res_msg = Msg(
"system",
[
ToolResultBlock(
type="tool_result",
id=tool_call.get("id", ""),
name=tool_name,
output=[{"type": "text", "text": denied_text}],
),
],
"system",
)
await self.print(tool_res_msg, True)
await self.memory.add(tool_res_msg, marks=TOOL_GUARD_DENIED_MARK)
return None
async def _reasoning(self, **kwargs) -> Msg:
"""Short-circuit reasoning when awaiting guard approval.
If the last message was a guard denial, return a waiting message
instead of continuing reasoning.
Returns:
Response message
"""
if self._last_tool_response_is_denied():
msg = Msg(
self.name,
"⏳ Waiting for approval / 等待审批...\n\n"
"Type `/approve` to approve, or send any message to deny.\n"
"输入 `/approve` 批准执行,或发送任意消息拒绝。",
"assistant",
)
await self.print(msg, True)
await self.memory.add(msg)
return msg
return await super()._reasoning(**kwargs) # type: ignore[misc]
__all__ = [
"ToolGuardMixin",
"ToolApprovalRequest",
"DEFAULT_GUARDED_TOOLS",
"DEFAULT_DENIED_TOOLS",
"TOOL_GUARD_DENIED_MARK",
]
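# Integration sketch (MyAgent and notify_frontend are hypothetical; ReActAgent
# comes from agentscope): mix the guard in, wire a callback, and resolve
# approvals from an external handler.
#   class MyAgent(ToolGuardMixin, ReActAgent):
#       def __init__(self, *args, **kwargs):
#           super().__init__(*args, **kwargs)
#           self._init_tool_guard(approval_timeout=120.0)
#   agent = MyAgent(...)
#   agent.set_approval_callback(lambda req: notify_frontend(req.approval_id))
#   await agent.approve_guard_call()   # or: await agent.deny_guard_call()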