Initial commit of integrated agent system
This commit is contained in:
57
backend/agents/base/__init__.py
Normal file
57
backend/agents/base/__init__.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Base agent module for 大时代.
|
||||
|
||||
提供Agent基础类、命令处理、工具守卫和钩子管理等功能。
|
||||
"""
|
||||
|
||||
# 命令处理器 (从command_handler.py导入)
|
||||
from .command_handler import (
|
||||
AgentCommandDispatcher,
|
||||
CommandContext,
|
||||
CommandHandler,
|
||||
CommandResult,
|
||||
create_command_dispatcher,
|
||||
)
|
||||
|
||||
# 评估钩子 (从evaluation_hook.py导入)
|
||||
from .evaluation_hook import (
|
||||
EvaluationHook,
|
||||
EvaluationCollector,
|
||||
MetricType,
|
||||
EvaluationMetric,
|
||||
EvaluationResult,
|
||||
parse_evaluation_hooks,
|
||||
)
|
||||
|
||||
# 技能适配钩子 (从skill_adaptation_hook.py导入)
|
||||
from .skill_adaptation_hook import (
|
||||
AdaptationAction,
|
||||
AdaptationThreshold,
|
||||
AdaptationEvent,
|
||||
SkillAdaptationHook,
|
||||
AdaptationManager,
|
||||
get_adaptation_manager,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# 命令处理
|
||||
"AgentCommandDispatcher",
|
||||
"CommandContext",
|
||||
"CommandHandler",
|
||||
"CommandResult",
|
||||
"create_command_dispatcher",
|
||||
# 评估钩子
|
||||
"EvaluationHook",
|
||||
"EvaluationCollector",
|
||||
"MetricType",
|
||||
"EvaluationMetric",
|
||||
"EvaluationResult",
|
||||
"parse_evaluation_hooks",
|
||||
# 技能适配钩子
|
||||
"AdaptationAction",
|
||||
"AdaptationThreshold",
|
||||
"AdaptationEvent",
|
||||
"SkillAdaptationHook",
|
||||
"AdaptationManager",
|
||||
"get_adaptation_manager",
|
||||
]
|
||||
543
backend/agents/base/command_handler.py
Normal file
543
backend/agents/base/command_handler.py
Normal file
@@ -0,0 +1,543 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Agent command handler for system commands.
|
||||
|
||||
This module handles system commands like /save, /compact, /skills, /reload, etc.
|
||||
参考CoPaw设计,为EvoAgent提供命令处理能力。
|
||||
"""
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Protocol
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .agent import EvoAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class CommandResult:
    """Outcome of executing a system command."""
    # Whether the command completed successfully.
    success: bool
    # Human-readable result text shown to the user (may contain emoji/markdown).
    message: str
    # Optional structured payload for programmatic consumers (handler-specific keys).
    data: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
class CommandContext:
    """Execution context handed to a command handler.

    Bundles the issuing agent with the raw query string and the parsed
    argument remainder, resolving the agent's config name and id with
    safe fallbacks when the agent object does not expose them.
    """

    def __init__(self, agent: "EvoAgent", raw_query: str, args: str = ""):
        # Identity fields fall back to defaults for minimal agent objects.
        self.config_name = getattr(agent, "config_name", "default")
        self.agent_id = getattr(agent, "agent_id", "unknown")
        self.agent = agent
        self.raw_query = raw_query
        self.args = args
|
||||
|
||||
|
||||
class CommandHandler(ABC):
    """Abstract base class for a single system-command implementation."""

    @abstractmethod
    async def handle(self, ctx: CommandContext) -> CommandResult:
        """Execute the command described by *ctx* and return its result."""
|
||||
|
||||
|
||||
class SaveCommandHandler(CommandHandler):
    """Handle ``/save <message>`` - append the message to MEMORY.md."""

    async def handle(self, ctx: CommandContext) -> CommandResult:
        text = ctx.args.strip()
        # Guard clause: refuse empty payloads with a usage hint.
        if not text:
            return CommandResult(
                success=False,
                message="Usage: /save <message>\n请提供要保存的内容。"
            )

        try:
            target = self._get_memory_path(ctx)
            target.parent.mkdir(parents=True, exist_ok=True)

            # Each entry is a timestamped markdown section appended to the file.
            record = f"\n## {self._get_timestamp()}\n\n{text}\n"
            with target.open("a", encoding="utf-8") as fh:
                fh.write(record)

            return CommandResult(
                success=True,
                message=f"✅ 内容已保存到 MEMORY.md\n- 路径: {target}\n- 长度: {len(text)} 字符",
                data={"path": str(target), "length": len(text)}
            )
        except Exception as e:
            logger.error(f"Failed to save to MEMORY.md: {e}")
            return CommandResult(
                success=False,
                message=f"❌ 保存失败: {str(e)}"
            )

    def _get_memory_path(self, ctx: CommandContext) -> Path:
        """Resolve the MEMORY.md path inside the agent's asset directory."""
        from backend.agents.skills_manager import SkillsManager

        asset_dir = SkillsManager().get_agent_asset_dir(ctx.config_name, ctx.agent_id)
        return asset_dir / "MEMORY.md"

    def _get_timestamp(self) -> str:
        """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
        from datetime import datetime

        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
|
||||
class CompactCommandHandler(CommandHandler):
    """Handle the ``/compact`` command - compress the agent's memory."""

    async def handle(self, ctx: CommandContext) -> CommandResult:
        try:
            agent = ctx.agent
            # The compaction capability lives on an optional memory_manager
            # attribute; absence means the feature is disabled by config.
            memory_manager = getattr(agent, "memory_manager", None)

            if memory_manager is None:
                return CommandResult(
                    success=False,
                    message="❌ Memory Manager 未启用\n\n- 记忆压缩功能不可用\n- 请在配置中启用 memory_manager"
                )

            messages = await self._get_messages(agent)
            if not messages:
                return CommandResult(
                    success=False,
                    message="⚠️ 没有可压缩的消息\n\n- 当前记忆为空\n- 无需执行压缩"
                )

            # Compact first, then store the summary back on the memory object.
            compact_content = await memory_manager.compact_memory(messages)
            await self._update_compressed_summary(agent, compact_content)

            return CommandResult(
                success=True,
                message=f"✅ 记忆压缩完成\n\n- 压缩了 {len(messages)} 条消息\n- 摘要长度: {len(compact_content)} 字符",
                data={"message_count": len(messages), "summary_length": len(compact_content)}
            )
        except Exception as e:
            logger.error(f"Failed to compact memory: {e}")
            return CommandResult(
                success=False,
                message=f"❌ 压缩失败: {str(e)}"
            )

    async def _get_messages(self, agent: "EvoAgent") -> List[Any]:
        """Fetch the agent's memory messages, or [] when memory is unavailable."""
        memory = getattr(agent, "memory", None)
        if memory is None:
            return []
        # Duck-typed: only memories exposing get_memory() are readable here.
        return await memory.get_memory() if hasattr(memory, "get_memory") else []

    async def _update_compressed_summary(self, agent: "EvoAgent", content: str) -> None:
        """Store the compacted summary on the memory object, if supported."""
        memory = getattr(agent, "memory", None)
        # Best effort: silently skip when the memory lacks the hook.
        if memory and hasattr(memory, "update_compressed_summary"):
            await memory.update_compressed_summary(content)
|
||||
|
||||
|
||||
class SkillsListCommandHandler(CommandHandler):
    """Handle ``/skills list`` - show the agent's activated skills."""

    async def handle(self, ctx: CommandContext) -> CommandResult:
        try:
            from backend.agents.skills_manager import SkillsManager
            sm = SkillsManager()

            active_skills = sm.list_active_skill_metadata(ctx.config_name, ctx.agent_id)
            catalog = sm.list_agent_skill_catalog(ctx.config_name, ctx.agent_id)

            lines = ["📋 技能列表", ""]

            if active_skills:
                lines.append("✅ 已激活技能:")
                for skill in active_skills:
                    # NOTE(review): display uses skill.name but the data payload
                    # below uses s.skill_name — confirm both attributes exist on
                    # the metadata objects returned by SkillsManager.
                    lines.append(f"  • {skill.name} - {skill.description[:50]}...")
            else:
                lines.append("⚠️ 当前没有激活的技能")

            lines.append("")
            lines.append(f"📚 可用技能总数: {len(catalog)}")
            lines.append("💡 使用 /skills enable <name> 启用技能")

            return CommandResult(
                success=True,
                message="\n".join(lines),
                data={
                    "active_count": len(active_skills),
                    "catalog_count": len(catalog),
                    "active": [s.skill_name for s in active_skills]
                }
            )
        except Exception as e:
            logger.error(f"Failed to list skills: {e}")
            return CommandResult(
                success=False,
                message=f"❌ 获取技能列表失败: {str(e)}"
            )
|
||||
|
||||
|
||||
class SkillsEnableCommandHandler(CommandHandler):
    """Handle ``/skills enable <name>`` - activate a skill for this agent."""

    async def handle(self, ctx: CommandContext) -> CommandResult:
        name = ctx.args.strip()
        # Guard clause: a skill name is mandatory.
        if not name:
            return CommandResult(
                success=False,
                message="Usage: /skills enable <skill_name>\n请提供技能名称。"
            )

        try:
            from backend.agents.skills_manager import SkillsManager

            outcome = SkillsManager().update_agent_skill_overrides(
                ctx.config_name,
                ctx.agent_id,
                enable=[name]
            )

            enabled = ', '.join(outcome['enabled_skills'])
            return CommandResult(
                success=True,
                message=f"✅ 技能已启用: {name}\n\n已启用技能: {enabled}",
                data=outcome
            )
        except Exception as e:
            logger.error(f"Failed to enable skill: {e}")
            return CommandResult(
                success=False,
                message=f"❌ 启用技能失败: {str(e)}"
            )
|
||||
|
||||
|
||||
class SkillsDisableCommandHandler(CommandHandler):
    """Handle ``/skills disable <name>`` - deactivate a skill for this agent."""

    async def handle(self, ctx: CommandContext) -> CommandResult:
        name = ctx.args.strip()
        # Guard clause: a skill name is mandatory.
        if not name:
            return CommandResult(
                success=False,
                message="Usage: /skills disable <skill_name>\n请提供技能名称。"
            )

        try:
            from backend.agents.skills_manager import SkillsManager

            outcome = SkillsManager().update_agent_skill_overrides(
                ctx.config_name,
                ctx.agent_id,
                disable=[name]
            )

            disabled = ', '.join(outcome['disabled_skills'])
            return CommandResult(
                success=True,
                message=f"✅ 技能已禁用: {name}\n\n已禁用技能: {disabled}",
                data=outcome
            )
        except Exception as e:
            logger.error(f"Failed to disable skill: {e}")
            return CommandResult(
                success=False,
                message=f"❌ 禁用技能失败: {str(e)}"
            )
|
||||
|
||||
|
||||
class SkillsInstallCommandHandler(CommandHandler):
    """Handle ``/skills install <name>`` - copy a skill into the agent's installed dir."""

    async def handle(self, ctx: CommandContext) -> CommandResult:
        skill_name = ctx.args.strip()
        if not skill_name:
            return CommandResult(
                success=False,
                message="Usage: /skills install <skill_name>\n请提供技能名称。"
            )

        try:
            from backend.agents.skills_manager import SkillsManager
            from backend.agents.skill_loader import load_skill_from_dir
            sm = SkillsManager()

            # Locate the skill's source directory (customized takes precedence).
            source_dir = self._resolve_skill_source(sm, skill_name)
            if not source_dir:
                return CommandResult(
                    success=False,
                    message=f"❌ 技能未找到: {skill_name}\n\n请检查技能名称是否正确,或技能是否存在于 builtin/customized 目录。"
                )

            # Load and validate the skill before copying anything.
            skill_info = load_skill_from_dir(source_dir)
            if not skill_info:
                return CommandResult(
                    success=False,
                    message=f"❌ 技能加载失败: {skill_name}\n\n技能格式可能不正确。"
                )

            # Install into the agent's "installed" directory.
            # NOTE(review): skill_name is used verbatim as a path component —
            # confirm upstream validation rejects separators like "../".
            installed_root = sm.get_agent_installed_root(ctx.config_name, ctx.agent_id)
            target_dir = installed_root / skill_name

            import shutil
            # Reinstall semantics: an existing copy is removed first.
            if target_dir.exists():
                shutil.rmtree(target_dir)
            shutil.copytree(source_dir, target_dir)

            return CommandResult(
                success=True,
                message=f"✅ 技能已安装: {skill_name}\n\n- 名称: {skill_info.get('name', skill_name)}\n- 版本: {skill_info.get('version', 'unknown')}\n- 路径: {target_dir}",
                data={"skill_name": skill_name, "target_dir": str(target_dir)}
            )
        except Exception as e:
            logger.error(f"Failed to install skill: {e}")
            return CommandResult(
                success=False,
                message=f"❌ 安装技能失败: {str(e)}"
            )

    def _resolve_skill_source(self, sm: "SkillsManager", skill_name: str) -> Optional[Path]:
        """Return the first root containing <skill_name>/SKILL.md, or None."""
        for root in [sm.customized_root, sm.builtin_root]:
            candidate = root / skill_name
            # A directory only counts as a skill if it ships a SKILL.md.
            if candidate.exists() and (candidate / "SKILL.md").exists():
                return candidate
        return None
|
||||
|
||||
|
||||
class ReloadCommandHandler(CommandHandler):
    """Handle the ``/reload`` command - reload configuration and skill cache."""

    async def handle(self, ctx: CommandContext) -> CommandResult:
        try:
            agent = ctx.agent

            # Reload agent configuration, if the agent supports it.
            if hasattr(agent, "reload_config"):
                await agent.reload_config()

            # Reload skills.
            from backend.agents.skills_manager import SkillsManager
            sm = SkillsManager()

            # Refresh skill synchronization.
            active_root = sm.get_agent_active_root(ctx.config_name, ctx.agent_id)
            if active_root.exists():
                # Clear the cache to force a re-sync on next use.
                # NOTE(review): this permanently deletes every subdirectory under
                # active_root — confirm these are regenerable caches, not state.
                import shutil
                for item in active_root.iterdir():
                    if item.is_dir():
                        shutil.rmtree(item)

            return CommandResult(
                success=True,
                message="✅ 配置已重新加载\n\n- Agent配置已刷新\n- 技能缓存已清除\n- 请重启对话以应用所有更改",
                data={"config_name": ctx.config_name, "agent_id": ctx.agent_id}
            )
        except Exception as e:
            logger.error(f"Failed to reload config: {e}")
            return CommandResult(
                success=False,
                message=f"❌ 重新加载失败: {str(e)}"
            )
|
||||
|
||||
|
||||
class StatusCommandHandler(CommandHandler):
    """Handle the ``/status`` command - report the agent's current state."""

    async def handle(self, ctx: CommandContext) -> CommandResult:
        try:
            agent = ctx.agent

            lines = ["📊 Agent 状态", ""]
            lines.append(f"🆔 Agent ID: {ctx.agent_id}")
            lines.append(f"⚙️ Config: {ctx.config_name}")

            # Model info (only shown when the agent exposes one).
            model = getattr(agent, "model", None)
            if model:
                lines.append(f"🤖 Model: {model}")

            # Memory status.
            memory = getattr(agent, "memory", None)
            if memory:
                # assumes memory.content is a sized collection of messages — TODO confirm
                msg_count = len(getattr(memory, "content", []))
                lines.append(f"💾 Memory: {msg_count} messages")

            # Skill status.
            from backend.agents.skills_manager import SkillsManager
            sm = SkillsManager()
            active_skills = sm.list_active_skill_metadata(ctx.config_name, ctx.agent_id)
            lines.append(f"🔧 Active Skills: {len(active_skills)}")

            # Tool-group status.
            toolkit = getattr(agent, "toolkit", None)
            if toolkit:
                groups = getattr(toolkit, "tool_groups", {})
                active_groups = [name for name, g in groups.items() if getattr(g, "active", False)]
                lines.append(f"🛠️ Active Tool Groups: {', '.join(active_groups) if active_groups else 'None'}")

            return CommandResult(
                success=True,
                message="\n".join(lines),
                data={
                    "agent_id": ctx.agent_id,
                    "config_name": ctx.config_name,
                    "active_skills_count": len(active_skills)
                }
            )
        except Exception as e:
            logger.error(f"Failed to get status: {e}")
            return CommandResult(
                success=False,
                message=f"❌ 获取状态失败: {str(e)}"
            )
|
||||
|
||||
|
||||
class HelpCommandHandler(CommandHandler):
    """Handle the ``/help`` command - show the command reference text."""

    async def handle(self, ctx: CommandContext) -> CommandResult:
        # Static, always-successful response; ctx is intentionally unused.
        help_text = """📖 EvoAgent 命令帮助

可用命令:
/save <message> - 保存内容到 MEMORY.md
/compact - 压缩记忆
/skills list - 列出已激活技能
/skills enable <name> - 启用技能
/skills disable <name>- 禁用技能
/skills install <name>- 安装技能
/reload - 重新加载配置
/status - 显示Agent状态
/help - 显示此帮助信息

提示:
• 所有命令以 / 开头
• 命令不区分大小写
• 使用 Tab 键可自动补全命令
"""
        return CommandResult(success=True, message=help_text)
|
||||
|
||||
|
||||
class AgentCommandDispatcher:
    """Unified dispatcher routing slash commands to their handlers.

    Modeled on CoPaw's CommandHandler design; provides the single entry
    point for EvoAgent system-command processing. Supports flat commands
    (``/save``) and nested subcommands (``/skills list``).
    """

    # Top-level system commands recognized by is_command().
    SYSTEM_COMMANDS = frozenset({
        "save", "compact",
        "skills", "reload",
        "status", "help"
    })

    def __init__(self):
        # cmd -> handler for flat commands (e.g. "save").
        self._handlers: Dict[str, CommandHandler] = {}
        # cmd -> {subcmd -> handler} for nested commands (e.g. "skills list").
        self._subcommands: Dict[str, Dict[str, CommandHandler]] = {}
        self._register_default_handlers()

    def _register_default_handlers(self) -> None:
        """Register the built-in command handlers."""
        self._handlers["save"] = SaveCommandHandler()
        self._handlers["compact"] = CompactCommandHandler()
        self._handlers["reload"] = ReloadCommandHandler()
        self._handlers["status"] = StatusCommandHandler()
        self._handlers["help"] = HelpCommandHandler()

        # Subcommands: /skills list/enable/disable/install
        self._subcommands["skills"] = {
            "list": SkillsListCommandHandler(),
            "enable": SkillsEnableCommandHandler(),
            "disable": SkillsDisableCommandHandler(),
            "install": SkillsInstallCommandHandler(),
        }

    def is_command(self, query: str | None) -> bool:
        """Check whether the input is a system command.

        Args:
            query: Raw user input string.

        Returns:
            True if the input starts with "/" and names a known command.
        """
        if not isinstance(query, str) or not query.startswith("/"):
            return False

        parts = query.strip().lstrip("/").split()
        if not parts:
            return False

        # Commands are case-insensitive; membership test covers the
        # whole SYSTEM_COMMANDS set in one expression.
        return parts[0].lower() in self.SYSTEM_COMMANDS

    async def handle(self, agent: "EvoAgent", query: str) -> CommandResult:
        """Process a command string.

        Args:
            agent: The EvoAgent instance issuing the command.
            query: Command string (e.g. "/skills enable foo").

        Returns:
            The command's execution result.
        """
        if not self.is_command(query):
            return CommandResult(
                success=False,
                message=f"未知命令: {query}\n使用 /help 查看可用命令。"
            )

        # Split into command and argument remainder.
        parts = query.strip().lstrip("/").split(maxsplit=1)
        cmd = parts[0].lower()
        args = parts[1] if len(parts) > 1 else ""

        # Lazy %-formatting avoids building the string when INFO is disabled.
        logger.info("Processing command: %s, args: %s", cmd, args)

        # Nested commands (e.g. /skills list).
        if cmd in self._subcommands:
            sub_parts = args.split(maxsplit=1)
            sub_cmd = sub_parts[0].lower() if sub_parts else ""
            sub_args = sub_parts[1] if len(sub_parts) > 1 else ""

            handlers = self._subcommands[cmd]
            handler = handlers.get(sub_cmd)

            if handler is None:
                available = ", ".join(handlers.keys())
                return CommandResult(
                    success=False,
                    message=f"未知子命令: {sub_cmd}\n可用子命令: {available}"
                )

            ctx = CommandContext(agent, query, sub_args)
            return await handler.handle(ctx)

        # Flat commands.
        handler = self._handlers.get(cmd)
        if handler is None:
            return CommandResult(
                success=False,
                message=f"命令未实现: {cmd}"
            )

        ctx = CommandContext(agent, query, args)
        return await handler.handle(ctx)
|
||||
|
||||
|
||||
# Convenience factory
def create_command_dispatcher() -> AgentCommandDispatcher:
    """Build and return a fresh command dispatcher instance."""
    dispatcher = AgentCommandDispatcher()
    return dispatcher
|
||||
452
backend/agents/base/evaluation_hook.py
Normal file
452
backend/agents/base/evaluation_hook.py
Normal file
@@ -0,0 +1,452 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Evaluation hooks system for skills.
|
||||
|
||||
Provides evaluation metric collection and storage for skill performance tracking.
|
||||
Based on the evaluation hooks design in SKILL_TEMPLATE.md.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MetricType(Enum):
    """Types of evaluation metrics."""
    HIT_RATE = "hit_rate"                      # signal hit rate
    RISK_VIOLATION = "risk_violation"          # risk-control violation rate
    POSITION_DEVIATION = "position_deviation"  # position deviation rate
    PnL_ATTRIBUTION = "pnl_attribution"        # P&L attribution consistency
    SIGNAL_CONSISTENCY = "signal_consistency"  # signal consistency
    DECISION_LATENCY = "decision_latency"      # decision latency
    TOOL_USAGE = "tool_usage"                  # tool usage rate
    CUSTOM = "custom"                          # user-defined metric
|
||||
|
||||
|
||||
@dataclass
class EvaluationMetric:
    """A single evaluation metric observation."""
    # Metric identifier, e.g. "hit_rate".
    name: str
    # Category of the metric (see MetricType).
    metric_type: MetricType
    # Observed numeric value.
    value: float
    # ISO-8601 capture time; defaults to "now".
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    # Free-form extra context attached to the observation.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (enum flattened to its value)."""
        payload = dict(
            name=self.name,
            metric_type=self.metric_type.value,
            value=self.value,
            timestamp=self.timestamp,
            metadata=self.metadata,
        )
        return payload
|
||||
|
||||
|
||||
@dataclass
class EvaluationResult:
    """Evaluation result for a single skill execution."""
    # Name of the skill that was evaluated.
    skill_name: str
    # Run this evaluation belongs to.
    run_id: str
    # Agent that executed the skill.
    agent_id: str
    # Collected metric observations.
    metrics: List[EvaluationMetric] = field(default_factory=list)
    # Skill input parameters.
    inputs: Dict[str, Any] = field(default_factory=dict)
    # Skill outputs.
    outputs: Dict[str, Any] = field(default_factory=dict)
    # Final decision text, if any.
    decision: Optional[str] = None
    # Whether the execution succeeded.
    success: bool = True
    # Error description when success is False.
    error_message: Optional[str] = None
    # ISO-8601 start/end timestamps.
    started_at: Optional[str] = None
    completed_at: Optional[str] = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict; nested metrics included."""
        return dict(
            skill_name=self.skill_name,
            run_id=self.run_id,
            agent_id=self.agent_id,
            metrics=[m.to_dict() for m in self.metrics],
            inputs=self.inputs,
            outputs=self.outputs,
            decision=self.decision,
            success=self.success,
            error_message=self.error_message,
            started_at=self.started_at,
            completed_at=self.completed_at,
        )
|
||||
|
||||
|
||||
class EvaluationHook:
    """Hook for collecting skill evaluation metrics.

    This hook collects and stores evaluation metrics after skill execution
    for later analysis and memory/reflection stages.

    At most one evaluation session is active at a time; starting a new one
    silently replaces any unfinished session.
    """

    def __init__(
        self,
        storage_dir: Path,
        run_id: str,
        agent_id: str,
    ):
        """Initialize evaluation hook.

        Args:
            storage_dir: Directory to store evaluation results
            run_id: Current run identifier
            agent_id: Current agent identifier
        """
        self.storage_dir = Path(storage_dir)
        self.run_id = run_id
        self.agent_id = agent_id
        # Active session; None when no evaluation is in progress.
        self._current_evaluation: Optional[EvaluationResult] = None

    def start_evaluation(
        self,
        skill_name: str,
        inputs: Dict[str, Any],
    ) -> None:
        """Start a new evaluation session.

        Args:
            skill_name: Name of the skill being evaluated
            inputs: Input parameters for the skill
        """
        self._current_evaluation = EvaluationResult(
            skill_name=skill_name,
            run_id=self.run_id,
            agent_id=self.agent_id,
            inputs=inputs,
            started_at=datetime.now().isoformat(),
        )
        logger.debug(f"Started evaluation for skill: {skill_name}")

    def add_metric(
        self,
        name: str,
        metric_type: MetricType,
        value: float,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Add an evaluation metric.

        Args:
            name: Metric name
            metric_type: Type of metric
            value: Metric value
            metadata: Additional metadata
        """
        # Metrics outside a session are dropped (warned, not raised) so
        # instrumentation never breaks skill execution.
        if self._current_evaluation is None:
            logger.warning("No active evaluation session, ignoring metric")
            return

        metric = EvaluationMetric(
            name=name,
            metric_type=metric_type,
            value=value,
            metadata=metadata or {},
        )
        self._current_evaluation.metrics.append(metric)
        logger.debug(f"Added metric: {name} = {value}")

    def add_metrics(self, metrics: List[EvaluationMetric]) -> None:
        """Add multiple evaluation metrics at once.

        Args:
            metrics: List of metrics to add
        """
        if self._current_evaluation is None:
            logger.warning("No active evaluation session, ignoring metrics")
            return

        self._current_evaluation.metrics.extend(metrics)

    def record_outputs(self, outputs: Dict[str, Any]) -> None:
        """Record skill outputs.

        Args:
            outputs: Output from skill execution
        """
        if self._current_evaluation is None:
            logger.warning("No active evaluation session, ignoring outputs")
            return

        # Replaces (does not merge with) any previously recorded outputs.
        self._current_evaluation.outputs = outputs

    def record_decision(self, decision: str) -> None:
        """Record the final decision.

        Args:
            decision: Final decision made by the skill
        """
        if self._current_evaluation is None:
            logger.warning("No active evaluation session, ignoring decision")
            return

        self._current_evaluation.decision = decision

    def complete_evaluation(
        self,
        success: bool = True,
        error_message: Optional[str] = None,
    ) -> Optional[EvaluationResult]:
        """Complete the evaluation session and persist results.

        Args:
            success: Whether the skill execution was successful
            error_message: Error message if failed

        Returns:
            The completed evaluation result, or None if no active evaluation
        """
        if self._current_evaluation is None:
            logger.warning("No active evaluation to complete")
            return None

        self._current_evaluation.success = success
        self._current_evaluation.error_message = error_message
        # Overrides the default completed_at set at construction time.
        self._current_evaluation.completed_at = datetime.now().isoformat()

        # Persist to storage
        result = self._persist_evaluation(self._current_evaluation)

        self._current_evaluation = None
        logger.debug(f"Completed evaluation for skill: {result.skill_name}")

        return result

    def _persist_evaluation(self, evaluation: EvaluationResult) -> EvaluationResult:
        """Persist evaluation result to storage.

        Args:
            evaluation: Evaluation result to persist

        Returns:
            The persisted evaluation
        """
        # Create run-specific directory
        run_dir = self.storage_dir / self.run_id
        run_dir.mkdir(parents=True, exist_ok=True)

        # Create agent-specific subdirectory
        agent_dir = run_dir / self.agent_id
        agent_dir.mkdir(parents=True, exist_ok=True)

        # Generate filename with timestamp
        # NOTE(review): skill_name is embedded verbatim in the filename —
        # assumes it contains no path separators; confirm upstream validation.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filename = f"{evaluation.skill_name}_{timestamp}.json"
        filepath = agent_dir / filename

        # Write evaluation result
        # Persistence failures are logged but never raised: losing one
        # evaluation record must not break the skill run.
        try:
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(evaluation.to_dict(), f, ensure_ascii=False, indent=2)
            logger.info(f"Persisted evaluation to: {filepath}")
        except Exception as e:
            logger.error(f"Failed to persist evaluation: {e}")

        return evaluation

    def cancel_evaluation(self) -> None:
        """Cancel the current evaluation session without saving."""
        if self._current_evaluation is not None:
            logger.debug(f"Cancelled evaluation for: {self._current_evaluation.skill_name}")
        self._current_evaluation = None
|
||||
|
||||
|
||||
class EvaluationCollector:
    """Collector for aggregating evaluation metrics across runs.

    Provides methods to query and analyze evaluation results persisted
    by :class:`EvaluationHook` under ``storage_dir/<run_id>/<agent_id>/``.
    """

    def __init__(self, storage_dir: Path):
        """Initialize evaluation collector.

        Args:
            storage_dir: Root directory containing evaluation results
        """
        self.storage_dir = Path(storage_dir)

    def get_run_evaluations(
        self,
        run_id: str,
        agent_id: Optional[str] = None,
    ) -> List[EvaluationResult]:
        """Get all evaluations for a run.

        Args:
            run_id: Run identifier
            agent_id: Optional agent identifier to filter by

        Returns:
            List of evaluation results (empty when the run directory is missing)
        """
        run_dir = self.storage_dir / run_id
        if not run_dir.exists():
            return []

        evaluations = []

        # When an agent filter is given, look only in that subdirectory.
        agent_dirs = [run_dir / agent_id] if agent_id else run_dir.iterdir()

        for agent_dir in agent_dirs:
            if not agent_dir.is_dir():
                continue

            for eval_file in agent_dir.glob("*.json"):
                # Unreadable/corrupt files are skipped with a warning so one
                # bad record does not hide the rest of the run.
                try:
                    with open(eval_file, "r", encoding="utf-8") as f:
                        data = json.load(f)
                    evaluations.append(self._parse_evaluation(data))
                except Exception as e:
                    logger.warning(f"Failed to load evaluation {eval_file}: {e}")

        return evaluations

    def get_skill_metrics(
        self,
        skill_name: str,
        run_ids: Optional[List[str]] = None,
    ) -> List[EvaluationMetric]:
        """Get all metrics for a specific skill.

        Args:
            skill_name: Name of the skill
            run_ids: Optional list of run IDs to filter by

        Returns:
            List of metrics for the skill (empty when no data exists)
        """
        metrics = []

        if run_ids is None:
            # Guard: iterdir() on a missing root would raise FileNotFoundError;
            # an absent storage directory simply means "no data yet".
            if not self.storage_dir.exists():
                return []
            run_ids = [d.name for d in self.storage_dir.iterdir() if d.is_dir()]

        for run_id in run_ids:
            evaluations = self.get_run_evaluations(run_id)
            for eval_result in evaluations:
                if eval_result.skill_name == skill_name:
                    metrics.extend(eval_result.metrics)

        return metrics

    def calculate_skill_stats(
        self,
        skill_name: str,
        metric_type: MetricType,
        run_ids: Optional[List[str]] = None,
    ) -> Dict[str, float]:
        """Calculate statistics for a specific metric type.

        Args:
            skill_name: Name of the skill
            metric_type: Type of metric to calculate
            run_ids: Optional list of run IDs to filter by

        Returns:
            Dictionary with min, max, avg, count statistics
            (just ``{"count": 0}`` when no matching metrics exist)
        """
        metrics = self.get_skill_metrics(skill_name, run_ids)
        filtered = [m for m in metrics if m.metric_type == metric_type]

        if not filtered:
            return {"count": 0}

        values = [m.value for m in filtered]
        return {
            "count": len(values),
            "min": min(values),
            "max": max(values),
            "avg": sum(values) / len(values),
        }

    def _parse_evaluation(self, data: Dict[str, Any]) -> EvaluationResult:
        """Parse evaluation data into EvaluationResult.

        Args:
            data: Raw evaluation data

        Returns:
            Parsed EvaluationResult
        """
        metrics = []
        for m in data.get("metrics", []):
            metrics.append(EvaluationMetric(
                name=m["name"],
                metric_type=MetricType(m["metric_type"]),
                value=m["value"],
                timestamp=m.get("timestamp", ""),
                metadata=m.get("metadata", {}),
            ))

        return EvaluationResult(
            skill_name=data["skill_name"],
            run_id=data["run_id"],
            agent_id=data["agent_id"],
            metrics=metrics,
            inputs=data.get("inputs", {}),
            outputs=data.get("outputs", {}),
            decision=data.get("decision"),
            success=data.get("success", True),
            error_message=data.get("error_message"),
            started_at=data.get("started_at"),
            completed_at=data.get("completed_at"),
        )
|
||||
|
||||
|
||||
def parse_evaluation_hooks(skill_dir: Path) -> Dict[str, Any]:
    """Parse evaluation hooks from SKILL.md.

    Extracts the Optional: Evaluation hooks section from skill documentation.

    Args:
        skill_dir: Skill directory path

    Returns:
        Dictionary containing evaluation hook definitions; empty dict when
        SKILL.md is missing, the section is absent, or parsing fails.
    """
    skill_md = skill_dir / "SKILL.md"
    if not skill_md.exists():
        return {}

    header = "## Optional: Evaluation hooks"
    try:
        content = skill_md.read_text(encoding="utf-8")

        start = content.find(header)
        if start != -1:
            # Section runs until the next "## " heading, or to end of file.
            next_section = content.find("\n## ", start + 1)
            eval_section = (
                content[start:] if next_section == -1 else content[start:next_section]
            )

            # A metric type is "supported" when its space-separated form
            # appears anywhere in the section text (case-insensitive).
            lowered = eval_section.lower()
            supported = [
                metric_type.value
                for metric_type in MetricType
                if metric_type.value.replace("_", " ") in lowered
            ]

            return {
                "supported_metrics": supported,
                "section_content": eval_section.strip(),
            }
    except Exception as e:
        logger.warning(f"Failed to parse evaluation hooks: {e}")

    return {}
|
||||
|
||||
|
||||
# Public API of the evaluation-hook module.
__all__ = [
    "MetricType",
    "EvaluationMetric",
    "EvaluationResult",
    "EvaluationHook",
    "EvaluationCollector",
    "parse_evaluation_hooks",
]
|
||||
510
backend/agents/base/evo_agent.py
Normal file
510
backend/agents/base/evo_agent.py
Normal file
@@ -0,0 +1,510 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""EvoAgent - Core agent implementation for 大时代.
|
||||
|
||||
This module provides the main EvoAgent class built on AgentScope's ReActAgent,
|
||||
with integrated tools, skills, and memory management based on CoPaw design.
|
||||
|
||||
Key features:
|
||||
- Workspace-driven configuration from Markdown files
|
||||
- Dynamic skill loading from skills/active directories
|
||||
- Tool-guard security interception
|
||||
- Hook system for extensibility
|
||||
- Runtime skill and prompt reloading
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Type, TYPE_CHECKING
|
||||
|
||||
from agentscope.agent import ReActAgent
|
||||
from agentscope.memory import InMemoryMemory
|
||||
from agentscope.message import Msg
|
||||
from agentscope.tool import Toolkit
|
||||
|
||||
from .tool_guard import ToolGuardMixin
|
||||
from .hooks import (
|
||||
HookManager,
|
||||
BootstrapHook,
|
||||
MemoryCompactionHook,
|
||||
WorkspaceWatchHook,
|
||||
HOOK_PRE_REASONING,
|
||||
)
|
||||
from ..prompts.builder import (
|
||||
PromptBuilder,
|
||||
build_system_prompt_from_workspace,
|
||||
)
|
||||
from ..agent_workspace import load_agent_workspace_config
|
||||
from ..skills_manager import SkillsManager
|
||||
|
||||
# Team infrastructure imports (graceful import - may not exist yet)
|
||||
try:
|
||||
from backend.agents.team.messenger import AgentMessenger
|
||||
from backend.agents.team.task_delegator import TaskDelegator
|
||||
TEAM_INFRA_AVAILABLE = True
|
||||
except ImportError:
|
||||
TEAM_INFRA_AVAILABLE = False
|
||||
AgentMessenger = None
|
||||
TaskDelegator = None
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from agentscope.formatter import FormatterBase
|
||||
from agentscope.model import ModelWrapperBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EvoAgent(ToolGuardMixin, ReActAgent):
    """EvoAgent with integrated tools, skills, and memory management.

    This agent extends ReActAgent with:
    - Workspace-driven configuration from AGENTS.md/SOUL.md/PROFILE.md/etc.
    - Dynamic skill loading from skills/active directories
    - Tool-guard security interception (via ToolGuardMixin)
    - Hook system for extensibility (bootstrap, memory compaction)
    - Runtime skill and prompt reloading

    MRO note
    ~~~~~~~~
    ``ToolGuardMixin`` overrides ``_acting`` and ``_reasoning`` via
    Python's MRO: EvoAgent → ToolGuardMixin → ReActAgent.

    Example:
        agent = EvoAgent(
            agent_id="fundamentals_analyst",
            config_name="smoke_fullstack",
            workspace_dir=Path("runs/smoke_fullstack/agents/fundamentals_analyst"),
            model=model_instance,
            formatter=formatter_instance,
        )
    """

    # Skill-toolkit configuration shared by initial construction and runtime
    # reloads. Kept in one place so the two code paths can never drift apart.
    _SKILL_INSTRUCTION = (
        "<system-info>You have access to specialized skills. "
        "Each skill lives in a directory and is described by SKILL.md. "
        "Follow the skill instructions when they are relevant to the current task."
        "</system-info>"
    )
    _SKILL_TEMPLATE = "- {name} (dir: {dir}): {description}"

    def __init__(
        self,
        agent_id: str,
        config_name: str,
        workspace_dir: Path,
        model: "ModelWrapperBase",
        formatter: "FormatterBase",
        skills_manager: Optional[SkillsManager] = None,
        sys_prompt: Optional[str] = None,
        max_iters: int = 10,
        memory: Optional[Any] = None,
        enable_tool_guard: bool = True,
        enable_bootstrap_hook: bool = True,
        enable_memory_compaction: bool = False,
        memory_manager: Optional[Any] = None,
        memory_compact_threshold: Optional[int] = None,
        env_context: Optional[str] = None,
        prompt_files: Optional[List[str]] = None,
    ):
        """Initialize EvoAgent.

        Args:
            agent_id: Unique identifier for this agent
            config_name: Run configuration name (e.g., "smoke_fullstack")
            workspace_dir: Agent workspace directory containing markdown files
            model: LLM model instance
            formatter: Message formatter instance
            skills_manager: Optional SkillsManager instance
            sys_prompt: Optional override for system prompt
            max_iters: Maximum reasoning-acting iterations
            memory: Optional memory instance (defaults to InMemoryMemory)
            enable_tool_guard: Enable tool-guard security interception
            enable_bootstrap_hook: Enable bootstrap guidance on first interaction
            enable_memory_compaction: Enable automatic memory compaction
            memory_manager: Optional memory manager for compaction
            memory_compact_threshold: Token threshold for memory compaction
            env_context: Optional environment context to prepend to system prompt
            prompt_files: List of markdown files to load (defaults to standard set)
        """
        self.agent_id = agent_id
        self.config_name = config_name
        self.workspace_dir = Path(workspace_dir)
        self._skills_manager = skills_manager or SkillsManager()
        self._env_context = env_context
        self._prompt_files = prompt_files

        # Initialize tool guard (provided by ToolGuardMixin)
        if enable_tool_guard:
            self._init_tool_guard()

        # Load agent configuration from workspace
        self._agent_config = self._load_agent_config()

        # Build or use provided system prompt
        if sys_prompt is not None:
            self._sys_prompt = sys_prompt
        else:
            self._sys_prompt = self._build_system_prompt()

        # Create toolkit with skills
        toolkit = self._create_toolkit()

        # Initialize hook manager
        self._hook_manager = HookManager()

        # Initialize parent ReActAgent
        super().__init__(
            name=agent_id,
            model=model,
            sys_prompt=self._sys_prompt,
            toolkit=toolkit,
            memory=memory or InMemoryMemory(),
            formatter=formatter,
            max_iters=max_iters,
        )

        # Register hooks (must happen after super().__init__ so memory exists)
        self._register_hooks(
            enable_bootstrap=enable_bootstrap_hook,
            enable_memory_compaction=enable_memory_compaction,
            memory_manager=memory_manager,
            memory_compact_threshold=memory_compact_threshold,
        )

        # Initialize team infrastructure if available
        self._messenger: Optional["AgentMessenger"] = None
        self._task_delegator: Optional["TaskDelegator"] = None
        if TEAM_INFRA_AVAILABLE:
            self._init_team_infrastructure()

        logger.info(
            "EvoAgent initialized: %s (workspace: %s)",
            agent_id,
            workspace_dir,
        )

    def _load_agent_config(self) -> Dict[str, Any]:
        """Load agent configuration from workspace.

        Returns:
            Agent configuration dictionary (empty when agent.yaml is absent)
        """
        config_path = self.workspace_dir / "agent.yaml"
        if config_path.exists():
            loaded = load_agent_workspace_config(config_path)
            return dict(loaded.values)
        return {}

    def _build_system_prompt(self) -> str:
        """Build system prompt from workspace markdown files.

        Uses PromptBuilder to load and combine AGENTS.md, SOUL.md,
        PROFILE.md, and other configured files.

        Returns:
            Complete system prompt string
        """
        prompt = build_system_prompt_from_workspace(
            workspace_dir=self.workspace_dir,
            enabled_files=self._prompt_files,
            agent_id=self.agent_id,
            extra_context=self._env_context,
        )
        return prompt

    def _make_skill_toolkit(
        self,
        skill_dirs: Optional[List[Path]] = None,
    ) -> Toolkit:
        """Build a Toolkit and register skills into it.

        Shared by :meth:`_create_toolkit` and :meth:`reload_skills` so the
        construction-time and reload-time behavior stay identical.

        Args:
            skill_dirs: Explicit skill directories to register; when None,
                skills are discovered (in sorted order) from the agent's
                active skills directory.

        Returns:
            Configured Toolkit instance
        """
        toolkit = Toolkit(
            agent_skill_instruction=self._SKILL_INSTRUCTION,
            agent_skill_template=self._SKILL_TEMPLATE,
        )

        if skill_dirs is None:
            active_skills_dir = self._skills_manager.get_agent_active_root(
                self.config_name,
                self.agent_id,
            )
            if active_skills_dir.exists():
                # Sorted for deterministic registration order.
                skill_dirs = sorted(
                    d for d in active_skills_dir.iterdir()
                    if d.is_dir() and (d / "SKILL.md").exists()
                )
            else:
                skill_dirs = []

        for skill_dir in skill_dirs:
            # Only directories that carry a SKILL.md are valid skills.
            if skill_dir.is_dir() and (skill_dir / "SKILL.md").exists():
                try:
                    toolkit.register_agent_skill(str(skill_dir))
                    logger.debug("Registered skill: %s", skill_dir.name)
                except Exception as e:
                    # One broken skill must not block the others.
                    logger.error(
                        "Failed to register skill '%s': %s",
                        skill_dir.name,
                        e,
                    )

        return toolkit

    def _create_toolkit(self) -> Toolkit:
        """Create and populate toolkit with agent skills.

        Loads skills from the agent's active skills directory and
        registers them with the toolkit.

        Returns:
            Configured Toolkit instance
        """
        return self._make_skill_toolkit()

    def _register_hooks(
        self,
        enable_bootstrap: bool,
        enable_memory_compaction: bool,
        memory_manager: Optional[Any],
        memory_compact_threshold: Optional[int],
    ) -> None:
        """Register agent hooks.

        Args:
            enable_bootstrap: Enable bootstrap hook
            enable_memory_compaction: Enable memory compaction hook
            memory_manager: Memory manager instance
            memory_compact_threshold: Token threshold for compaction
        """
        # Bootstrap hook - checks BOOTSTRAP.md on first interaction
        if enable_bootstrap:
            bootstrap_hook = BootstrapHook(
                workspace_dir=self.workspace_dir,
                language="zh",
            )
            self._hook_manager.register(
                hook_type=HOOK_PRE_REASONING,
                hook_name="bootstrap",
                hook=bootstrap_hook,
            )
            logger.debug("Registered bootstrap hook")

        # Memory compaction hook (requires a memory manager to do the work)
        if enable_memory_compaction and memory_manager is not None:
            compaction_hook = MemoryCompactionHook(
                memory_manager=memory_manager,
                memory_compact_threshold=memory_compact_threshold,
            )
            self._hook_manager.register(
                hook_type=HOOK_PRE_REASONING,
                hook_name="memory_compaction",
                hook=compaction_hook,
            )
            logger.debug("Registered memory compaction hook")

        # Workspace watch hook - auto-reload markdown files on change
        # (always registered, unlike the two optional hooks above)
        workspace_watch_hook = WorkspaceWatchHook(
            workspace_dir=self.workspace_dir,
        )
        self._hook_manager.register(
            hook_type=HOOK_PRE_REASONING,
            hook_name="workspace_watch",
            hook=workspace_watch_hook,
        )
        logger.debug("Registered workspace watch hook")

    async def _reasoning(self, **kwargs) -> Msg:
        """Override reasoning to execute pre-reasoning hooks.

        Args:
            **kwargs: Arguments for reasoning

        Returns:
            Response message
        """
        # Execute pre-reasoning hooks; they may rewrite kwargs.
        kwargs = await self._hook_manager.execute(
            hook_type=HOOK_PRE_REASONING,
            agent=self,
            kwargs=kwargs,
        )

        # Call parent (which may be ToolGuardMixin's _reasoning)
        return await super()._reasoning(**kwargs)

    def reload_skills(self, active_skill_dirs: Optional[List[Path]] = None) -> None:
        """Reload skills at runtime.

        Rebuilds the toolkit with current skills from the active directory.

        Args:
            active_skill_dirs: Optional list of specific skill directories to load
        """
        logger.info("Reloading skills for agent: %s", self.agent_id)
        # Build a fresh toolkit (same logic as construction time) and swap
        # it in atomically.
        self.toolkit = self._make_skill_toolkit(active_skill_dirs)
        logger.info("Skills reloaded for agent: %s", self.agent_id)

    def rebuild_sys_prompt(self) -> None:
        """Rebuild and replace the system prompt at runtime.

        Useful after updating AGENTS.md, SOUL.md, PROFILE.md, etc.
        to ensure the prompt reflects the latest configuration.

        Updates both self._sys_prompt and the first system-role
        message stored in self.memory.content.
        """
        logger.info("Rebuilding system prompt for agent: %s", self.agent_id)

        # Reload agent config in case it changed
        self._agent_config = self._load_agent_config()

        # Rebuild prompt
        self._sys_prompt = self._build_system_prompt()

        # Update memory if system message exists
        # NOTE(review): assumes memory.content is a sequence of (Msg, marks)
        # pairs — consistent with the hooks module's usage.
        if hasattr(self, "memory") and self.memory.content:
            for msg, _marks in self.memory.content:
                if getattr(msg, "role", None) == "system":
                    msg.content = self._sys_prompt
                    logger.debug("Updated system message in memory")
                    break

        logger.info("System prompt rebuilt for agent: %s", self.agent_id)

    async def reply(
        self,
        msg: Msg | List[Msg] | None = None,
        structured_model: Optional[Type[Any]] = None,
    ) -> Msg:
        """Process a message and return a response.

        Args:
            msg: Input message(s) from user
            structured_model: Optional pydantic model for structured output

        Returns:
            Response message
        """
        # Handle list of messages: store all but the last directly in memory,
        # then let the parent process the final one as the live input.
        if isinstance(msg, list):
            for m in msg[:-1]:
                await self.memory.add(m)
            msg = msg[-1] if msg else None

        return await super().reply(msg=msg, structured_model=structured_model)

    def get_agent_info(self) -> Dict[str, Any]:
        """Get agent information.

        Returns:
            Dictionary with agent metadata
        """
        return {
            "agent_id": self.agent_id,
            "config_name": self.config_name,
            "workspace_dir": str(self.workspace_dir),
            "skills_count": len(list(
                self._skills_manager.list_active_skill_metadata(
                    self.config_name,
                    self.agent_id,
                )
            )),
            "registered_hooks": self._hook_manager.list_hooks(),
            "team_infra_available": TEAM_INFRA_AVAILABLE,
        }

    def _init_team_infrastructure(self) -> None:
        """Initialize team infrastructure components (messenger and task delegator).

        This method initializes the AgentMessenger for inter-agent communication
        and the TaskDelegator for subagent delegation.
        """
        if not TEAM_INFRA_AVAILABLE:
            return

        try:
            self._messenger = AgentMessenger(agent_id=self.agent_id)
            self._task_delegator = TaskDelegator(agent=self)
            logger.debug(
                "Team infrastructure initialized for agent: %s",
                self.agent_id,
            )
        except Exception as e:
            # Team features are optional: degrade gracefully to solo mode.
            logger.warning(
                "Failed to initialize team infrastructure for %s: %s",
                self.agent_id,
                e,
            )
            self._messenger = None
            self._task_delegator = None

    @property
    def messenger(self) -> Optional["AgentMessenger"]:
        """Get the agent's messenger for inter-agent communication.

        Returns:
            AgentMessenger instance if available, None otherwise
        """
        return self._messenger

    async def delegate_task(
        self,
        task_type: str,
        task_data: Dict[str, Any],
        target_agent: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Delegate a task to a subagent using the TaskDelegator.

        Args:
            task_type: Type of task to delegate
            task_data: Data/payload for the task
            target_agent: Optional specific agent ID to delegate to

        Returns:
            Dict containing the delegation result
        """
        if not TEAM_INFRA_AVAILABLE or self._task_delegator is None:
            return {
                "success": False,
                "error": "Team infrastructure not available",
            }

        try:
            return await self._task_delegator.delegate_task(
                task_type=task_type,
                task_data=task_data,
                target_agent=target_agent,
            )
        except Exception as e:
            logger.error(
                "Task delegation failed for %s: %s",
                self.agent_id,
                e,
            )
            return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
# Public API of this module.
__all__ = ["EvoAgent"]
|
||||
613
backend/agents/base/hooks.py
Normal file
613
backend/agents/base/hooks.py
Normal file
@@ -0,0 +1,613 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Hook system for EvoAgent.
|
||||
|
||||
Provides pre_reasoning and post_acting hooks with built-in implementations:
|
||||
- BootstrapHook: First-time setup guidance
|
||||
- MemoryCompactionHook: Automatic memory compression
|
||||
|
||||
Based on CoPaw's hooks design.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from agentscope.agent import ReActAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Hook types
# Lifecycle identifiers for hook registration: hooks run either just before
# a reasoning step or just after an acting step. HookType is a plain string
# alias so custom hook types can be added without changing this module.
HookType = str
HOOK_PRE_REASONING: HookType = "pre_reasoning"
HOOK_POST_ACTING: HookType = "post_acting"
|
||||
|
||||
|
||||
class Hook(ABC):
    """Abstract base class for agent hooks.

    Concrete hooks implement ``__call__`` and are invoked by the
    HookManager at specific points in the agent's lifecycle.
    """

    @abstractmethod
    async def __call__(
        self,
        agent: "ReActAgent",
        kwargs: Dict[str, Any],
    ) -> Optional[Dict[str, Any]]:
        """Run the hook.

        Args:
            agent: The agent instance
            kwargs: Input arguments to the method being hooked

        Returns:
            Modified kwargs or None to use original
        """
        ...
|
||||
|
||||
|
||||
class HookManager:
    """Registry and dispatcher for agent hooks.

    Hooks are kept per lifecycle event as ordered (name, hook) pairs;
    registering under an existing name replaces the previous hook.
    """

    def __init__(self):
        self._hooks: Dict[HookType, List[tuple[str, Hook]]] = {
            HOOK_PRE_REASONING: [],
            HOOK_POST_ACTING: [],
        }

    def register(
        self,
        hook_type: HookType,
        hook_name: str,
        hook: Hook | Callable,
    ) -> None:
        """Register a hook.

        Args:
            hook_type: Type of hook (pre_reasoning, post_acting)
            hook_name: Unique name for this hook
            hook: Hook instance or callable
        """
        # Replace-on-register: drop any earlier hook with this name, then
        # append the new one at the end of the execution order.
        surviving = [
            entry for entry in self._hooks[hook_type] if entry[0] != hook_name
        ]
        surviving.append((hook_name, hook))
        self._hooks[hook_type] = surviving
        logger.debug("Registered hook '%s' for type '%s'", hook_name, hook_type)

    def unregister(self, hook_type: HookType, hook_name: str) -> bool:
        """Unregister a hook.

        Args:
            hook_type: Type of hook
            hook_name: Name of the hook to remove

        Returns:
            True if hook was found and removed
        """
        before = self._hooks[hook_type]
        after = [entry for entry in before if entry[0] != hook_name]
        self._hooks[hook_type] = after
        removed = len(after) < len(before)
        if removed:
            logger.debug("Unregistered hook '%s' from type '%s'", hook_name, hook_type)
        return removed

    async def execute(
        self,
        hook_type: HookType,
        agent: "ReActAgent",
        kwargs: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Execute all hooks of a given type.

        Args:
            hook_type: Type of hooks to execute
            agent: The agent instance
            kwargs: Input arguments

        Returns:
            Potentially modified kwargs
        """
        current = kwargs
        for name, hook in self._hooks[hook_type]:
            try:
                outcome = await hook(agent, current)
            except Exception as e:
                # A failing hook is logged but never aborts the chain.
                logger.error("Hook '%s' failed: %s", name, e, exc_info=True)
            else:
                # A hook returning None leaves the kwargs untouched.
                if outcome is not None:
                    current = outcome

        return current

    def list_hooks(self, hook_type: Optional[HookType] = None) -> List[str]:
        """List registered hook names.

        Args:
            hook_type: Optional type to filter by

        Returns:
            List of hook names
        """
        if hook_type:
            return [name for name, _ in self._hooks.get(hook_type, [])]

        return [
            name
            for registered in self._hooks.values()
            for name, _ in registered
        ]
|
||||
|
||||
|
||||
class BootstrapHook(Hook):
    """Hook for bootstrap guidance on first user interaction.

    This hook looks for a BOOTSTRAP.md file in the working directory
    and if found, prepends guidance to the first user message to help
    establish the agent's identity and user preferences.
    """

    def __init__(
        self,
        workspace_dir: Path,
        language: str = "zh",
    ):
        """Initialize bootstrap hook.

        Args:
            workspace_dir: Working directory containing BOOTSTRAP.md
            language: Language code for bootstrap guidance (en/zh)
        """
        self.workspace_dir = Path(workspace_dir)
        self.language = language
        # Marker file created after the first injection so bootstrap never
        # fires twice for the same workspace (checked early in __call__).
        self._completed_flag = self.workspace_dir / ".bootstrap_completed"

    def _is_first_user_interaction(self, agent: "ReActAgent") -> bool:
        """Check if this is the first user interaction.

        Args:
            agent: The agent instance

        Returns:
            True if first user interaction
        """
        # No memory, or an empty one, counts as a first interaction.
        if not hasattr(agent, "memory") or not agent.memory.content:
            return True

        # Count user messages (excluding system)
        # NOTE(review): assumes memory.content yields (msg, marks) pairs —
        # consistent with how __call__ iterates it below; confirm against
        # the memory implementation.
        user_count = sum(
            1 for msg, _ in agent.memory.content if msg.role == "user"
        )
        # <= 1 because the current incoming user message may already be stored.
        return user_count <= 1

    def _build_bootstrap_guidance(self) -> str:
        """Build bootstrap guidance message.

        Returns:
            Formatted bootstrap guidance (Chinese for "zh", English otherwise)
        """
        if self.language == "zh":
            return (
                "# 引导模式\n"
                "\n"
                "工作目录中存在 `BOOTSTRAP.md` — 首次设置。\n"
                "\n"
                "1. 阅读 BOOTSTRAP.md,友好地表示初次见面,"
                "引导用户完成设置。\n"
                "2. 按照 BOOTSTRAP.md 的指示,"
                "帮助用户定义你的身份和偏好。\n"
                "3. 按指南创建/更新必要文件"
                "(PROFILE.md、MEMORY.md 等)。\n"
                "4. 完成后删除 BOOTSTRAP.md。\n"
                "\n"
                "如果用户希望跳过,直接回答下面的问题即可。\n"
                "\n"
                "---\n"
                "\n"
            )

        # Any non-"zh" language falls through to the English variant.
        return (
            "# BOOTSTRAP MODE\n"
            "\n"
            "`BOOTSTRAP.md` exists — first-time setup.\n"
            "\n"
            "1. Read BOOTSTRAP.md, greet the user, "
            "and guide them through setup.\n"
            "2. Follow BOOTSTRAP.md instructions "
            "to define identity and preferences.\n"
            "3. Create/update files "
            "(PROFILE.md, MEMORY.md, etc.) as described.\n"
            "4. Delete BOOTSTRAP.md when done.\n"
            "\n"
            "If the user wants to skip, answer their "
            "question directly instead.\n"
            "\n"
            "---\n"
            "\n"
        )

    async def __call__(
        self,
        agent: "ReActAgent",
        kwargs: Dict[str, Any],
    ) -> Optional[Dict[str, Any]]:
        """Check and load BOOTSTRAP.md on first user interaction.

        Args:
            agent: The agent instance
            kwargs: Input arguments to the _reasoning method

        Returns:
            None (hook doesn't modify kwargs)
        """
        try:
            bootstrap_path = self.workspace_dir / "BOOTSTRAP.md"

            # Check if bootstrap has already been triggered
            if self._completed_flag.exists():
                return None

            if not bootstrap_path.exists():
                return None

            if not self._is_first_user_interaction(agent):
                return None

            bootstrap_guidance = self._build_bootstrap_guidance()

            logger.debug("Found BOOTSTRAP.md [%s], prepending guidance", self.language)

            # Prepend to first user message in memory
            if hasattr(agent, "memory") and agent.memory.content:
                # Skip past any leading system messages so the guidance lands
                # on the first *user* message.
                system_count = sum(
                    1 for msg, _ in agent.memory.content if msg.role == "system"
                )
                for msg, _ in agent.memory.content[system_count:]:
                    if msg.role == "user":
                        # Prepend guidance to message content
                        original_content = msg.content
                        msg.content = bootstrap_guidance + original_content
                        break

                logger.debug("Bootstrap guidance prepended to first user message")

            # Create completion flag to prevent repeated triggering
            self._completed_flag.touch()
            logger.debug("Created bootstrap completion flag")

        except Exception as e:
            # Bootstrap is best-effort; a failure must not break reasoning.
            logger.error("Failed to process bootstrap: %s", e, exc_info=True)

        return None
|
||||
|
||||
|
||||
class WorkspaceWatchHook(Hook):
    """Hook for auto-reloading workspace markdown files on change.

    Monitors SOUL.md, AGENTS.md, PROFILE.md, etc. and triggers
    a prompt rebuild when any of them change. Based on CoPaw's
    AgentConfigWatcher approach but for markdown files.
    """

    # Files to monitor (same as PromptBuilder.DEFAULT_FILES)
    WATCHED_FILES = frozenset([
        "SOUL.md", "AGENTS.md", "PROFILE.md",
        "POLICY.md", "MEMORY.md",
        "BOOTSTRAP.md",
    ])

    def __init__(
        self,
        workspace_dir: Path,
        poll_interval: float = 2.0,
    ):
        """Initialize workspace watch hook.

        Args:
            workspace_dir: Workspace directory to monitor
            poll_interval: How often to check for changes (seconds)
        """
        self.workspace_dir = Path(workspace_dir)
        self.poll_interval = poll_interval
        # Snapshot of file mtimes from the previous check.
        self._last_mtimes: dict[str, float] = {}
        self._initialized = False

    def _scan_mtimes(self) -> dict[str, float]:
        """Scan watched files and return their current mtimes."""
        snapshot: dict[str, float] = {}
        for filename in self.WATCHED_FILES:
            candidate = self.workspace_dir / filename
            if candidate.exists():
                snapshot[filename] = candidate.stat().st_mtime
        return snapshot

    def _has_changes(self) -> bool:
        """Check if any watched file has changed since last check."""
        snapshot = self._scan_mtimes()

        # First call only seeds the baseline; never reports a change.
        if not self._initialized:
            self._last_mtimes = snapshot
            self._initialized = True
            return False

        # A change is a different file set (added/deleted) or any
        # differing mtime among the surviving files.
        changed = set(snapshot) != set(self._last_mtimes) or any(
            mtime != self._last_mtimes.get(filename)
            for filename, mtime in snapshot.items()
        )
        if changed:
            self._last_mtimes = snapshot
        return changed

    async def __call__(
        self,
        agent: "ReActAgent",
        kwargs: Dict[str, Any],
    ) -> Optional[Dict[str, Any]]:
        """Check for file changes and rebuild prompt if needed.

        Args:
            agent: The agent instance
            kwargs: Input arguments (unused)

        Returns:
            None
        """
        try:
            if self._has_changes():
                agent_label = getattr(agent, "agent_id", "unknown")
                logger.info(
                    "Workspace files changed, triggering prompt rebuild for: %s",
                    agent_label,
                )
                rebuild = getattr(agent, "rebuild_sys_prompt", None)
                if rebuild is not None:
                    rebuild()
                else:
                    logger.warning(
                        "Agent %s has no rebuild_sys_prompt method",
                        agent_label,
                    )
        except Exception as e:
            # Watching is best-effort; never break the reasoning step.
            logger.error("Workspace watch hook failed: %s", e, exc_info=True)

        return None
|
||||
|
||||
|
||||
class MemoryCompactionHook(Hook):
    """Hook for automatic memory compaction when context is full.

    This hook monitors the estimated token count of the agent's memory and
    triggers compaction when it exceeds the threshold. It preserves the most
    recent messages while summarizing older conversation history.

    Based on CoPaw's memory compaction design with additional improvements:
    - memory_compact_ratio: Ratio to compact to when threshold reached
    - memory_reserve_ratio: Always keep a reserve of tokens for recent messages
    - enable_tool_result_compact: Compact tool results separately
    - tool_result_compact_keep_n: Number of tool results to keep
    """

    def __init__(
        self,
        memory_manager: Any,
        memory_compact_threshold: Optional[int] = None,
        memory_compact_ratio: float = 0.75,
        memory_reserve_ratio: float = 0.1,
        enable_tool_result_compact: bool = False,
        tool_result_compact_keep_n: int = 5,
    ):
        """Initialize memory compaction hook.

        Args:
            memory_manager: Memory manager instance for compaction
            memory_compact_threshold: Token threshold for compaction; when
                None, the hook is effectively disabled.
            memory_compact_ratio: Target ratio to compact to (e.g., 0.75 = compact to 75%)
            memory_reserve_ratio: Reserve ratio to always keep free (e.g., 0.1 = 10%)
            enable_tool_result_compact: Enable tool result compaction
            tool_result_compact_keep_n: Number of tool results to keep
        """
        self.memory_manager = memory_manager
        self.memory_compact_threshold = memory_compact_threshold
        self.memory_compact_ratio = memory_compact_ratio
        self.memory_reserve_ratio = memory_reserve_ratio
        self.enable_tool_result_compact = enable_tool_result_compact
        self.tool_result_compact_keep_n = tool_result_compact_keep_n

    async def __call__(
        self,
        agent: "ReActAgent",
        kwargs: Dict[str, Any],
    ) -> Optional[Dict[str, Any]]:
        """Pre-reasoning hook to check and compact memory if needed.

        Args:
            agent: The agent instance
            kwargs: Input arguments to the _reasoning method

        Returns:
            None (hook doesn't modify kwargs)
        """
        try:
            if not hasattr(agent, "memory") or not self.memory_manager:
                return None

            memory = agent.memory

            # Get current token count estimate
            messages = await memory.get_memory()
            total_tokens = self._estimate_tokens(messages)

            # A missing threshold disables compaction entirely.
            if self.memory_compact_threshold is None:
                return None

            if total_tokens < self.memory_compact_threshold:
                return None

            logger.info(
                "Memory compaction triggered: %d tokens (threshold: %d)",
                total_tokens,
                self.memory_compact_threshold,
            )

            # Compact memory
            await self._compact_memory(agent, messages)

        except Exception as e:
            # Compaction is best-effort; never break the reasoning step.
            logger.error("Failed to compact memory: %s", e, exc_info=True)

        return None

    def _estimate_tokens(self, messages: List[Any]) -> int:
        """Estimate token count for messages.

        Args:
            messages: List of messages

        Returns:
            Estimated token count (rough heuristic: ~4 chars per token)
        """
        total_chars = sum(
            len(str(getattr(msg, "content", "")))
            for msg in messages
        )
        return total_chars // 4

    async def _compact_memory(
        self,
        agent: "ReActAgent",
        messages: List[Any],
    ) -> None:
        """Compact memory by summarizing older messages.

        Uses CoPaw-style memory management:
        - memory_compact_ratio: Target ratio to compact to (e.g., 0.75 means compact to 75%)
        - memory_reserve_ratio: Always keep this ratio free (e.g., 0.1 means keep 10% for recent)

        Args:
            agent: The agent instance
            messages: Current messages in memory; assumed to be in
                chronological order, oldest first (consistent with the
                original implementation's iteration comment).
        """
        if self.memory_compact_threshold is None:
            return

        # Estimate total tokens
        total_tokens = self._estimate_tokens(messages)

        # Calculate reserve based on ratio (CoPaw-style)
        reserve_tokens = int(total_tokens * self.memory_reserve_ratio)

        # Calculate target tokens after compaction
        target_tokens = int(total_tokens * self.memory_compact_ratio)
        target_tokens = max(target_tokens, total_tokens - reserve_tokens)

        # BUGFIX: decide what to KEEP by walking from the NEWEST message
        # backwards.  The previous implementation walked from the oldest
        # forwards, which kept the *oldest* messages within budget and
        # summarized the newest ones -- the opposite of the documented
        # "preserve recent messages, summarize older history" intent.
        messages_to_compact = []
        kept_tokens = 0

        for msg in reversed(messages):
            msg_tokens = self._estimate_tokens([msg])
            if kept_tokens + msg_tokens > target_tokens:
                messages_to_compact.append(msg)
            else:
                kept_tokens += msg_tokens

        # Restore chronological order (oldest first) for the summarizer.
        messages_to_compact.reverse()

        if not messages_to_compact:
            return

        logger.info(
            "Compacting %d messages (%d tokens) to target %d tokens",
            len(messages_to_compact),
            self._estimate_tokens(messages_to_compact),
            target_tokens,
        )

        # Use memory manager to compact if available
        if hasattr(self.memory_manager, "compact_memory"):
            try:
                summary = await self.memory_manager.compact_memory(
                    messages=messages_to_compact,
                )
                logger.info(
                    "Memory compacted: %d messages summarized, summary: %s",
                    len(messages_to_compact),
                    summary[:200] if summary else "N/A",
                )

                # Mark messages as compressed if supported
                if hasattr(agent.memory, "update_messages_mark"):
                    from agentscope.agent._react_agent import _MemoryMark
                    await agent.memory.update_messages_mark(
                        new_mark=_MemoryMark.COMPRESSED,
                        msg_ids=[msg.id for msg in messages_to_compact],
                    )

            except Exception as e:
                logger.error("Memory manager compaction failed: %s", e)

        # Tool result compaction (CoPaw-style)
        if self.enable_tool_result_compact:
            await self._compact_tool_results(agent, messages)

    async def _compact_tool_results(
        self,
        agent: "ReActAgent",
        messages: List[Any],
    ) -> None:
        """Compact tool results by keeping only recent ones.

        Based on CoPaw's tool_result_compact_keep_n pattern.
        Tool results can be very verbose, so we keep only the N most recent ones.

        Args:
            agent: The agent instance
            messages: Current messages in memory
        """
        if not hasattr(agent.memory, "content"):
            return

        # Find tool result messages (identified by the "tool" role).
        tool_results = []
        for msg, _ in agent.memory.content:
            if hasattr(msg, "role") and msg.role == "tool":
                tool_results.append(msg)

        if len(tool_results) <= self.tool_result_compact_keep_n:
            return

        # Keep only the most recent N tool results
        excess_results = tool_results[:-self.tool_result_compact_keep_n]

        logger.info(
            "Tool result compaction: %d tool results found, keeping %d, compacting %d",
            len(tool_results),
            self.tool_result_compact_keep_n,
            len(excess_results),
        )

        # Mark excess tool results as compressed if supported
        if hasattr(agent.memory, "update_messages_mark"):
            from agentscope.agent._react_agent import _MemoryMark
            await agent.memory.update_messages_mark(
                new_mark=_MemoryMark.COMPRESSED,
                msg_ids=[msg.id for msg in excess_results],
            )
|
||||
# Public API of this module: the hook framework plus the concrete hooks.
__all__ = [
    "Hook",
    "HookManager",
    "HookType",
    "HOOK_PRE_REASONING",
    "HOOK_POST_ACTING",
    "BootstrapHook",
    "MemoryCompactionHook",
    "WorkspaceWatchHook",
]
|
||||
489
backend/agents/base/skill_adaptation_hook.py
Normal file
489
backend/agents/base/skill_adaptation_hook.py
Normal file
@@ -0,0 +1,489 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Skill adaptation hook for automatic evaluation-to-iteration闭环.
|
||||
|
||||
Monitors evaluation metrics against configurable thresholds and triggers
|
||||
automatic skill reload or logs warnings when thresholds are breached.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
from .evaluation_hook import (
|
||||
EvaluationCollector,
|
||||
EvaluationResult,
|
||||
MetricType,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AdaptationAction(Enum):
    """Actions to take when threshold is breached."""
    RELOAD = "reload"  # automatically reload the skill
    WARN = "warn"      # log a warning for human review
    BOTH = "both"      # both reload the skill and log a warning
    NONE = "none"      # take no action
||||
|
||||
|
||||
@dataclass
class AdaptationThreshold:
    """Threshold configuration for a single evaluation metric.

    A threshold fires when ``current_value <operator> value`` holds, e.g.
    ``operator="lt", value=0.5`` fires when the metric drops below 0.5.
    """
    metric_type: MetricType
    operator: str = "lt"  # one of: lt, lte, gt, gte, eq
    value: float = 0.0
    window_size: int = 10  # sliding-window size used for the moving average
    min_samples: int = 5  # minimum number of samples before checks may trigger
    action: AdaptationAction = AdaptationAction.WARN
    cooldown_seconds: int = 300  # quiet period after a trigger

    def evaluate(self, current_value: float) -> bool:
        """Return True when *current_value* breaches this threshold.

        An unknown operator is logged and treated as "not breached".
        """
        if self.operator == "lt":
            return current_value < self.value
        if self.operator == "lte":
            return current_value <= self.value
        if self.operator == "gt":
            return current_value > self.value
        if self.operator == "gte":
            return current_value >= self.value
        if self.operator == "eq":
            return current_value == self.value
        logger.warning(f"Unknown operator: {self.operator}")
        return False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for JSON persistence (enums as their string values)."""
        serialized = {
            "metric_type": self.metric_type.value,
            "operator": self.operator,
            "value": self.value,
            "window_size": self.window_size,
            "min_samples": self.min_samples,
            "action": self.action.value,
            "cooldown_seconds": self.cooldown_seconds,
        }
        return serialized
|
||||
|
||||
|
||||
@dataclass
class AdaptationEvent:
    """Record of an adaptation trigger event."""
    timestamp: str                    # ISO-8601 time at which the event fired
    skill_name: str                   # skill whose metric breached the threshold
    metric_type: MetricType           # metric that triggered the event
    threshold: AdaptationThreshold    # the breached threshold configuration
    current_value: float              # metric value at trigger time
    avg_value: float                  # moving average over recent samples
    action_taken: AdaptationAction    # action executed in response
    details: Dict[str, Any] = field(default_factory=dict)  # extra context (e.g. run/agent ids)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the event for JSON persistence (enums as string values)."""
        return {
            "timestamp": self.timestamp,
            "skill_name": self.skill_name,
            "metric_type": self.metric_type.value,
            "threshold": self.threshold.to_dict(),
            "current_value": self.current_value,
            "avg_value": self.avg_value,
            "action_taken": self.action_taken.value,
            "details": self.details,
        }
|
||||
|
||||
|
||||
class SkillAdaptationHook:
    """Hook for monitoring evaluation metrics and triggering skill adaptation.

    This hook wraps EvaluationHook to add threshold-based adaptation logic.
    When metrics breach configured thresholds, it can:
    - Automatically reload skills via SkillsManager
    - Log warnings for human review
    - Both
    """

    # Default thresholds for common metrics.  This list is class-level shared
    # state; __init__ copies it so instances can safely customize their own
    # thresholds.
    DEFAULT_THRESHOLDS: List[AdaptationThreshold] = [
        AdaptationThreshold(
            metric_type=MetricType.HIT_RATE,
            operator="lt",
            value=0.5,
            action=AdaptationAction.WARN,
            cooldown_seconds=600,
        ),
        AdaptationThreshold(
            metric_type=MetricType.RISK_VIOLATION,
            operator="gt",
            value=0.1,
            action=AdaptationAction.WARN,
            cooldown_seconds=300,
        ),
        AdaptationThreshold(
            metric_type=MetricType.DECISION_LATENCY,
            operator="gt",
            value=5000,  # 5 seconds
            action=AdaptationAction.WARN,
            cooldown_seconds=300,
        ),
    ]

    def __init__(
        self,
        storage_dir: Path,
        run_id: str,
        agent_id: str,
        thresholds: Optional[List[AdaptationThreshold]] = None,
        collector: Optional[EvaluationCollector] = None,
    ):
        """Initialize skill adaptation hook.

        Args:
            storage_dir: Directory to store adaptation events
            run_id: Current run identifier
            agent_id: Current agent identifier
            thresholds: Custom threshold configurations (uses defaults if None)
            collector: Optional EvaluationCollector for historical data
        """
        # Local import: `replace` is only needed for the defensive copy below.
        from dataclasses import replace

        self.storage_dir = Path(storage_dir)
        self.run_id = run_id
        self.agent_id = agent_id
        # BUGFIX: copy each threshold into a fresh per-instance list.  The
        # previous code aliased the class-level DEFAULT_THRESHOLDS list
        # directly, so one instance's add_threshold()/update_threshold()
        # silently mutated the defaults shared by every other instance.
        self.thresholds = [
            replace(t) for t in (thresholds or self.DEFAULT_THRESHOLDS)
        ]
        self.collector = collector or EvaluationCollector(storage_dir)

        # Track cooldowns to prevent rapid re-triggering
        self._cooldowns: Dict[str, datetime] = {}

        # Store recent metrics in memory for quick access
        self._recent_metrics: Dict[str, List[float]] = {}

        # Pending adaptation events
        self._pending_events: List[AdaptationEvent] = []

    def check_threshold(
        self,
        skill_name: str,
        metric_type: MetricType,
        current_value: float,
    ) -> Optional[AdaptationEvent]:
        """Check if a metric breaches any threshold.

        The value is always recorded in the sliding window, even when no
        threshold fires.  At most one event is returned per call; the
        cooldown is shared per (skill, metric) pair, not per threshold.

        Args:
            skill_name: Name of the skill
            metric_type: Type of metric
            current_value: Current metric value

        Returns:
            AdaptationEvent if threshold breached, None otherwise
        """
        # Find applicable thresholds
        applicable_thresholds = [
            t for t in self.thresholds
            if t.metric_type == metric_type
        ]

        if not applicable_thresholds:
            return None

        # Check cooldown
        cooldown_key = f"{skill_name}:{metric_type.value}"
        now = datetime.now()
        last_trigger = self._cooldowns.get(cooldown_key)

        # Store current value first for avg calculation
        self._store_metric(cooldown_key, current_value)

        for threshold in applicable_thresholds:
            if last_trigger:
                elapsed = (now - last_trigger).total_seconds()
                if elapsed < threshold.cooldown_seconds:
                    continue

            # Evaluate threshold
            if threshold.evaluate(current_value):
                # Calculate moving average
                avg_value = self._calculate_avg(skill_name, metric_type, current_value)

                # Check minimum samples (allow immediate trigger if min_samples <= 1)
                sample_count = len(self._recent_metrics.get(cooldown_key, []))
                if threshold.min_samples > 1 and sample_count < threshold.min_samples:
                    # Not enough samples yet
                    continue

                # Trigger adaptation
                event = AdaptationEvent(
                    timestamp=now.isoformat(),
                    skill_name=skill_name,
                    metric_type=metric_type,
                    threshold=threshold,
                    current_value=current_value,
                    avg_value=avg_value,
                    action_taken=threshold.action,
                    details={
                        "run_id": self.run_id,
                        "agent_id": self.agent_id,
                    },
                )

                # Update cooldown
                self._cooldowns[cooldown_key] = now

                # Persist event
                self._persist_event(event)

                logger.info(
                    f"Threshold breached for {skill_name}.{metric_type.value}: "
                    f"current={current_value}, avg={avg_value}, action={threshold.action.value}"
                )

                return event

        return None

    def _calculate_avg(
        self,
        skill_name: str,
        metric_type: MetricType,
        current_value: float,
    ) -> float:
        """Calculate moving average for a metric.

        Falls back to *current_value* when no samples are stored yet.
        """
        key = f"{skill_name}:{metric_type.value}"
        values = self._recent_metrics.get(key, [])
        if not values:
            return current_value
        return sum(values) / len(values)

    def _store_metric(self, key: str, value: float) -> None:
        """Store metric value with sliding window (last 100 values)."""
        if key not in self._recent_metrics:
            self._recent_metrics[key] = []
        self._recent_metrics[key].append(value)
        # Keep only last 100 values
        if len(self._recent_metrics[key]) > 100:
            self._recent_metrics[key] = self._recent_metrics[key][-100:]

    def _persist_event(self, event: AdaptationEvent) -> None:
        """Persist adaptation event to storage (best-effort) and queue it."""
        run_dir = self.storage_dir / self.run_id / "adaptations"
        run_dir.mkdir(parents=True, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filename = f"{event.skill_name}_{event.metric_type.value}_{timestamp}.json"
        filepath = run_dir / filename

        try:
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(event.to_dict(), f, ensure_ascii=False, indent=2)
            logger.debug(f"Persisted adaptation event to: {filepath}")
        except Exception as e:
            logger.error(f"Failed to persist adaptation event: {e}")

        # Also add to pending list (even if persistence failed, so warnings
        # are still surfaced in-process).
        self._pending_events.append(event)

    def get_pending_warnings(self) -> List[AdaptationEvent]:
        """Get all pending warning events that need human review."""
        return [
            e for e in self._pending_events
            if e.action_taken in (AdaptationAction.WARN, AdaptationAction.BOTH)
        ]

    def clear_pending_warnings(self) -> None:
        """Clear pending warnings after they have been reviewed."""
        self._pending_events = [
            e for e in self._pending_events
            if e.action_taken == AdaptationAction.RELOAD
        ]

    def get_recent_events(
        self,
        skill_name: Optional[str] = None,
        metric_type: Optional[MetricType] = None,
        limit: int = 50,
    ) -> List[AdaptationEvent]:
        """Get recent adaptation events from persisted storage.

        Args:
            skill_name: Optional filter by skill name
            metric_type: Optional filter by metric type
            limit: Maximum number of files to read (applied before filtering)

        Returns:
            List of recent adaptation events
        """
        events_dir = self.storage_dir / self.run_id / "adaptations"
        if not events_dir.exists():
            return []

        events = []
        for eval_file in sorted(events_dir.glob("*.json"), reverse=True)[:limit]:
            try:
                with open(eval_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                event = self._parse_event(data)
                if skill_name and event.skill_name != skill_name:
                    continue
                if metric_type and event.metric_type != metric_type:
                    continue
                events.append(event)
            except Exception as e:
                logger.warning(f"Failed to load adaptation event {eval_file}: {e}")

        return events

    def _parse_event(self, data: Dict[str, Any]) -> AdaptationEvent:
        """Parse adaptation event from JSON data, with permissive defaults."""
        threshold_data = data.get("threshold", {})
        metric_type = MetricType(threshold_data.get("metric_type", "custom"))

        threshold = AdaptationThreshold(
            metric_type=metric_type,
            operator=threshold_data.get("operator", "lt"),
            value=threshold_data.get("value", 0.0),
            window_size=threshold_data.get("window_size", 10),
            min_samples=threshold_data.get("min_samples", 5),
            action=AdaptationAction(threshold_data.get("action", "warn")),
            cooldown_seconds=threshold_data.get("cooldown_seconds", 300),
        )

        return AdaptationEvent(
            timestamp=data.get("timestamp", ""),
            skill_name=data.get("skill_name", ""),
            metric_type=metric_type,
            threshold=threshold,
            current_value=data.get("current_value", 0.0),
            avg_value=data.get("avg_value", 0.0),
            action_taken=AdaptationAction(data.get("action_taken", "warn")),
            details=data.get("details", {}),
        )

    def add_threshold(self, threshold: AdaptationThreshold) -> None:
        """Add a new threshold configuration (instance-local)."""
        self.thresholds.append(threshold)

    def remove_threshold(self, metric_type: MetricType) -> None:
        """Remove all thresholds for a specific metric type."""
        self.thresholds = [
            t for t in self.thresholds
            if t.metric_type != metric_type
        ]

    def update_threshold(
        self,
        metric_type: MetricType,
        **kwargs,
    ) -> None:
        """Update threshold configuration for a metric type.

        Unknown keyword names are silently ignored.
        """
        for threshold in self.thresholds:
            if threshold.metric_type == metric_type:
                for key, value in kwargs.items():
                    if hasattr(threshold, key):
                        setattr(threshold, key, value)

    def get_thresholds(self) -> List[AdaptationThreshold]:
        """Get a copy of the current threshold configurations."""
        return list(self.thresholds)

    def is_in_cooldown(self, skill_name: str, metric_type: MetricType) -> bool:
        """Check if a skill/metric combination is in cooldown period.

        Uses the first threshold found for the metric type.
        """
        key = f"{skill_name}:{metric_type.value}"
        last_trigger = self._cooldowns.get(key)
        if not last_trigger:
            return False

        # Find the threshold for this metric type
        for threshold in self.thresholds:
            if threshold.metric_type == metric_type:
                elapsed = (datetime.now() - last_trigger).total_seconds()
                return elapsed < threshold.cooldown_seconds

        return False
|
||||
|
||||
|
||||
class AdaptationManager:
    """Coordinates skill adaptation across multiple agents.

    Keeps one SkillAdaptationHook per (run_id, agent_id) pair and offers
    aggregate views over pending warnings and persisted adaptation events.
    """

    def __init__(self, storage_dir: Path):
        """Create a manager rooted at *storage_dir*.

        Args:
            storage_dir: Root directory for storing adaptation data
        """
        self.storage_dir = Path(storage_dir)
        self._hooks: Dict[str, SkillAdaptationHook] = {}

    def get_hook(
        self,
        run_id: str,
        agent_id: str,
        thresholds: Optional[List[AdaptationThreshold]] = None,
    ) -> SkillAdaptationHook:
        """Return the hook for (run_id, agent_id), creating it on first use.

        Args:
            run_id: Run identifier
            agent_id: Agent identifier
            thresholds: Optional custom thresholds (only used on creation)

        Returns:
            SkillAdaptationHook instance
        """
        hook_key = f"{run_id}:{agent_id}"
        existing = self._hooks.get(hook_key)
        if existing is not None:
            return existing

        hook = SkillAdaptationHook(
            storage_dir=self.storage_dir,
            run_id=run_id,
            agent_id=agent_id,
            thresholds=thresholds,
        )
        self._hooks[hook_key] = hook
        return hook

    def get_all_pending_warnings(self) -> List[AdaptationEvent]:
        """Collect pending warnings from every registered hook."""
        return [
            event
            for hook in self._hooks.values()
            for event in hook.get_pending_warnings()
        ]

    def get_run_adaptations(self, run_id: str) -> List[AdaptationEvent]:
        """Collect all adaptation events recorded for *run_id*."""
        collected: List[AdaptationEvent] = []
        for hook in self._hooks.values():
            if hook.run_id == run_id:
                collected.extend(hook.get_recent_events())
        return collected
|
||||
|
||||
|
||||
# Module-level singleton, created lazily by get_adaptation_manager().
_adaptation_manager: Optional[AdaptationManager] = None


def get_adaptation_manager(storage_dir: Optional[Path] = None) -> AdaptationManager:
    """Return the process-wide AdaptationManager, creating it on first call.

    Args:
        storage_dir: Root storage directory.  Required the first time this
            function is called; ignored on subsequent calls.

    Returns:
        AdaptationManager instance

    Raises:
        ValueError: If called before initialization without *storage_dir*.
    """
    global _adaptation_manager
    if _adaptation_manager is not None:
        return _adaptation_manager
    if storage_dir is None:
        raise ValueError("storage_dir required on first initialization")
    _adaptation_manager = AdaptationManager(storage_dir)
    return _adaptation_manager
|
||||
|
||||
|
||||
# Public API of the skill adaptation module.
__all__ = [
    "AdaptationAction",
    "AdaptationThreshold",
    "AdaptationEvent",
    "SkillAdaptationHook",
    "AdaptationManager",
    "get_adaptation_manager",
]
|
||||
684
backend/agents/base/tool_guard.py
Normal file
684
backend/agents/base/tool_guard.py
Normal file
@@ -0,0 +1,684 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""ToolGuardMixin - Security interception for dangerous tool calls.
|
||||
|
||||
Provides ``_acting`` and ``_reasoning`` overrides that intercept
|
||||
sensitive tool calls before execution, implementing the deny /
|
||||
guard / approve flow.
|
||||
|
||||
Based on CoPaw's tool_guard_mixin.py design.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
from typing import Any, Callable, Dict, Iterable, List, Optional, Set
|
||||
|
||||
from agentscope.message import Msg
|
||||
from backend.runtime.manager import get_global_runtime_manager
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class SeverityLevel(str, Enum):
    """Risk severity level for guard findings.

    Subclasses ``str`` so members compare equal to, and serialize as, their
    plain string values.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
||||
|
||||
|
||||
class ApprovalStatus(str, Enum):
    """Approval lifecycle state.

    Records start as PENDING and transition to APPROVED, DENIED, or EXPIRED.
    Subclasses ``str`` so members serialize as their plain string values.
    """

    PENDING = "pending"
    APPROVED = "approved"
    DENIED = "denied"
    EXPIRED = "expired"
|
||||
|
||||
class ToolFindingRecord:
    """Internal representation of a single guard finding.

    A finding couples a severity with a human-readable message and an
    optional input-field name the finding refers to.
    """

    def __init__(self, severity: SeverityLevel, message: str, field: Optional[str] = None) -> None:
        """Store severity, message, and the (optional) offending field name."""
        self.severity = severity
        self.message = message
        self.field = field

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the finding (severity as its string value)."""
        payload = {
            "severity": self.severity.value,
            "message": self.message,
            "field": self.field,
        }
        return payload
|
||||
|
||||
|
||||
class ApprovalRecord:
    """Stores the state of an approval request.

    Tracks a guarded tool call from PENDING through its resolution
    (approved/denied/expired), including who resolved it and when.
    """

    def __init__(
        self,
        approval_id: str,
        tool_name: str,
        tool_input: Dict[str, Any],
        agent_id: str,
        workspace_id: str,
        session_id: Optional[str] = None,
        findings: Optional[List[ToolFindingRecord]] = None,
    ) -> None:
        self.approval_id = approval_id
        self.tool_name = tool_name
        self.tool_input = tool_input
        self.agent_id = agent_id
        self.workspace_id = workspace_id
        self.session_id = session_id
        # Every record starts in the PENDING state.
        self.status = ApprovalStatus.PENDING
        self.findings = findings or []
        # NOTE(review): datetime.utcnow() returns a naive datetime and is
        # deprecated since Python 3.12 — consider datetime.now(timezone.utc),
        # but note that would change the isoformat output (adds "+00:00").
        self.created_at = datetime.utcnow()
        self.resolved_at: Optional[datetime] = None
        self.resolved_by: Optional[str] = None
        self.metadata: Dict[str, Any] = {}
        # Set while a live ToolApprovalRequest is waiting on this record, so
        # ToolGuardStore.set_status() can wake the waiting coroutine.
        self.pending_request: "ToolApprovalRequest" | None = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the record (timestamps as ISO-8601 strings, or None)."""
        return {
            "approval_id": self.approval_id,
            "status": self.status.value,
            "tool_name": self.tool_name,
            "tool_input": self.tool_input,
            "agent_id": self.agent_id,
            "workspace_id": self.workspace_id,
            "session_id": self.session_id,
            "findings": [f.to_dict() for f in self.findings],
            "created_at": self.created_at.isoformat(),
            "resolved_at": self.resolved_at.isoformat() if self.resolved_at else None,
            "resolved_by": self.resolved_by,
        }
|
||||
|
||||
|
||||
class ToolGuardStore:
    """Simple in-memory approval store for development/testing.

    Not persistent and not shared across processes; records live only for
    the lifetime of this object.
    """

    def __init__(self) -> None:
        self._records: Dict[str, ApprovalRecord] = {}
        self._counter = 0

    def next_id(self) -> str:
        """Return a new, monotonically increasing approval id."""
        self._counter += 1
        return f"approval_{self._counter:06d}"

    def list(
        self,
        status: ApprovalStatus | None = None,
        workspace_id: Optional[str] = None,
        agent_id: Optional[str] = None,
    ) -> Iterable[ApprovalRecord]:
        """Yield records matching all of the given (optional) filters."""
        for rec in self._records.values():
            status_ok = not status or rec.status == status
            workspace_ok = not workspace_id or rec.workspace_id == workspace_id
            agent_ok = not agent_id or rec.agent_id == agent_id
            if status_ok and workspace_ok and agent_ok:
                yield rec

    def get(self, approval_id: str) -> Optional[ApprovalRecord]:
        """Return the record for *approval_id*, or None if unknown."""
        return self._records.get(approval_id)

    def create_pending(
        self,
        tool_name: str,
        tool_input: Dict[str, Any],
        agent_id: str,
        workspace_id: str,
        session_id: Optional[str] = None,
        findings: Optional[List[ToolFindingRecord]] = None,
    ) -> ApprovalRecord:
        """Create, register and return a new PENDING approval record."""
        new_record = ApprovalRecord(
            approval_id=self.next_id(),
            tool_name=tool_name,
            tool_input=tool_input,
            agent_id=agent_id,
            workspace_id=workspace_id,
            session_id=session_id,
            findings=findings,
        )
        self._records[new_record.approval_id] = new_record
        return new_record

    def set_status(
        self,
        approval_id: str,
        status: ApprovalStatus,
        resolved_by: Optional[str] = None,
        notify_request: bool = True,
    ) -> ApprovalRecord:
        """Transition a record to *status* and optionally wake its waiter.

        Raises KeyError for an unknown approval_id.  A no-op transition
        (same status) returns the record unchanged.
        """
        record = self._records[approval_id]
        if record.status == status:
            return record

        record.status = status
        record.resolved_at = datetime.utcnow()
        record.resolved_by = resolved_by

        waiter = record.pending_request if notify_request else None
        if waiter is not None:
            if status == ApprovalStatus.APPROVED:
                waiter.approve()
            elif status == ApprovalStatus.DENIED:
                waiter.deny()
        return record

    def cancel(self, approval_id: str) -> None:
        """Drop a record entirely (silently ignores unknown ids)."""
        self._records.pop(approval_id, None)
|
||||
|
||||
|
||||
# Process-wide singleton store shared by all agents in this process.
TOOL_GUARD_STORE = ToolGuardStore()


def get_tool_guard_store() -> ToolGuardStore:
    """Return the process-wide ToolGuardStore singleton."""
    return TOOL_GUARD_STORE
|
||||
|
||||
|
||||
# Default tools that require approval before execution.
DEFAULT_GUARDED_TOOLS: Set[str] = {
    "execute_shell_command",
    "write_file",
    "edit_file",
    "place_order",
    "modify_position",
    "delete_file",
}

# Default denied tools (cannot be approved).
# NOTE(review): "execute_shell_command" appears in both sets; presumably the
# deny check takes precedence — verify in the mixin's interception logic.
DEFAULT_DENIED_TOOLS: Set[str] = {
    "execute_shell_command",  # Shell execution is dangerous
}

# Memory mark attached to messages produced when the guard denies a tool
# call, so they can later be located and cleaned up.
TOOL_GUARD_DENIED_MARK = "tool_guard_denied"
|
||||
|
||||
|
||||
def default_findings_for_tool(tool_name: str) -> List[ToolFindingRecord]:
    """Return the built-in guard findings for *tool_name*.

    Only portfolio-mutating tools currently produce a finding; any other
    tool name yields an empty list.
    """
    if tool_name not in {"execute_trade", "modify_portfolio"}:
        return []
    finding = ToolFindingRecord(
        severity=SeverityLevel.HIGH,
        message=f"Tool '{tool_name}' touches portfolio state",
    )
    return [finding]
|
||||
|
||||
|
||||
class ToolApprovalRequest:
|
||||
"""Represents a pending tool approval request."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
approval_id: str,
|
||||
tool_name: str,
|
||||
tool_input: Dict[str, Any],
|
||||
tool_call_id: str,
|
||||
session_id: Optional[str] = None,
|
||||
):
|
||||
self.approval_id = approval_id
|
||||
self.tool_name = tool_name
|
||||
self.tool_input = tool_input
|
||||
self.tool_call_id = tool_call_id
|
||||
self.session_id = session_id
|
||||
self.approved: Optional[bool] = None
|
||||
self._event = asyncio.Event()
|
||||
|
||||
async def wait_for_approval(self, timeout: Optional[float] = None) -> bool:
|
||||
"""Wait for approval decision.
|
||||
|
||||
Args:
|
||||
timeout: Maximum time to wait in seconds
|
||||
|
||||
Returns:
|
||||
True if approved, False otherwise
|
||||
"""
|
||||
try:
|
||||
await asyncio.wait_for(self._event.wait(), timeout=timeout)
|
||||
except asyncio.TimeoutError:
|
||||
return False
|
||||
return self.approved is True
|
||||
|
||||
def approve(self) -> None:
|
||||
"""Approve this request."""
|
||||
self.approved = True
|
||||
self._event.set()
|
||||
|
||||
def deny(self) -> None:
|
||||
"""Deny this request."""
|
||||
self.approved = False
|
||||
self._event.set()
|
||||
|
||||
|
||||
class ToolGuardMixin:
|
||||
"""Mixin that adds tool-guard interception to a ReActAgent.
|
||||
|
||||
At runtime this class is combined with ReActAgent via MRO,
|
||||
so ``super()._acting`` and ``super()._reasoning`` resolve to
|
||||
the concrete agent methods.
|
||||
|
||||
Usage:
|
||||
class MyAgent(ToolGuardMixin, ReActAgent):
|
||||
def __init__(self, ...):
|
||||
super().__init__(...)
|
||||
self._init_tool_guard()
|
||||
"""
|
||||
|
||||
    def _init_tool_guard(
        self,
        guarded_tools: Optional[Set[str]] = None,
        denied_tools: Optional[Set[str]] = None,
        approval_timeout: float = 300.0,
    ) -> None:
        """Initialize tool guard state.

        Per the class docstring, call this once from the agent's __init__
        (after ``super().__init__``) before any guarded tool call runs.

        Args:
            guarded_tools: Set of tool names requiring approval
            denied_tools: Set of tool names that are always denied
            approval_timeout: Timeout for approval requests in seconds
        """
        # Module-level defaults are copied so later mutation never touches
        # the shared constants; a caller-supplied set is used as-is (aliased).
        self._guarded_tools = guarded_tools or DEFAULT_GUARDED_TOOLS.copy()
        self._denied_tools = denied_tools or DEFAULT_DENIED_TOOLS.copy()
        self._approval_timeout = approval_timeout
        # At most one approval request is tracked at a time.
        self._pending_approval: Optional[ToolApprovalRequest] = None
        self._approval_callback: Optional[Callable[[ToolApprovalRequest], None]] = None
        self._approval_lock = asyncio.Lock()
||||
|
||||
def set_approval_callback(
|
||||
self,
|
||||
callback: Callable[[ToolApprovalRequest], None],
|
||||
) -> None:
|
||||
"""Set callback for approval requests.
|
||||
|
||||
Args:
|
||||
callback: Function called when approval is needed
|
||||
"""
|
||||
self._approval_callback = callback
|
||||
|
||||
def _is_tool_guarded(self, tool_name: str) -> bool:
|
||||
"""Check if a tool requires approval.
|
||||
|
||||
Args:
|
||||
tool_name: Name of the tool
|
||||
|
||||
Returns:
|
||||
True if tool requires approval
|
||||
"""
|
||||
return tool_name in self._guarded_tools
|
||||
|
||||
def _is_tool_denied(self, tool_name: str) -> bool:
|
||||
"""Check if a tool is always denied.
|
||||
|
||||
Args:
|
||||
tool_name: Name of the tool
|
||||
|
||||
Returns:
|
||||
True if tool is denied
|
||||
"""
|
||||
return tool_name in self._denied_tools
|
||||
|
||||
def _last_tool_response_is_denied(self) -> bool:
|
||||
"""Check if the last message is a guard-denied tool result."""
|
||||
if not hasattr(self, "memory") or not self.memory.content:
|
||||
return False
|
||||
|
||||
msg, marks = self.memory.content[-1]
|
||||
return TOOL_GUARD_DENIED_MARK in marks and msg.role == "system"
|
||||
|
||||
async def _cleanup_tool_guard_denied_messages(
|
||||
self,
|
||||
include_denial_response: bool = True,
|
||||
) -> None:
|
||||
"""Remove tool-guard denied messages from memory.
|
||||
|
||||
Args:
|
||||
include_denial_response: Also remove the assistant's denial explanation
|
||||
"""
|
||||
if not hasattr(self, "memory"):
|
||||
return
|
||||
|
||||
ids_to_delete: list[str] = []
|
||||
last_marked_idx = -1
|
||||
|
||||
for i, (msg, marks) in enumerate(self.memory.content):
|
||||
if TOOL_GUARD_DENIED_MARK in marks:
|
||||
ids_to_delete.append(msg.id)
|
||||
last_marked_idx = i
|
||||
|
||||
if (
|
||||
include_denial_response
|
||||
and last_marked_idx >= 0
|
||||
and last_marked_idx + 1 < len(self.memory.content)
|
||||
):
|
||||
next_msg, _ = self.memory.content[last_marked_idx + 1]
|
||||
if next_msg.role == "assistant":
|
||||
ids_to_delete.append(next_msg.id)
|
||||
|
||||
if ids_to_delete:
|
||||
removed = await self.memory.delete(ids_to_delete)
|
||||
logger.info("Tool guard: cleaned up %d denied message(s)", removed)
|
||||
|
||||
async def _request_guard_approval(
|
||||
self,
|
||||
tool_name: str,
|
||||
tool_input: Dict[str, Any],
|
||||
tool_call_id: str,
|
||||
) -> bool:
|
||||
"""Request approval for a guarded tool call.
|
||||
|
||||
This method creates a ToolApprovalRequest and waits for
|
||||
external approval via approve_guard_call() or deny_guard_call().
|
||||
|
||||
Args:
|
||||
tool_name: Name of the tool
|
||||
tool_input: Tool input parameters
|
||||
tool_call_id: ID of the tool call
|
||||
|
||||
Returns:
|
||||
True if approved, False otherwise
|
||||
"""
|
||||
async with self._approval_lock:
|
||||
record = TOOL_GUARD_STORE.create_pending(
|
||||
tool_name=tool_name,
|
||||
tool_input=tool_input,
|
||||
agent_id=getattr(self, "agent_id", "unknown"),
|
||||
workspace_id=getattr(self, "workspace_id", "default"),
|
||||
session_id=getattr(self, "session_id", None),
|
||||
findings=default_findings_for_tool(tool_name),
|
||||
)
|
||||
|
||||
manager = get_global_runtime_manager()
|
||||
if manager:
|
||||
manager.register_pending_approval(
|
||||
record.approval_id,
|
||||
{
|
||||
"tool_name": record.tool_name,
|
||||
"agent_id": record.agent_id,
|
||||
"workspace_id": record.workspace_id,
|
||||
"session_id": record.session_id,
|
||||
"tool_input": record.tool_input,
|
||||
},
|
||||
)
|
||||
|
||||
self._pending_approval = ToolApprovalRequest(
|
||||
approval_id=record.approval_id,
|
||||
tool_name=tool_name,
|
||||
tool_input=tool_input,
|
||||
tool_call_id=tool_call_id,
|
||||
session_id=getattr(self, "session_id", None),
|
||||
)
|
||||
record.pending_request = self._pending_approval
|
||||
|
||||
# Notify via callback if set
|
||||
if self._approval_callback:
|
||||
self._approval_callback(self._pending_approval)
|
||||
|
||||
# Wait for approval (lock is released during wait, re-acquired after)
|
||||
approval_request = self._pending_approval
|
||||
|
||||
# Wait for approval outside the lock to allow concurrent approval
|
||||
approved = await approval_request.wait_for_approval(
|
||||
timeout=self._approval_timeout
|
||||
)
|
||||
|
||||
async with self._approval_lock:
|
||||
if approval_request:
|
||||
status = (
|
||||
ApprovalStatus.APPROVED
|
||||
if approval_request.approved is True
|
||||
else ApprovalStatus.DENIED
|
||||
if approval_request.approved is False
|
||||
else ApprovalStatus.EXPIRED
|
||||
)
|
||||
TOOL_GUARD_STORE.set_status(
|
||||
approval_request.approval_id,
|
||||
status,
|
||||
resolved_by="agent",
|
||||
notify_request=False,
|
||||
)
|
||||
manager = get_global_runtime_manager()
|
||||
if manager:
|
||||
manager.resolve_pending_approval(
|
||||
approval_request.approval_id,
|
||||
resolved_by="agent",
|
||||
status=status.value,
|
||||
)
|
||||
|
||||
# Only clear if this is still the same request
|
||||
if self._pending_approval is approval_request:
|
||||
self._pending_approval = None
|
||||
|
||||
return approved
|
||||
|
||||
async def approve_guard_call(self, request_id: Optional[str] = None) -> bool:
|
||||
"""Approve a pending guard request.
|
||||
|
||||
This method is called externally to approve a tool call
|
||||
that is waiting for approval.
|
||||
|
||||
Args:
|
||||
request_id: Optional request ID to verify (not yet implemented)
|
||||
|
||||
Returns:
|
||||
True if a request was approved, False if no pending request
|
||||
"""
|
||||
async with self._approval_lock:
|
||||
if self._pending_approval is None:
|
||||
logger.warning("No pending approval request to approve")
|
||||
return False
|
||||
|
||||
TOOL_GUARD_STORE.set_status(
|
||||
self._pending_approval.approval_id,
|
||||
ApprovalStatus.APPROVED,
|
||||
resolved_by="agent",
|
||||
notify_request=False,
|
||||
)
|
||||
manager = get_global_runtime_manager()
|
||||
if manager:
|
||||
manager.resolve_pending_approval(
|
||||
self._pending_approval.approval_id,
|
||||
resolved_by="agent",
|
||||
status=ApprovalStatus.APPROVED.value,
|
||||
)
|
||||
self._pending_approval.approve()
|
||||
logger.info("Approved tool call: %s", self._pending_approval.tool_name)
|
||||
return True
|
||||
|
||||
async def deny_guard_call(self, request_id: Optional[str] = None) -> bool:
|
||||
"""Deny a pending guard request.
|
||||
|
||||
This method is called externally to deny a tool call
|
||||
that is waiting for approval.
|
||||
|
||||
Args:
|
||||
request_id: Optional request ID to verify (not yet implemented)
|
||||
|
||||
Returns:
|
||||
True if a request was denied, False if no pending request
|
||||
"""
|
||||
async with self._approval_lock:
|
||||
if self._pending_approval is None:
|
||||
logger.warning("No pending approval request to deny")
|
||||
return False
|
||||
|
||||
TOOL_GUARD_STORE.set_status(
|
||||
self._pending_approval.approval_id,
|
||||
ApprovalStatus.DENIED,
|
||||
resolved_by="agent",
|
||||
notify_request=False,
|
||||
)
|
||||
manager = get_global_runtime_manager()
|
||||
if manager:
|
||||
manager.resolve_pending_approval(
|
||||
self._pending_approval.approval_id,
|
||||
resolved_by="agent",
|
||||
status=ApprovalStatus.DENIED.value,
|
||||
)
|
||||
self._pending_approval.deny()
|
||||
logger.info("Denied tool call: %s", self._pending_approval.tool_name)
|
||||
return True
|
||||
|
||||
async def _acting(self, tool_call) -> dict | None:
|
||||
"""Intercept sensitive tool calls before execution.
|
||||
|
||||
1. If tool is in denied_tools, auto-deny unconditionally.
|
||||
2. Check for a one-shot pre-approval.
|
||||
3. If tool is in the guarded scope, request approval.
|
||||
4. Otherwise, delegate to parent _acting.
|
||||
|
||||
Args:
|
||||
tool_call: Tool call from the model
|
||||
|
||||
Returns:
|
||||
Tool result dict or None
|
||||
"""
|
||||
tool_name: str = tool_call.get("name", "")
|
||||
tool_input: dict = tool_call.get("input", {})
|
||||
tool_call_id: str = tool_call.get("id", "")
|
||||
|
||||
# Check if tool is denied
|
||||
if tool_name and self._is_tool_denied(tool_name):
|
||||
logger.warning("Tool '%s' is in the denied set, auto-denying", tool_name)
|
||||
return await self._acting_auto_denied(tool_call, tool_name)
|
||||
|
||||
# Check if tool is guarded
|
||||
if tool_name and self._is_tool_guarded(tool_name):
|
||||
approved = await self._request_guard_approval(
|
||||
tool_name=tool_name,
|
||||
tool_input=tool_input,
|
||||
tool_call_id=tool_call_id,
|
||||
)
|
||||
|
||||
if not approved:
|
||||
return await self._acting_with_denial(tool_call, tool_name)
|
||||
|
||||
# Call parent _acting
|
||||
return await super()._acting(tool_call) # type: ignore[misc]
|
||||
|
||||
async def _acting_auto_denied(
|
||||
self,
|
||||
tool_call: Dict[str, Any],
|
||||
tool_name: str,
|
||||
) -> dict | None:
|
||||
"""Auto-deny a tool call without offering approval.
|
||||
|
||||
Args:
|
||||
tool_call: Tool call from the model
|
||||
tool_name: Name of the denied tool
|
||||
|
||||
Returns:
|
||||
Denial result
|
||||
"""
|
||||
from agentscope.message import ToolResultBlock
|
||||
|
||||
denied_text = (
|
||||
f"⛔ **Tool Blocked / 工具已拦截**\n\n"
|
||||
f"- Tool / 工具: `{tool_name}`\n"
|
||||
f"- Reason / 原因: This tool is blocked for security reasons\n\n"
|
||||
f"This tool is blocked and cannot be approved.\n"
|
||||
f"该工具已被禁止,无法批准执行。"
|
||||
)
|
||||
|
||||
tool_res_msg = Msg(
|
||||
"system",
|
||||
[
|
||||
ToolResultBlock(
|
||||
type="tool_result",
|
||||
id=tool_call.get("id", ""),
|
||||
name=tool_name,
|
||||
output=[{"type": "text", "text": denied_text}],
|
||||
),
|
||||
],
|
||||
"system",
|
||||
)
|
||||
|
||||
await self.print(tool_res_msg, True)
|
||||
await self.memory.add(tool_res_msg)
|
||||
return None
|
||||
|
||||
async def _acting_with_denial(
|
||||
self,
|
||||
tool_call: Dict[str, Any],
|
||||
tool_name: str,
|
||||
) -> dict | None:
|
||||
"""Deny the tool call after approval was rejected.
|
||||
|
||||
Args:
|
||||
tool_call: Tool call from the model
|
||||
tool_name: Name of the tool
|
||||
|
||||
Returns:
|
||||
Denial result
|
||||
"""
|
||||
from agentscope.message import ToolResultBlock
|
||||
|
||||
params_text = json.dumps(
|
||||
tool_call.get("input", {}),
|
||||
ensure_ascii=False,
|
||||
indent=2,
|
||||
)
|
||||
|
||||
denied_text = (
|
||||
f"⚠️ **Tool Call Denied / 工具调用被拒绝**\n\n"
|
||||
f"- Tool / 工具: `{tool_name}`\n"
|
||||
f"- Parameters / 参数:\n"
|
||||
f"```json\n{params_text}\n```\n\n"
|
||||
f"The tool call was denied by the user or timed out.\n"
|
||||
f"工具调用被用户拒绝或已超时。"
|
||||
)
|
||||
|
||||
tool_res_msg = Msg(
|
||||
"system",
|
||||
[
|
||||
ToolResultBlock(
|
||||
type="tool_result",
|
||||
id=tool_call.get("id", ""),
|
||||
name=tool_name,
|
||||
output=[{"type": "text", "text": denied_text}],
|
||||
),
|
||||
],
|
||||
"system",
|
||||
)
|
||||
|
||||
await self.print(tool_res_msg, True)
|
||||
await self.memory.add(tool_res_msg, marks=TOOL_GUARD_DENIED_MARK)
|
||||
return None
|
||||
|
||||
async def _reasoning(self, **kwargs) -> Msg:
|
||||
"""Short-circuit reasoning when awaiting guard approval.
|
||||
|
||||
If the last message was a guard denial, return a waiting message
|
||||
instead of continuing reasoning.
|
||||
|
||||
Returns:
|
||||
Response message
|
||||
"""
|
||||
if self._last_tool_response_is_denied():
|
||||
msg = Msg(
|
||||
self.name,
|
||||
"⏳ Waiting for approval / 等待审批...\n\n"
|
||||
"Type `/approve` to approve, or send any message to deny.\n"
|
||||
"输入 `/approve` 批准执行,或发送任意消息拒绝。",
|
||||
"assistant",
|
||||
)
|
||||
await self.print(msg, True)
|
||||
await self.memory.add(msg)
|
||||
return msg
|
||||
|
||||
return await super()._reasoning(**kwargs) # type: ignore[misc]
|
||||
|
||||
|
||||
# Explicit public API of this module.
__all__ = [
    "ToolGuardMixin",
    "ToolApprovalRequest",
    "DEFAULT_GUARDED_TOOLS",
    "DEFAULT_DENIED_TOOLS",
    "TOOL_GUARD_DENIED_MARK",
]
Reference in New Issue
Block a user