feat: Refactor services architecture and update project structure
- Remove Docker-based microservices (docker-compose.yml, Makefile, Dockerfiles)
- Update start-dev.sh to use the backend.app:app entry point
- Add shared schema and client modules for service communication
- Add team coordination modules (messenger, registry, task_delegator, coordinator)
- Add evaluation hooks and skill adaptation hooks
- Add skill template and gateway server
- Update frontend WebSocket URL configuration
- Add explain components for insider and technical analysis

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
backend/agents/base/evaluation_hook.py (new file, 452 lines)
@@ -0,0 +1,452 @@
# -*- coding: utf-8 -*-
"""Evaluation hooks system for skills.

Provides evaluation metric collection and storage for skill performance tracking.
Based on the evaluation hooks design in SKILL_TEMPLATE.md.
"""
from __future__ import annotations

import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)


class MetricType(Enum):
    """Types of evaluation metrics."""
    HIT_RATE = "hit_rate"  # signal hit rate
    RISK_VIOLATION = "risk_violation"  # risk-control violation rate
    POSITION_DEVIATION = "position_deviation"  # position deviation rate
    PnL_ATTRIBUTION = "pnl_attribution"  # P&L attribution consistency
    SIGNAL_CONSISTENCY = "signal_consistency"  # signal consistency
    DECISION_LATENCY = "decision_latency"  # decision latency
    TOOL_USAGE = "tool_usage"  # tool usage rate
    CUSTOM = "custom"  # custom metric


@dataclass
class EvaluationMetric:
    """A single evaluation metric."""
    name: str
    metric_type: MetricType
    value: float
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        return {
            "name": self.name,
            "metric_type": self.metric_type.value,
            "value": self.value,
            "timestamp": self.timestamp,
            "metadata": self.metadata,
        }


@dataclass
class EvaluationResult:
    """Evaluation result for a skill execution."""
    skill_name: str
    run_id: str
    agent_id: str
    metrics: List[EvaluationMetric] = field(default_factory=list)
    inputs: Dict[str, Any] = field(default_factory=dict)
    outputs: Dict[str, Any] = field(default_factory=dict)
    decision: Optional[str] = None
    success: bool = True
    error_message: Optional[str] = None
    started_at: Optional[str] = None
    completed_at: Optional[str] = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> Dict[str, Any]:
        return {
            "skill_name": self.skill_name,
            "run_id": self.run_id,
            "agent_id": self.agent_id,
            "metrics": [m.to_dict() for m in self.metrics],
            "inputs": self.inputs,
            "outputs": self.outputs,
            "decision": self.decision,
            "success": self.success,
            "error_message": self.error_message,
            "started_at": self.started_at,
            "completed_at": self.completed_at,
        }


class EvaluationHook:
    """Hook for collecting skill evaluation metrics.

    This hook collects and stores evaluation metrics after skill execution
    for later analysis and memory/reflection stages.
    """

    def __init__(
        self,
        storage_dir: Path,
        run_id: str,
        agent_id: str,
    ):
        """Initialize evaluation hook.

        Args:
            storage_dir: Directory to store evaluation results
            run_id: Current run identifier
            agent_id: Current agent identifier
        """
        self.storage_dir = Path(storage_dir)
        self.run_id = run_id
        self.agent_id = agent_id
        self._current_evaluation: Optional[EvaluationResult] = None

    def start_evaluation(
        self,
        skill_name: str,
        inputs: Dict[str, Any],
    ) -> None:
        """Start a new evaluation session.

        Args:
            skill_name: Name of the skill being evaluated
            inputs: Input parameters for the skill
        """
        self._current_evaluation = EvaluationResult(
            skill_name=skill_name,
            run_id=self.run_id,
            agent_id=self.agent_id,
            inputs=inputs,
            started_at=datetime.now().isoformat(),
        )
        logger.debug(f"Started evaluation for skill: {skill_name}")

    def add_metric(
        self,
        name: str,
        metric_type: MetricType,
        value: float,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Add an evaluation metric.

        Args:
            name: Metric name
            metric_type: Type of metric
            value: Metric value
            metadata: Additional metadata
        """
        if self._current_evaluation is None:
            logger.warning("No active evaluation session, ignoring metric")
            return

        metric = EvaluationMetric(
            name=name,
            metric_type=metric_type,
            value=value,
            metadata=metadata or {},
        )
        self._current_evaluation.metrics.append(metric)
        logger.debug(f"Added metric: {name} = {value}")

    def add_metrics(self, metrics: List[EvaluationMetric]) -> None:
        """Add multiple evaluation metrics at once.

        Args:
            metrics: List of metrics to add
        """
        if self._current_evaluation is None:
            logger.warning("No active evaluation session, ignoring metrics")
            return

        self._current_evaluation.metrics.extend(metrics)

    def record_outputs(self, outputs: Dict[str, Any]) -> None:
        """Record skill outputs.

        Args:
            outputs: Output from skill execution
        """
        if self._current_evaluation is None:
            logger.warning("No active evaluation session, ignoring outputs")
            return

        self._current_evaluation.outputs = outputs

    def record_decision(self, decision: str) -> None:
        """Record the final decision.

        Args:
            decision: Final decision made by the skill
        """
        if self._current_evaluation is None:
            logger.warning("No active evaluation session, ignoring decision")
            return

        self._current_evaluation.decision = decision

    def complete_evaluation(
        self,
        success: bool = True,
        error_message: Optional[str] = None,
    ) -> Optional[EvaluationResult]:
        """Complete the evaluation session and persist results.

        Args:
            success: Whether the skill execution was successful
            error_message: Error message if failed

        Returns:
            The completed evaluation result, or None if no active evaluation
        """
        if self._current_evaluation is None:
            logger.warning("No active evaluation to complete")
            return None

        self._current_evaluation.success = success
        self._current_evaluation.error_message = error_message
        self._current_evaluation.completed_at = datetime.now().isoformat()

        # Persist to storage
        result = self._persist_evaluation(self._current_evaluation)

        self._current_evaluation = None
        logger.debug(f"Completed evaluation for skill: {result.skill_name}")

        return result

    def _persist_evaluation(self, evaluation: EvaluationResult) -> EvaluationResult:
        """Persist evaluation result to storage.

        Args:
            evaluation: Evaluation result to persist

        Returns:
            The persisted evaluation
        """
        # Create run-specific directory
        run_dir = self.storage_dir / self.run_id
        run_dir.mkdir(parents=True, exist_ok=True)

        # Create agent-specific subdirectory
        agent_dir = run_dir / self.agent_id
        agent_dir.mkdir(parents=True, exist_ok=True)

        # Generate filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filename = f"{evaluation.skill_name}_{timestamp}.json"
        filepath = agent_dir / filename

        # Write evaluation result
        try:
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(evaluation.to_dict(), f, ensure_ascii=False, indent=2)
            logger.info(f"Persisted evaluation to: {filepath}")
        except Exception as e:
            logger.error(f"Failed to persist evaluation: {e}")

        return evaluation

    def cancel_evaluation(self) -> None:
        """Cancel the current evaluation session without saving."""
        if self._current_evaluation is not None:
            logger.debug(f"Cancelled evaluation for: {self._current_evaluation.skill_name}")
            self._current_evaluation = None

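(Illustrative aside, not part of the committed file.) A minimal sketch of the hook lifecycle defined above; the import path, storage directory, identifiers, and metric values are assumptions made up for illustration.

from pathlib import Path

from backend.agents.base.evaluation_hook import EvaluationHook, MetricType

# Hypothetical run: one hook instance per (run, agent) pair.
hook = EvaluationHook(
    storage_dir=Path("./data/evaluations"),
    run_id="run-001",
    agent_id="agent-a",
)

hook.start_evaluation("momentum_signal", inputs={"symbol": "AAPL"})
hook.add_metric("hit_rate", MetricType.HIT_RATE, 0.62)
hook.record_outputs({"signal": "long"})
hook.record_decision("BUY")

# Persists one JSON file under ./data/evaluations/run-001/agent-a/.
result = hook.complete_evaluation(success=True)
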

class EvaluationCollector:
    """Collector for aggregating evaluation metrics across runs.

    Provides methods to query and analyze evaluation results.
    """

    def __init__(self, storage_dir: Path):
        """Initialize evaluation collector.

        Args:
            storage_dir: Root directory containing evaluation results
        """
        self.storage_dir = Path(storage_dir)

    def get_run_evaluations(
        self,
        run_id: str,
        agent_id: Optional[str] = None,
    ) -> List[EvaluationResult]:
        """Get all evaluations for a run.

        Args:
            run_id: Run identifier
            agent_id: Optional agent identifier to filter by

        Returns:
            List of evaluation results
        """
        run_dir = self.storage_dir / run_id
        if not run_dir.exists():
            return []

        evaluations = []

        agent_dirs = [run_dir / agent_id] if agent_id else run_dir.iterdir()

        for agent_dir in agent_dirs:
            if not agent_dir.is_dir():
                continue

            for eval_file in agent_dir.glob("*.json"):
                try:
                    with open(eval_file, "r", encoding="utf-8") as f:
                        data = json.load(f)
                    evaluations.append(self._parse_evaluation(data))
                except Exception as e:
                    logger.warning(f"Failed to load evaluation {eval_file}: {e}")

        return evaluations

    def get_skill_metrics(
        self,
        skill_name: str,
        run_ids: Optional[List[str]] = None,
    ) -> List[EvaluationMetric]:
        """Get all metrics for a specific skill.

        Args:
            skill_name: Name of the skill
            run_ids: Optional list of run IDs to filter by

        Returns:
            List of metrics for the skill
        """
        metrics = []

        if run_ids is None:
            run_ids = [d.name for d in self.storage_dir.iterdir() if d.is_dir()]

        for run_id in run_ids:
            evaluations = self.get_run_evaluations(run_id)
            for eval_result in evaluations:
                if eval_result.skill_name == skill_name:
                    metrics.extend(eval_result.metrics)

        return metrics

    def calculate_skill_stats(
        self,
        skill_name: str,
        metric_type: MetricType,
        run_ids: Optional[List[str]] = None,
    ) -> Dict[str, float]:
        """Calculate statistics for a specific metric type.

        Args:
            skill_name: Name of the skill
            metric_type: Type of metric to calculate
            run_ids: Optional list of run IDs to filter by

        Returns:
            Dictionary with min, max, avg, count statistics
        """
        metrics = self.get_skill_metrics(skill_name, run_ids)
        filtered = [m for m in metrics if m.metric_type == metric_type]

        if not filtered:
            return {"count": 0}

        values = [m.value for m in filtered]
        return {
            "count": len(values),
            "min": min(values),
            "max": max(values),
            "avg": sum(values) / len(values),
        }

    def _parse_evaluation(self, data: Dict[str, Any]) -> EvaluationResult:
        """Parse evaluation data into EvaluationResult.

        Args:
            data: Raw evaluation data

        Returns:
            Parsed EvaluationResult
        """
        metrics = []
        for m in data.get("metrics", []):
            metrics.append(EvaluationMetric(
                name=m["name"],
                metric_type=MetricType(m["metric_type"]),
                value=m["value"],
                timestamp=m.get("timestamp", ""),
                metadata=m.get("metadata", {}),
            ))

        return EvaluationResult(
            skill_name=data["skill_name"],
            run_id=data["run_id"],
            agent_id=data["agent_id"],
            metrics=metrics,
            inputs=data.get("inputs", {}),
            outputs=data.get("outputs", {}),
            decision=data.get("decision"),
            success=data.get("success", True),
            error_message=data.get("error_message"),
            started_at=data.get("started_at"),
            completed_at=data.get("completed_at"),
        )

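(Illustrative aside, not part of the committed file.) A sketch of querying what the hook persisted; the path, run ID, agent ID, and skill name are the same made-up values as above.

from pathlib import Path

from backend.agents.base.evaluation_hook import EvaluationCollector, MetricType

collector = EvaluationCollector(Path("./data/evaluations"))

# All results one agent produced in a single run.
results = collector.get_run_evaluations("run-001", agent_id="agent-a")

# Aggregate hit-rate stats for one skill across every stored run.
stats = collector.calculate_skill_stats("momentum_signal", MetricType.HIT_RATE)
print(stats)  # e.g. {"count": 3, "min": 0.41, "max": 0.70, "avg": 0.57}
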

def parse_evaluation_hooks(skill_dir: Path) -> Dict[str, Any]:
    """Parse evaluation hooks from SKILL.md.

    Extracts the "Optional: Evaluation hooks" section from skill documentation.

    Args:
        skill_dir: Skill directory path

    Returns:
        Dictionary containing evaluation hook definitions
    """
    skill_md = skill_dir / "SKILL.md"
    if not skill_md.exists():
        return {}

    try:
        content = skill_md.read_text(encoding="utf-8")

        # Extract evaluation hooks section
        if "## Optional: Evaluation hooks" in content:
            start = content.find("## Optional: Evaluation hooks")
            # Find the next ## section or end of file
            next_section = content.find("\n## ", start + 1)
            if next_section == -1:
                eval_section = content[start:]
            else:
                eval_section = content[start:next_section]

            # Parse metrics from the section
            metrics = []
            for metric_type in MetricType:
                if metric_type.value.replace("_", " ") in eval_section.lower():
                    metrics.append(metric_type.value)

            return {
                "supported_metrics": metrics,
                "section_content": eval_section.strip(),
            }
    except Exception as e:
        logger.warning(f"Failed to parse evaluation hooks: {e}")

    return {}


__all__ = [
    "MetricType",
    "EvaluationMetric",
    "EvaluationResult",
    "EvaluationHook",
    "EvaluationCollector",
    "parse_evaluation_hooks",
]
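(Illustrative aside, not part of the committed file.) A sketch of the SKILL.md convention the parser expects; the file contents below are a guess at the shape, based only on the heading string the function searches for and the "metric name with spaces" matching rule.

from pathlib import Path
import tempfile

from backend.agents.base.evaluation_hook import parse_evaluation_hooks

# Hypothetical SKILL.md containing the section heading the parser looks for.
skill_dir = Path(tempfile.mkdtemp())
(skill_dir / "SKILL.md").write_text(
    "# My Skill\n"
    "\n"
    "## Optional: Evaluation hooks\n"
    "Track hit rate and decision latency per execution.\n",
    encoding="utf-8",
)

hooks = parse_evaluation_hooks(skill_dir)
print(hooks["supported_metrics"])  # ['hit_rate', 'decision_latency']
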
backend/agents/base/skill_adaptation_hook.py (new file, 489 lines)
@@ -0,0 +1,489 @@
# -*- coding: utf-8 -*-
"""Skill adaptation hook for an automatic evaluation-to-iteration feedback loop.

Monitors evaluation metrics against configurable thresholds and triggers
automatic skill reload or logs warnings when thresholds are breached.
"""
from __future__ import annotations

import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional

from .evaluation_hook import (
    EvaluationCollector,
    MetricType,
)

logger = logging.getLogger(__name__)


class AdaptationAction(Enum):
    """Actions to take when threshold is breached."""
    RELOAD = "reload"  # automatically reload the skill
    WARN = "warn"  # log a warning for human review
    BOTH = "both"  # reload and warn
    NONE = "none"  # take no action


@dataclass
class AdaptationThreshold:
    """Threshold configuration for a metric."""
    metric_type: MetricType
    operator: str = "lt"  # lt (less than), gt (greater than), lte, gte, eq
    value: float = 0.0
    window_size: int = 10  # sliding-window size used for the moving average
    min_samples: int = 5  # minimum number of samples before the check triggers
    action: AdaptationAction = AdaptationAction.WARN
    cooldown_seconds: int = 300  # cooldown period after a trigger

    def evaluate(self, current_value: float) -> bool:
        """Evaluate if threshold is breached."""
        ops = {
            "lt": lambda x, y: x < y,
            "lte": lambda x, y: x <= y,
            "gt": lambda x, y: x > y,
            "gte": lambda x, y: x >= y,
            "eq": lambda x, y: x == y,
        }
        op_func = ops.get(self.operator)
        if op_func is None:
            logger.warning(f"Unknown operator: {self.operator}")
            return False
        return op_func(current_value, self.value)

    def to_dict(self) -> Dict[str, Any]:
        return {
            "metric_type": self.metric_type.value,
            "operator": self.operator,
            "value": self.value,
            "window_size": self.window_size,
            "min_samples": self.min_samples,
            "action": self.action.value,
            "cooldown_seconds": self.cooldown_seconds,
        }


@dataclass
class AdaptationEvent:
    """Record of an adaptation trigger event."""
    timestamp: str
    skill_name: str
    metric_type: MetricType
    threshold: AdaptationThreshold
    current_value: float
    avg_value: float
    action_taken: AdaptationAction
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        return {
            "timestamp": self.timestamp,
            "skill_name": self.skill_name,
            "metric_type": self.metric_type.value,
            "threshold": self.threshold.to_dict(),
            "current_value": self.current_value,
            "avg_value": self.avg_value,
            "action_taken": self.action_taken.value,
            "details": self.details,
        }


class SkillAdaptationHook:
    """Hook for monitoring evaluation metrics and triggering skill adaptation.

    This hook wraps EvaluationHook to add threshold-based adaptation logic.
    When metrics breach configured thresholds, it can:
    - Automatically reload skills via SkillsManager
    - Log warnings for human review
    - Both
    """

    # Default thresholds for common metrics
    DEFAULT_THRESHOLDS: List[AdaptationThreshold] = [
        AdaptationThreshold(
            metric_type=MetricType.HIT_RATE,
            operator="lt",
            value=0.5,
            action=AdaptationAction.WARN,
            cooldown_seconds=600,
        ),
        AdaptationThreshold(
            metric_type=MetricType.RISK_VIOLATION,
            operator="gt",
            value=0.1,
            action=AdaptationAction.WARN,
            cooldown_seconds=300,
        ),
        AdaptationThreshold(
            metric_type=MetricType.DECISION_LATENCY,
            operator="gt",
            value=5000,  # 5000 ms (5 seconds)
            action=AdaptationAction.WARN,
            cooldown_seconds=300,
        ),
    ]

    def __init__(
        self,
        storage_dir: Path,
        run_id: str,
        agent_id: str,
        thresholds: Optional[List[AdaptationThreshold]] = None,
        collector: Optional[EvaluationCollector] = None,
    ):
        """Initialize skill adaptation hook.

        Args:
            storage_dir: Directory to store adaptation events
            run_id: Current run identifier
            agent_id: Current agent identifier
            thresholds: Custom threshold configurations (uses defaults if None)
            collector: Optional EvaluationCollector for historical data
        """
        self.storage_dir = Path(storage_dir)
        self.run_id = run_id
        self.agent_id = agent_id
        # Copy so add_threshold/remove_threshold never mutate the shared class-level defaults
        self.thresholds = list(thresholds) if thresholds is not None else list(self.DEFAULT_THRESHOLDS)
        self.collector = collector or EvaluationCollector(storage_dir)

        # Track cooldowns to prevent rapid re-triggering
        self._cooldowns: Dict[str, datetime] = {}

        # Store recent metrics in memory for quick access
        self._recent_metrics: Dict[str, List[float]] = {}

        # Pending adaptation events
        self._pending_events: List[AdaptationEvent] = []

    def check_threshold(
        self,
        skill_name: str,
        metric_type: MetricType,
        current_value: float,
    ) -> Optional[AdaptationEvent]:
        """Check if a metric breaches any threshold.

        Args:
            skill_name: Name of the skill
            metric_type: Type of metric
            current_value: Current metric value

        Returns:
            AdaptationEvent if threshold breached, None otherwise
        """
        # Find applicable thresholds
        applicable_thresholds = [
            t for t in self.thresholds
            if t.metric_type == metric_type
        ]

        if not applicable_thresholds:
            return None

        # Check cooldown
        cooldown_key = f"{skill_name}:{metric_type.value}"
        now = datetime.now()
        last_trigger = self._cooldowns.get(cooldown_key)

        # Store current value first for avg calculation
        self._store_metric(cooldown_key, current_value)

        for threshold in applicable_thresholds:
            if last_trigger:
                elapsed = (now - last_trigger).total_seconds()
                if elapsed < threshold.cooldown_seconds:
                    continue

            # Evaluate threshold
            if threshold.evaluate(current_value):
                # Calculate moving average
                avg_value = self._calculate_avg(skill_name, metric_type, current_value)

                # Check minimum samples (allow immediate trigger if min_samples <= 1)
                sample_count = len(self._recent_metrics.get(cooldown_key, []))
                if threshold.min_samples > 1 and sample_count < threshold.min_samples:
                    # Not enough samples yet
                    continue

                # Trigger adaptation
                event = AdaptationEvent(
                    timestamp=now.isoformat(),
                    skill_name=skill_name,
                    metric_type=metric_type,
                    threshold=threshold,
                    current_value=current_value,
                    avg_value=avg_value,
                    action_taken=threshold.action,
                    details={
                        "run_id": self.run_id,
                        "agent_id": self.agent_id,
                    },
                )

                # Update cooldown
                self._cooldowns[cooldown_key] = now

                # Persist event
                self._persist_event(event)

                logger.info(
                    f"Threshold breached for {skill_name}.{metric_type.value}: "
                    f"current={current_value}, avg={avg_value}, action={threshold.action.value}"
                )

                return event

        return None

    def _calculate_avg(
        self,
        skill_name: str,
        metric_type: MetricType,
        current_value: float,
    ) -> float:
        """Calculate moving average for a metric."""
        key = f"{skill_name}:{metric_type.value}"
        values = self._recent_metrics.get(key, [])
        if not values:
            return current_value
        return sum(values) / len(values)

    def _store_metric(self, key: str, value: float) -> None:
        """Store metric value with sliding window."""
        if key not in self._recent_metrics:
            self._recent_metrics[key] = []
        self._recent_metrics[key].append(value)
        # Keep only last 100 values
        if len(self._recent_metrics[key]) > 100:
            self._recent_metrics[key] = self._recent_metrics[key][-100:]

    def _persist_event(self, event: AdaptationEvent) -> None:
        """Persist adaptation event to storage."""
        run_dir = self.storage_dir / self.run_id / "adaptations"
        run_dir.mkdir(parents=True, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filename = f"{event.skill_name}_{event.metric_type.value}_{timestamp}.json"
        filepath = run_dir / filename

        try:
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(event.to_dict(), f, ensure_ascii=False, indent=2)
            logger.debug(f"Persisted adaptation event to: {filepath}")
        except Exception as e:
            logger.error(f"Failed to persist adaptation event: {e}")

        # Also add to pending list
        self._pending_events.append(event)

    def get_pending_warnings(self) -> List[AdaptationEvent]:
        """Get all pending warning events that need human review."""
        return [
            e for e in self._pending_events
            if e.action_taken in (AdaptationAction.WARN, AdaptationAction.BOTH)
        ]

    def clear_pending_warnings(self) -> None:
        """Clear pending warnings after they have been reviewed."""
        self._pending_events = [
            e for e in self._pending_events
            if e.action_taken == AdaptationAction.RELOAD
        ]

    def get_recent_events(
        self,
        skill_name: Optional[str] = None,
        metric_type: Optional[MetricType] = None,
        limit: int = 50,
    ) -> List[AdaptationEvent]:
        """Get recent adaptation events.

        Args:
            skill_name: Optional filter by skill name
            metric_type: Optional filter by metric type
            limit: Maximum number of events to return

        Returns:
            List of recent adaptation events
        """
        events_dir = self.storage_dir / self.run_id / "adaptations"
        if not events_dir.exists():
            return []

        events = []
        for eval_file in sorted(events_dir.glob("*.json"), reverse=True)[:limit]:
            try:
                with open(eval_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                event = self._parse_event(data)
                if skill_name and event.skill_name != skill_name:
                    continue
                if metric_type and event.metric_type != metric_type:
                    continue
                events.append(event)
            except Exception as e:
                logger.warning(f"Failed to load adaptation event {eval_file}: {e}")

        return events

    def _parse_event(self, data: Dict[str, Any]) -> AdaptationEvent:
        """Parse adaptation event from JSON data."""
        threshold_data = data.get("threshold", {})
        metric_type = MetricType(threshold_data.get("metric_type", "custom"))

        threshold = AdaptationThreshold(
            metric_type=metric_type,
            operator=threshold_data.get("operator", "lt"),
            value=threshold_data.get("value", 0.0),
            window_size=threshold_data.get("window_size", 10),
            min_samples=threshold_data.get("min_samples", 5),
            action=AdaptationAction(threshold_data.get("action", "warn")),
            cooldown_seconds=threshold_data.get("cooldown_seconds", 300),
        )

        return AdaptationEvent(
            timestamp=data.get("timestamp", ""),
            skill_name=data.get("skill_name", ""),
            metric_type=metric_type,
            threshold=threshold,
            current_value=data.get("current_value", 0.0),
            avg_value=data.get("avg_value", 0.0),
            action_taken=AdaptationAction(data.get("action_taken", "warn")),
            details=data.get("details", {}),
        )

    def add_threshold(self, threshold: AdaptationThreshold) -> None:
        """Add a new threshold configuration."""
        self.thresholds.append(threshold)

    def remove_threshold(self, metric_type: MetricType) -> None:
        """Remove all thresholds for a specific metric type."""
        self.thresholds = [
            t for t in self.thresholds
            if t.metric_type != metric_type
        ]

    def update_threshold(
        self,
        metric_type: MetricType,
        **kwargs,
    ) -> None:
        """Update threshold configuration for a metric type."""
        for threshold in self.thresholds:
            if threshold.metric_type == metric_type:
                for key, value in kwargs.items():
                    if hasattr(threshold, key):
                        setattr(threshold, key, value)

    def get_thresholds(self) -> List[AdaptationThreshold]:
        """Get current threshold configurations."""
        return list(self.thresholds)

    def is_in_cooldown(self, skill_name: str, metric_type: MetricType) -> bool:
        """Check if a skill/metric combination is in cooldown period."""
        key = f"{skill_name}:{metric_type.value}"
        last_trigger = self._cooldowns.get(key)
        if not last_trigger:
            return False

        # Find the threshold for this metric type
        for threshold in self.thresholds:
            if threshold.metric_type == metric_type:
                elapsed = (datetime.now() - last_trigger).total_seconds()
                return elapsed < threshold.cooldown_seconds

        return False

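(Illustrative aside, not part of the committed file.) A minimal sketch of driving the hook with a custom threshold; the path, identifiers, and values are assumptions for illustration.

from pathlib import Path

from backend.agents.base.evaluation_hook import MetricType
from backend.agents.base.skill_adaptation_hook import (
    AdaptationAction,
    AdaptationThreshold,
    SkillAdaptationHook,
)

hook = SkillAdaptationHook(
    storage_dir=Path("./data/evaluations"),
    run_id="run-001",
    agent_id="agent-a",
    thresholds=[AdaptationThreshold(
        metric_type=MetricType.HIT_RATE,
        operator="lt",
        value=0.55,
        min_samples=1,  # trigger immediately, no warm-up window
        action=AdaptationAction.WARN,
    )],
)

# Each observed value is checked; an AdaptationEvent is returned (and
# persisted) only when a threshold is breached with enough samples and
# outside its cooldown window.
event = hook.check_threshold("momentum_signal", MetricType.HIT_RATE, 0.41)
if event is not None:
    print(event.action_taken.value)  # "warn"
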

class AdaptationManager:
    """Manager for coordinating skill adaptation across multiple agents.

    Provides centralized tracking of adaptation events and skill reloads.
    """

    def __init__(self, storage_dir: Path):
        """Initialize adaptation manager.

        Args:
            storage_dir: Root directory for storing adaptation data
        """
        self.storage_dir = Path(storage_dir)
        self._hooks: Dict[str, SkillAdaptationHook] = {}

    def get_hook(
        self,
        run_id: str,
        agent_id: str,
        thresholds: Optional[List[AdaptationThreshold]] = None,
    ) -> SkillAdaptationHook:
        """Get or create an adaptation hook for an agent.

        Args:
            run_id: Run identifier
            agent_id: Agent identifier
            thresholds: Optional custom thresholds

        Returns:
            SkillAdaptationHook instance
        """
        key = f"{run_id}:{agent_id}"
        if key not in self._hooks:
            self._hooks[key] = SkillAdaptationHook(
                storage_dir=self.storage_dir,
                run_id=run_id,
                agent_id=agent_id,
                thresholds=thresholds,
            )
        return self._hooks[key]

    def get_all_pending_warnings(self) -> List[AdaptationEvent]:
        """Get all pending warnings from all hooks."""
        warnings = []
        for hook in self._hooks.values():
            warnings.extend(hook.get_pending_warnings())
        return warnings

    def get_run_adaptations(self, run_id: str) -> List[AdaptationEvent]:
        """Get all adaptation events for a run."""
        events = []
        for hook in self._hooks.values():
            if hook.run_id == run_id:
                events.extend(hook.get_recent_events())
        return events


# Global manager instance
_adaptation_manager: Optional[AdaptationManager] = None


def get_adaptation_manager(storage_dir: Optional[Path] = None) -> AdaptationManager:
    """Get global adaptation manager instance.

    Args:
        storage_dir: Optional storage directory (required on first call)

    Returns:
        AdaptationManager instance
    """
    global _adaptation_manager
    if _adaptation_manager is None:
        if storage_dir is None:
            raise ValueError("storage_dir required on first initialization")
        _adaptation_manager = AdaptationManager(storage_dir)
    return _adaptation_manager


__all__ = [
    "AdaptationAction",
    "AdaptationThreshold",
    "AdaptationEvent",
    "SkillAdaptationHook",
    "AdaptationManager",
    "get_adaptation_manager",
]
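(Illustrative aside, not part of the committed file.) Finally, a sketch of the process-wide singleton usage; as above, the storage path and identifiers are assumptions for illustration.

from pathlib import Path

from backend.agents.base.skill_adaptation_hook import get_adaptation_manager

# The first call must supply storage_dir; later calls reuse the same instance.
manager = get_adaptation_manager(Path("./data/evaluations"))

# One hook per (run, agent) pair, created lazily and cached.
hook = manager.get_hook(run_id="run-001", agent_id="agent-a")

# Surface every WARN/BOTH event that still needs human review.
for event in manager.get_all_pending_warnings():
    print(event.skill_name, event.metric_type.value, event.current_value)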