# -*- coding: utf-8 -*- """Skill adaptation hook for automatic evaluation-to-iteration闭环. Monitors evaluation metrics against configurable thresholds and triggers automatic skill reload or logs warnings when thresholds are breached. """ from __future__ import annotations import json import logging from dataclasses import dataclass, field from datetime import datetime from enum import Enum from pathlib import Path from typing import Any, Dict, List, Optional, Set from .evaluation_hook import ( EvaluationCollector, EvaluationResult, MetricType, ) logger = logging.getLogger(__name__) class AdaptationAction(Enum): """Actions to take when threshold is breached.""" RELOAD = "reload" # 自动重新加载技能 WARN = "warn" # 记录警告供人工审核 BOTH = "both" # 同时执行重载和警告 NONE = "none" # 不做任何操作 @dataclass class AdaptationThreshold: """Threshold configuration for a metric.""" metric_type: MetricType operator: str = "lt" # lt (less than), gt (greater than), lte, gte, eq value: float = 0.0 window_size: int = 10 # 移动窗口大小,用于计算滑动平均 min_samples: int = 5 # 最少样本数才触发检查 action: AdaptationAction = AdaptationAction.WARN cooldown_seconds: int = 300 # 触发后的冷却时间 def evaluate(self, current_value: float) -> bool: """Evaluate if threshold is breached.""" ops = { "lt": lambda x, y: x < y, "lte": lambda x, y: x <= y, "gt": lambda x, y: x > y, "gte": lambda x, y: x >= y, "eq": lambda x, y: x == y, } op_func = ops.get(self.operator) if op_func is None: logger.warning(f"Unknown operator: {self.operator}") return False return op_func(current_value, self.value) def to_dict(self) -> Dict[str, Any]: return { "metric_type": self.metric_type.value, "operator": self.operator, "value": self.value, "window_size": self.window_size, "min_samples": self.min_samples, "action": self.action.value, "cooldown_seconds": self.cooldown_seconds, } @dataclass class AdaptationEvent: """Record of an adaptation trigger event.""" timestamp: str skill_name: str metric_type: MetricType threshold: AdaptationThreshold current_value: float avg_value: float action_taken: AdaptationAction details: Dict[str, Any] = field(default_factory=dict) def to_dict(self) -> Dict[str, Any]: return { "timestamp": self.timestamp, "skill_name": self.skill_name, "metric_type": self.metric_type.value, "threshold": self.threshold.to_dict(), "current_value": self.current_value, "avg_value": self.avg_value, "action_taken": self.action_taken.value, "details": self.details, } class SkillAdaptationHook: """Hook for monitoring evaluation metrics and triggering skill adaptation. This hook wraps EvaluationHook to add threshold-based adaptation logic. When metrics breach configured thresholds, it can: - Automatically reload skills via SkillsManager - Log warnings for human review - Both """ # Default thresholds for common metrics DEFAULT_THRESHOLDS: List[AdaptationThreshold] = [ AdaptationThreshold( metric_type=MetricType.HIT_RATE, operator="lt", value=0.5, action=AdaptationAction.WARN, cooldown_seconds=600, ), AdaptationThreshold( metric_type=MetricType.RISK_VIOLATION, operator="gt", value=0.1, action=AdaptationAction.WARN, cooldown_seconds=300, ), AdaptationThreshold( metric_type=MetricType.DECISION_LATENCY, operator="gt", value=5000, # 5 seconds action=AdaptationAction.WARN, cooldown_seconds=300, ), ] def __init__( self, storage_dir: Path, run_id: str, agent_id: str, thresholds: Optional[List[AdaptationThreshold]] = None, collector: Optional[EvaluationCollector] = None, ): """Initialize skill adaptation hook. Args: storage_dir: Directory to store adaptation events run_id: Current run identifier agent_id: Current agent identifier thresholds: Custom threshold configurations (uses defaults if None) collector: Optional EvaluationCollector for historical data """ self.storage_dir = Path(storage_dir) self.run_id = run_id self.agent_id = agent_id self.thresholds = thresholds or self.DEFAULT_THRESHOLDS self.collector = collector or EvaluationCollector(storage_dir) # Track cooldowns to prevent rapid re-triggering self._cooldowns: Dict[str, datetime] = {} # Store recent metrics in memory for quick access self._recent_metrics: Dict[str, List[float]] = {} # Pending adaptation events self._pending_events: List[AdaptationEvent] = [] def check_threshold( self, skill_name: str, metric_type: MetricType, current_value: float, ) -> Optional[AdaptationEvent]: """Check if a metric breaches any threshold. Args: skill_name: Name of the skill metric_type: Type of metric current_value: Current metric value Returns: AdaptationEvent if threshold breached, None otherwise """ # Find applicable thresholds applicable_thresholds = [ t for t in self.thresholds if t.metric_type == metric_type ] if not applicable_thresholds: return None # Check cooldown cooldown_key = f"{skill_name}:{metric_type.value}" now = datetime.now() last_trigger = self._cooldowns.get(cooldown_key) # Store current value first for avg calculation self._store_metric(cooldown_key, current_value) for threshold in applicable_thresholds: if last_trigger: elapsed = (now - last_trigger).total_seconds() if elapsed < threshold.cooldown_seconds: continue # Evaluate threshold if threshold.evaluate(current_value): # Calculate moving average avg_value = self._calculate_avg(skill_name, metric_type, current_value) # Check minimum samples (allow immediate trigger if min_samples <= 1) sample_count = len(self._recent_metrics.get(cooldown_key, [])) if threshold.min_samples > 1 and sample_count < threshold.min_samples: # Not enough samples yet continue # Trigger adaptation event = AdaptationEvent( timestamp=now.isoformat(), skill_name=skill_name, metric_type=metric_type, threshold=threshold, current_value=current_value, avg_value=avg_value, action_taken=threshold.action, details={ "run_id": self.run_id, "agent_id": self.agent_id, }, ) # Update cooldown self._cooldowns[cooldown_key] = now # Persist event self._persist_event(event) logger.info( f"Threshold breached for {skill_name}.{metric_type.value}: " f"current={current_value}, avg={avg_value}, action={threshold.action.value}" ) return event return None def _calculate_avg( self, skill_name: str, metric_type: MetricType, current_value: float, ) -> float: """Calculate moving average for a metric.""" key = f"{skill_name}:{metric_type.value}" values = self._recent_metrics.get(key, []) if not values: return current_value return sum(values) / len(values) def _store_metric(self, key: str, value: float) -> None: """Store metric value with sliding window.""" if key not in self._recent_metrics: self._recent_metrics[key] = [] self._recent_metrics[key].append(value) # Keep only last 100 values if len(self._recent_metrics[key]) > 100: self._recent_metrics[key] = self._recent_metrics[key][-100:] def _persist_event(self, event: AdaptationEvent) -> None: """Persist adaptation event to storage.""" run_dir = self.storage_dir / self.run_id / "adaptations" run_dir.mkdir(parents=True, exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f") filename = f"{event.skill_name}_{event.metric_type.value}_{timestamp}.json" filepath = run_dir / filename try: with open(filepath, "w", encoding="utf-8") as f: json.dump(event.to_dict(), f, ensure_ascii=False, indent=2) logger.debug(f"Persisted adaptation event to: {filepath}") except Exception as e: logger.error(f"Failed to persist adaptation event: {e}") # Also add to pending list self._pending_events.append(event) def get_pending_warnings(self) -> List[AdaptationEvent]: """Get all pending warning events that need human review.""" return [ e for e in self._pending_events if e.action_taken in (AdaptationAction.WARN, AdaptationAction.BOTH) ] def clear_pending_warnings(self) -> None: """Clear pending warnings after they have been reviewed.""" self._pending_events = [ e for e in self._pending_events if e.action_taken == AdaptationAction.RELOAD ] def get_recent_events( self, skill_name: Optional[str] = None, metric_type: Optional[MetricType] = None, limit: int = 50, ) -> List[AdaptationEvent]: """Get recent adaptation events. Args: skill_name: Optional filter by skill name metric_type: Optional filter by metric type limit: Maximum number of events to return Returns: List of recent adaptation events """ events_dir = self.storage_dir / self.run_id / "adaptations" if not events_dir.exists(): return [] events = [] for eval_file in sorted(events_dir.glob("*.json"), reverse=True)[:limit]: try: with open(eval_file, "r", encoding="utf-8") as f: data = json.load(f) event = self._parse_event(data) if skill_name and event.skill_name != skill_name: continue if metric_type and event.metric_type != metric_type: continue events.append(event) except Exception as e: logger.warning(f"Failed to load adaptation event {eval_file}: {e}") return events def _parse_event(self, data: Dict[str, Any]) -> AdaptationEvent: """Parse adaptation event from JSON data.""" threshold_data = data.get("threshold", {}) metric_type = MetricType(threshold_data.get("metric_type", "custom")) threshold = AdaptationThreshold( metric_type=metric_type, operator=threshold_data.get("operator", "lt"), value=threshold_data.get("value", 0.0), window_size=threshold_data.get("window_size", 10), min_samples=threshold_data.get("min_samples", 5), action=AdaptationAction(threshold_data.get("action", "warn")), cooldown_seconds=threshold_data.get("cooldown_seconds", 300), ) return AdaptationEvent( timestamp=data.get("timestamp", ""), skill_name=data.get("skill_name", ""), metric_type=metric_type, threshold=threshold, current_value=data.get("current_value", 0.0), avg_value=data.get("avg_value", 0.0), action_taken=AdaptationAction(data.get("action_taken", "warn")), details=data.get("details", {}), ) def add_threshold(self, threshold: AdaptationThreshold) -> None: """Add a new threshold configuration.""" self.thresholds.append(threshold) def remove_threshold(self, metric_type: MetricType) -> None: """Remove all thresholds for a specific metric type.""" self.thresholds = [ t for t in self.thresholds if t.metric_type != metric_type ] def update_threshold( self, metric_type: MetricType, **kwargs, ) -> None: """Update threshold configuration for a metric type.""" for threshold in self.thresholds: if threshold.metric_type == metric_type: for key, value in kwargs.items(): if hasattr(threshold, key): setattr(threshold, key, value) def get_thresholds(self) -> List[AdaptationThreshold]: """Get current threshold configurations.""" return list(self.thresholds) def is_in_cooldown(self, skill_name: str, metric_type: MetricType) -> bool: """Check if a skill/metric combination is in cooldown period.""" key = f"{skill_name}:{metric_type.value}" last_trigger = self._cooldowns.get(key) if not last_trigger: return False # Find the threshold for this metric type for threshold in self.thresholds: if threshold.metric_type == metric_type: elapsed = (datetime.now() - last_trigger).total_seconds() return elapsed < threshold.cooldown_seconds return False class AdaptationManager: """Manager for coordinating skill adaptation across multiple agents. Provides centralized tracking of adaptation events and skill reloads. """ def __init__(self, storage_dir: Path): """Initialize adaptation manager. Args: storage_dir: Root directory for storing adaptation data """ self.storage_dir = Path(storage_dir) self._hooks: Dict[str, SkillAdaptationHook] = {} def get_hook( self, run_id: str, agent_id: str, thresholds: Optional[List[AdaptationThreshold]] = None, ) -> SkillAdaptationHook: """Get or create an adaptation hook for an agent. Args: run_id: Run identifier agent_id: Agent identifier thresholds: Optional custom thresholds Returns: SkillAdaptationHook instance """ key = f"{run_id}:{agent_id}" if key not in self._hooks: self._hooks[key] = SkillAdaptationHook( storage_dir=self.storage_dir, run_id=run_id, agent_id=agent_id, thresholds=thresholds, ) return self._hooks[key] def get_all_pending_warnings(self) -> List[AdaptationEvent]: """Get all pending warnings from all hooks.""" warnings = [] for hook in self._hooks.values(): warnings.extend(hook.get_pending_warnings()) return warnings def get_run_adaptations(self, run_id: str) -> List[AdaptationEvent]: """Get all adaptation events for a run.""" events = [] for hook in self._hooks.values(): if hook.run_id == run_id: events.extend(hook.get_recent_events()) return events # Global manager instance _adaptation_manager: Optional[AdaptationManager] = None def get_adaptation_manager(storage_dir: Optional[Path] = None) -> AdaptationManager: """Get global adaptation manager instance. Args: storage_dir: Optional storage directory (required on first call) Returns: AdaptationManager instance """ global _adaptation_manager if _adaptation_manager is None: if storage_dir is None: raise ValueError("storage_dir required on first initialization") _adaptation_manager = AdaptationManager(storage_dir) return _adaptation_manager __all__ = [ "AdaptationAction", "AdaptationThreshold", "AdaptationEvent", "SkillAdaptationHook", "AdaptationManager", "get_adaptation_manager", ]