feat: Add evaluation hooks, skill adaptation and team pipeline config
- Add EvaluationHook for post-execution agent evaluation - Add SkillAdaptationHook for dynamic skill adaptation - Add team/ directory with team coordination logic - Add TEAM_PIPELINE.yaml for smoke_fullstack pipeline config - Update RuntimeView, TraderView and RuntimeSettingsPanel UI - Add runtimeApi and websocket services - Add runtime_state.json to smoke_fullstack state Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,9 +3,11 @@
|
||||
AgentScope Native Model Factory
|
||||
Uses native AgentScope model classes for LLM calls
|
||||
"""
|
||||
from enum import Enum
|
||||
from typing import Optional, Tuple
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import Any, Callable, Optional, Tuple, TypeVar, Union
|
||||
from agentscope.formatter import (
|
||||
AnthropicChatFormatter,
|
||||
DashScopeChatFormatter,
|
||||
@@ -26,6 +28,244 @@ from backend.config.env_config import (
|
||||
get_env_str,
|
||||
)
|
||||
|
||||
# Module-level logger, namespaced to this module per logging convention.
logger = logging.getLogger(__name__)

# Retry wrapper types
# Generic return type for functions passed through the retry wrapper.
T = TypeVar("T")
|
||||
|
||||
|
||||
class RetryChatModel:
    """Wraps an AgentScope model with automatic retry for transient errors.

    Based on CoPaw's RetryChatModel design. Handles rate limits, timeouts,
    and other transient failures with exponential backoff.

    The wrapper is a transparent proxy: calling the instance forwards to the
    wrapped model, and unknown attribute lookups are delegated to it via
    ``__getattr__``.
    """

    DEFAULT_MAX_RETRIES = 3
    DEFAULT_INITIAL_DELAY = 1.0
    DEFAULT_MAX_DELAY = 60.0
    DEFAULT_BACKOFF_MULTIPLIER = 2.0

    # Transient error codes/messages that should trigger retry.
    # Matched as substrings against the lowercased str() of the exception.
    TRANSIENT_ERROR_KEYWORDS = frozenset([
        "rate_limit",
        "429",
        "timeout",
        "503",
        "502",
        "504",
        "connection",
        "temporary",
        "overloaded",
        "too_many_requests",
    ])

    def __init__(
        self,
        model: Any,
        max_retries: int = DEFAULT_MAX_RETRIES,
        initial_delay: float = DEFAULT_INITIAL_DELAY,
        max_delay: float = DEFAULT_MAX_DELAY,
        backoff_multiplier: float = DEFAULT_BACKOFF_MULTIPLIER,
        on_retry: Optional[Callable[[int, Exception, float], None]] = None,
    ):
        """Initialize retry wrapper.

        Args:
            model: The underlying AgentScope model to wrap
            max_retries: Maximum number of attempts; values < 1 are treated
                as 1 so the model is always called at least once
            initial_delay: Initial delay in seconds before first retry
            max_delay: Maximum delay between retries
            backoff_multiplier: Multiplier for exponential backoff
            on_retry: Optional callback(retry_count, exception, delay) for logging
        """
        self._model = model
        self._max_retries = max_retries
        self._initial_delay = initial_delay
        self._max_delay = max_delay
        self._backoff_multiplier = backoff_multiplier
        self._on_retry = on_retry
        self._total_tokens_used = 0
        self._total_cost = 0.0

    @property
    def model_name(self) -> str:
        """Name of the wrapped model, falling back to its repr."""
        return getattr(self._model, "model_name", str(self._model))

    @property
    def total_tokens_used(self) -> int:
        """Total tokens recorded from successful responses."""
        return self._total_tokens_used

    @property
    def total_cost(self) -> float:
        """Accumulated cost recorded from successful responses."""
        return self._total_cost

    def _is_transient_error(self, error: Exception) -> bool:
        """Check if an error is transient and should be retried.

        Args:
            error: The exception to check

        Returns:
            True if the error's message contains any transient keyword
        """
        error_str = str(error).lower()
        return any(keyword in error_str for keyword in self.TRANSIENT_ERROR_KEYWORDS)

    def _calculate_delay(self, retry_count: int) -> float:
        """Calculate delay for given retry attempt with exponential backoff.

        Args:
            retry_count: Current retry attempt number (1-based)

        Returns:
            Delay in seconds, capped at ``self._max_delay``
        """
        delay = self._initial_delay * (self._backoff_multiplier ** (retry_count - 1))
        return min(delay, self._max_delay)

    def _call_with_retry(self, func: Callable[..., "T"], *args, **kwargs) -> "T":
        """Call a function with retry logic for transient errors.

        Args:
            func: Function to call
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Result from func

        Raises:
            Last exception if all retries exhausted or the error is
            non-transient
        """
        last_error: Optional[Exception] = None
        # BUGFIX: guarantee at least one attempt. With max_retries <= 0 the
        # original loop body never executed, so the model was never called
        # and a misleading RuntimeError was raised instead.
        attempts = max(1, self._max_retries)

        for attempt in range(1, attempts + 1):
            try:
                result = func(*args, **kwargs)

                # Track usage if available
                if hasattr(result, "usage") and result.usage:
                    usage = result.usage
                    self._total_tokens_used += getattr(usage, "total_tokens", 0)
                    self._total_cost += getattr(usage, "cost", 0.0)

                return result

            except Exception as e:
                last_error = e

                # BUGFIX: classify the error BEFORE checking exhaustion, so a
                # non-transient failure on the final attempt is logged as
                # non-transient instead of as "max retries exhausted".
                if not self._is_transient_error(e):
                    logger.warning(
                        "RetryChatModel: Non-transient error, not retrying: %s",
                        str(e),
                    )
                    break

                if attempt >= attempts:
                    logger.error(
                        "RetryChatModel: Max retries (%d) exhausted for %s",
                        self._max_retries,
                        self.model_name,
                    )
                    break

                delay = self._calculate_delay(attempt)
                logger.warning(
                    "RetryChatModel: Transient error on attempt %d/%d, "
                    "retrying in %.1fs: %s",
                    attempt,
                    self._max_retries,
                    delay,
                    str(e)[:200],  # truncate to keep log lines bounded
                )

                if self._on_retry:
                    self._on_retry(attempt, e, delay)

                time.sleep(delay)

        if last_error is not None:
            raise last_error
        # Unreachable in practice: success returns, failure sets last_error.
        raise RuntimeError("RetryChatModel: Unexpected state, no error but no result")

    def __call__(self, *args, **kwargs) -> Any:
        """Forward calls to the wrapped model with retry logic."""
        return self._call_with_retry(self._model, *args, **kwargs)

    def __getattr__(self, name: str) -> Any:
        """Proxy attribute access to the wrapped model."""
        return getattr(self._model, name)
|
||||
|
||||
|
||||
class TokenRecordingModelWrapper:
    """Transparent proxy around a model that accumulates token usage.

    Based on CoPaw's TokenRecordingModelWrapper design.
    """

    def __init__(self, model: Any):
        """Wrap ``model`` and zero all usage counters.

        Args:
            model: The underlying AgentScope model to wrap
        """
        self._model = model
        # Running totals, accumulated across every recorded response.
        self._counts = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        }
        self._cost_accumulated = 0.0

    @property
    def model_name(self) -> str:
        """Name of the wrapped model, falling back to its repr."""
        return getattr(self._model, "model_name", str(self._model))

    @property
    def total_tokens(self) -> int:
        """Total tokens recorded so far."""
        return self._counts["total_tokens"]

    @property
    def prompt_tokens(self) -> int:
        """Prompt-side tokens recorded so far."""
        return self._counts["prompt_tokens"]

    @property
    def completion_tokens(self) -> int:
        """Completion-side tokens recorded so far."""
        return self._counts["completion_tokens"]

    @property
    def total_cost(self) -> float:
        """Accumulated cost recorded so far."""
        return self._cost_accumulated

    def record_usage(self, usage: Any) -> None:
        """Fold one response's usage counters into the running totals.

        Args:
            usage: Usage object from model response
        """
        if usage is None:
            return

        # Missing attributes on the usage object count as zero.
        for field in self._counts:
            self._counts[field] += getattr(usage, field, 0)
        self._cost_accumulated += getattr(usage, "cost", 0.0)

    def __call__(self, *args, **kwargs) -> Any:
        """Invoke the wrapped model, recording usage when the response has it."""
        response = self._model(*args, **kwargs)

        usage = getattr(response, "usage", None)
        if usage:
            self.record_usage(usage)

        return response

    def __getattr__(self, name: str) -> Any:
        """Delegate unknown attribute lookups to the wrapped model."""
        return getattr(self._model, name)
|
||||
|
||||
|
||||
class ModelProvider(Enum):
|
||||
"""Supported model providers"""
|
||||
|
||||
Reference in New Issue
Block a user