Fix runtime logging and frontend app regressions
This commit is contained in:
@@ -3,6 +3,8 @@
|
||||
AgentScope Native Model Factory
|
||||
Uses native AgentScope model classes for LLM calls
|
||||
"""
|
||||
import asyncio
|
||||
import inspect
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
@@ -34,6 +36,27 @@ logger = logging.getLogger(__name__)
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def _usage_value(usage: Any, key: str, default: Any = 0) -> Any:
|
||||
"""Read usage fields from both object-style and dict-style usage payloads."""
|
||||
if usage is None:
|
||||
return default
|
||||
if isinstance(usage, dict):
|
||||
return usage.get(key, default)
|
||||
try:
|
||||
return getattr(usage, key)
|
||||
except (AttributeError, KeyError):
|
||||
return default
|
||||
|
||||
|
||||
def _usage_total_tokens(usage: Any) -> int:
|
||||
total = _usage_value(usage, "total_tokens", None)
|
||||
if total is not None:
|
||||
return int(total or 0)
|
||||
input_tokens = _usage_value(usage, "input_tokens", 0)
|
||||
output_tokens = _usage_value(usage, "output_tokens", 0)
|
||||
return int((input_tokens or 0) + (output_tokens or 0))
|
||||
|
||||
|
||||
class RetryChatModel:
|
||||
"""Wraps an AgentScope model with automatic retry for transient errors.
|
||||
|
||||
@@ -55,6 +78,7 @@ class RetryChatModel:
|
||||
"502",
|
||||
"504",
|
||||
"connection",
|
||||
"disconnected",
|
||||
"temporary",
|
||||
"overloaded",
|
||||
"too_many_requests",
|
||||
@@ -150,8 +174,8 @@ class RetryChatModel:
|
||||
# Track usage if available
|
||||
if hasattr(result, "usage") and result.usage:
|
||||
usage = result.usage
|
||||
self._total_tokens_used += getattr(usage, "total_tokens", 0)
|
||||
self._total_cost += getattr(usage, "cost", 0.0)
|
||||
self._total_tokens_used += _usage_total_tokens(usage)
|
||||
self._total_cost += float(_usage_value(usage, "cost", 0.0) or 0.0)
|
||||
|
||||
return result
|
||||
|
||||
@@ -192,9 +216,66 @@ class RetryChatModel:
|
||||
raise last_error
|
||||
raise RuntimeError("RetryChatModel: Unexpected state, no error but no result")
|
||||
|
||||
async def _call_with_retry_async(self, func: Callable[..., T], *args, **kwargs) -> T:
    """Invoke an async callable, retrying transient failures with backoff.

    Mirrors the sync retry path: on success, accumulates token/cost usage
    from the result's ``usage`` payload; on failure, retries only transient
    errors, then re-raises the last error once retries are exhausted or the
    error is non-transient.
    """
    pending_exc: Optional[Exception] = None
    attempt = 0

    while attempt < self._max_retries:
        attempt += 1
        try:
            outcome = await func(*args, **kwargs)

            # Record usage when the result carries a truthy usage payload.
            usage = getattr(outcome, "usage", None)
            if usage:
                self._total_tokens_used += _usage_total_tokens(usage)
                self._total_cost += float(_usage_value(usage, "cost", 0.0) or 0.0)

            return outcome

        except Exception as exc:
            pending_exc = exc

            if attempt >= self._max_retries:
                logger.error(
                    "RetryChatModel: Max retries (%d) exhausted for %s",
                    self._max_retries,
                    self.model_name,
                )
                break

            if not self._is_transient_error(exc):
                logger.warning(
                    "RetryChatModel: Non-transient error, not retrying: %s",
                    str(exc),
                )
                break

            pause = self._calculate_delay(attempt)
            logger.warning(
                "RetryChatModel: Transient async error on attempt %d/%d, "
                "retrying in %.1fs: %s",
                attempt,
                self._max_retries,
                pause,
                str(exc)[:200],
            )

            if self._on_retry:
                self._on_retry(attempt, exc, pause)

            await asyncio.sleep(pause)

    if pending_exc is not None:
        raise pending_exc
    raise RuntimeError("RetryChatModel: Unexpected async state, no error but no result")
|
||||
|
||||
def __call__(self, *args, **kwargs) -> Any:
|
||||
"""Forward calls to the wrapped model with retry logic."""
|
||||
return self._call_with_retry(self._model, *args, **kwargs)
|
||||
model_call = getattr(self._model, "__call__", None)
|
||||
if inspect.iscoroutinefunction(self._model) or inspect.iscoroutinefunction(model_call):
|
||||
return self._call_with_retry_async(self._model, *args, **kwargs)
|
||||
|
||||
result = self._model(*args, **kwargs)
|
||||
return result
|
||||
|
||||
def __getattr__(self, name: str) -> Any:
|
||||
"""Proxy attribute access to the wrapped model."""
|
||||
@@ -248,10 +329,18 @@ class TokenRecordingModelWrapper:
|
||||
if usage is None:
|
||||
return
|
||||
|
||||
self._prompt_tokens += getattr(usage, "prompt_tokens", 0)
|
||||
self._completion_tokens += getattr(usage, "completion_tokens", 0)
|
||||
self._total_tokens += getattr(usage, "total_tokens", 0)
|
||||
self._total_cost += getattr(usage, "cost", 0.0)
|
||||
prompt_tokens = _usage_value(usage, "prompt_tokens", None)
|
||||
completion_tokens = _usage_value(usage, "completion_tokens", None)
|
||||
|
||||
if prompt_tokens is None:
|
||||
prompt_tokens = _usage_value(usage, "input_tokens", 0)
|
||||
if completion_tokens is None:
|
||||
completion_tokens = _usage_value(usage, "output_tokens", 0)
|
||||
|
||||
self._prompt_tokens += int(prompt_tokens or 0)
|
||||
self._completion_tokens += int(completion_tokens or 0)
|
||||
self._total_tokens += _usage_total_tokens(usage)
|
||||
self._total_cost += float(_usage_value(usage, "cost", 0.0) or 0.0)
|
||||
|
||||
def __call__(self, *args, **kwargs) -> Any:
|
||||
"""Forward calls and record usage."""
|
||||
@@ -401,7 +490,8 @@ def create_model(
|
||||
if host:
|
||||
model_kwargs["host"] = host
|
||||
|
||||
return model_class(**model_kwargs)
|
||||
model = model_class(**model_kwargs)
|
||||
return RetryChatModel(model)
|
||||
|
||||
|
||||
def get_agent_model(agent_id: str, stream: bool = False):
|
||||
|
||||
Reference in New Issue
Block a user