feat(backend): Agent 系统和 API 增强

- task_delegator: 完善团队任务分发逻辑
- runtime API: 增强运行时管理功能
- skills_manager: 技能管理改进
- tool_guard: 工具调用守卫优化
- evo_agent: 核心 Agent 改进

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-23 17:46:06 +08:00
parent 3448667b79
commit 38102d0805
5 changed files with 725 additions and 165 deletions

View File

@@ -16,20 +16,123 @@ from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
from fastapi import APIRouter, HTTPException, BackgroundTasks
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
from pydantic import BaseModel, Field
from backend.runtime.agent_runtime import AgentRuntimeState
from backend.runtime.manager import TradingRuntimeManager, get_global_runtime_manager
from backend.config.bootstrap_config import (
resolve_runtime_config,
update_bootstrap_values_for_run,
)
router = APIRouter(prefix="/api/runtime", tags=["runtime"])
runtime_manager: Optional[TradingRuntimeManager] = None
PROJECT_ROOT = Path(__file__).resolve().parents[2]
# Gateway process management
_gateway_process: Optional[subprocess.Popen] = None
_gateway_port: int = 8765
class RuntimeState:
"""Thread-safe singleton for managing runtime state.
Encapsulates runtime_manager, _gateway_process, and _gateway_port
with asyncio.Lock protection for concurrent access.
"""
_instance: Optional["RuntimeState"] = None
_lock: asyncio.Lock = asyncio.Lock()
def __new__(cls) -> "RuntimeState":
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._initialized = False
return cls._instance
def __init__(self) -> None:
if self._initialized:
return
self._runtime_manager: Optional[Any] = None
self._gateway_process: Optional[subprocess.Popen] = None
self._gateway_port: int = 8765
self._state_lock = asyncio.Lock()
self._initialized = True
@property
async def lock(self) -> asyncio.Lock:
"""Get the asyncio lock for state synchronization."""
return self._state_lock
@property
def runtime_manager(self) -> Optional[Any]:
"""Get the runtime manager (no lock - read only)."""
return self._runtime_manager
@runtime_manager.setter
def runtime_manager(self, value: Optional[Any]) -> None:
"""Set the runtime manager."""
self._runtime_manager = value
@property
def gateway_process(self) -> Optional[subprocess.Popen]:
"""Get the gateway process (no lock - read only)."""
return self._gateway_process
@gateway_process.setter
def gateway_process(self, value: Optional[subprocess.Popen]) -> None:
"""Set the gateway process."""
self._gateway_process = value
@property
def gateway_port(self) -> int:
"""Get the gateway port."""
return self._gateway_port
@gateway_port.setter
def gateway_port(self, value: int) -> None:
"""Set the gateway port."""
self._gateway_port = value
async def set_runtime_manager(self, manager: Any) -> None:
"""Set runtime manager with lock protection."""
async with self._state_lock:
self._runtime_manager = manager
async def get_runtime_manager(self) -> Optional[Any]:
"""Get runtime manager with lock protection."""
async with self._state_lock:
return self._runtime_manager
async def set_gateway_process(self, process: Optional[subprocess.Popen]) -> None:
"""Set gateway process with lock protection."""
async with self._state_lock:
self._gateway_process = process
async def get_gateway_process(self) -> Optional[subprocess.Popen]:
"""Get gateway process with lock protection."""
async with self._state_lock:
return self._gateway_process
async def set_gateway_port(self, port: int) -> None:
"""Set gateway port with lock protection."""
async with self._state_lock:
self._gateway_port = port
async def get_gateway_port(self) -> int:
"""Get gateway port with lock protection."""
async with self._state_lock:
return self._gateway_port
# Singleton instance
_runtime_state = RuntimeState()
def get_runtime_state() -> RuntimeState:
"""Get the RuntimeState singleton instance."""
return _runtime_state
# Backward compatibility: module-level runtime_manager for external imports
# This is set by register_runtime_manager() for backward compatibility
runtime_manager: Optional[Any] = None
class RunContextResponse(BaseModel):
@@ -96,6 +199,24 @@ class GatewayStatusResponse(BaseModel):
run_id: Optional[str] = None
class RuntimeConfigResponse(BaseModel):
run_id: str
is_running: bool
gateway_port: int
bootstrap: Dict[str, Any]
resolved: Dict[str, Any]
class UpdateRuntimeConfigRequest(BaseModel):
schedule_mode: Optional[str] = None
interval_minutes: Optional[int] = Field(default=None, ge=1)
trigger_time: Optional[str] = None
max_comm_cycles: Optional[int] = Field(default=None, ge=1)
initial_cash: Optional[float] = Field(default=None, gt=0)
margin_requirement: Optional[float] = Field(default=None, ge=0)
enable_memory: Optional[bool] = None
def _generate_run_id() -> str:
"""Generate timestamp-based run ID: YYYYMMDD_HHMMSS"""
return datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -118,31 +239,31 @@ def _find_available_port(start_port: int = 8765, max_port: int = 9000) -> int:
def _is_gateway_running() -> bool:
"""Check if Gateway process is running."""
global _gateway_process
if _gateway_process is None:
process = _runtime_state.gateway_process
if process is None:
return False
return _gateway_process.poll() is None
return process.poll() is None
def _stop_gateway() -> bool:
"""Stop the Gateway process."""
global _gateway_process
if _gateway_process is None:
process = _runtime_state.gateway_process
if process is None:
return False
try:
# Try graceful shutdown first
_gateway_process.terminate()
process.terminate()
try:
_gateway_process.wait(timeout=5)
process.wait(timeout=5)
except subprocess.TimeoutExpired:
# Force kill if graceful shutdown fails
_gateway_process.kill()
_gateway_process.wait()
process.kill()
process.wait()
except Exception as e:
logger.warning(f"Error during gateway shutdown: {e}")
finally:
_gateway_process = None
_runtime_state.gateway_process = None
return True
@@ -237,8 +358,6 @@ async def get_runtime_events() -> RuntimeEventsResponse:
@router.get("/gateway/status", response_model=GatewayStatusResponse)
async def get_gateway_status() -> GatewayStatusResponse:
"""Get Gateway process status and port."""
global _gateway_port
is_running = _is_gateway_running()
run_id = None
@@ -255,22 +374,128 @@ async def get_gateway_status() -> GatewayStatusResponse:
return GatewayStatusResponse(
is_running=is_running,
port=_gateway_port,
port=_runtime_state.gateway_port,
run_id=run_id
)
@router.get("/gateway/port")
async def get_gateway_port() -> Dict[str, Any]:
async def get_gateway_port(request: Request) -> Dict[str, Any]:
"""Get WebSocket Gateway port for frontend connection."""
global _gateway_port
gateway_port = _runtime_state.gateway_port
return {
"port": _gateway_port,
"port": gateway_port,
"is_running": _is_gateway_running(),
"ws_url": f"ws://localhost:{_gateway_port}"
"ws_url": _build_gateway_ws_url(request, gateway_port),
}
def _build_gateway_ws_url(request: Request, port: int) -> str:
"""Build a proxy-safe Gateway WebSocket URL."""
forwarded_proto = request.headers.get("x-forwarded-proto", "").split(",")[0].strip()
scheme = forwarded_proto or request.url.scheme
ws_scheme = "wss" if scheme == "https" else "ws"
forwarded_host = request.headers.get("x-forwarded-host", "").split(",")[0].strip()
host = forwarded_host or request.url.hostname or "localhost"
if ":" in host and not host.startswith("["):
host = host.split(":", 1)[0]
return f"{ws_scheme}://{host}:{port}"
def _load_latest_runtime_snapshot() -> Dict[str, Any]:
"""Load the latest persisted runtime snapshot."""
snapshots = sorted(
PROJECT_ROOT.glob("runs/*/state/runtime_state.json"),
key=lambda p: p.stat().st_mtime,
reverse=True,
)
if not snapshots:
raise HTTPException(status_code=404, detail="No runtime information available")
return json.loads(snapshots[0].read_text(encoding="utf-8"))
def _get_current_runtime_context() -> Dict[str, Any]:
"""Return the active runtime context from the latest snapshot."""
if not _is_gateway_running():
raise HTTPException(status_code=404, detail="No runtime is currently running")
latest = _load_latest_runtime_snapshot()
context = latest.get("context") or {}
if not context.get("config_name"):
raise HTTPException(status_code=404, detail="No runtime context available")
return context
def _resolve_runtime_response(run_id: str) -> RuntimeConfigResponse:
"""Build a normalized runtime config response for the active run."""
context = _get_current_runtime_context()
bootstrap = dict(context.get("bootstrap_values") or {})
resolved = resolve_runtime_config(
project_root=PROJECT_ROOT,
config_name=run_id,
enable_memory=bool(bootstrap.get("enable_memory", False)),
schedule_mode=str(bootstrap.get("schedule_mode", "daily")),
interval_minutes=int(bootstrap.get("interval_minutes", 60) or 60),
trigger_time=str(bootstrap.get("trigger_time", "09:30") or "09:30"),
)
return RuntimeConfigResponse(
run_id=run_id,
is_running=True,
gateway_port=_runtime_state.gateway_port,
bootstrap=bootstrap,
resolved=resolved,
)
def _normalize_runtime_config_updates(
request: UpdateRuntimeConfigRequest,
) -> Dict[str, Any]:
"""Validate and normalize runtime config updates."""
updates: Dict[str, Any] = {}
if request.schedule_mode is not None:
schedule_mode = str(request.schedule_mode).strip().lower()
if schedule_mode not in {"daily", "intraday"}:
raise HTTPException(
status_code=400,
detail="schedule_mode must be 'daily' or 'intraday'",
)
updates["schedule_mode"] = schedule_mode
if request.interval_minutes is not None:
updates["interval_minutes"] = int(request.interval_minutes)
if request.trigger_time is not None:
trigger_time = str(request.trigger_time).strip()
if trigger_time and trigger_time != "now":
try:
datetime.strptime(trigger_time, "%H:%M")
except ValueError as exc:
raise HTTPException(
status_code=400,
detail="trigger_time must use HH:MM or 'now'",
) from exc
updates["trigger_time"] = trigger_time or "09:30"
if request.max_comm_cycles is not None:
updates["max_comm_cycles"] = int(request.max_comm_cycles)
if request.initial_cash is not None:
updates["initial_cash"] = float(request.initial_cash)
if request.margin_requirement is not None:
updates["margin_requirement"] = float(request.margin_requirement)
if request.enable_memory is not None:
updates["enable_memory"] = bool(request.enable_memory)
if not updates:
raise HTTPException(status_code=400, detail="No runtime config updates provided")
return updates
@router.post("/start", response_model=LaunchResponse)
async def start_runtime(
config: LaunchConfig,
@@ -284,7 +509,8 @@ async def start_runtime(
4. Start Gateway as subprocess (Data Plane)
5. Return Gateway port for WebSocket connection
"""
global _gateway_process, _gateway_port
# Lazy import to avoid circular dependency
from backend.runtime.manager import TradingRuntimeManager
# 1. Stop existing Gateway
if _is_gateway_running():
@@ -325,22 +551,24 @@ async def start_runtime(
_write_bootstrap_md(run_dir, bootstrap)
# 6. Find available port and start Gateway process
_gateway_port = _find_available_port(start_port=8765)
gateway_port = _find_available_port(start_port=8765)
_runtime_state.gateway_port = gateway_port
try:
_gateway_process = _start_gateway_process(
process = _start_gateway_process(
run_id=run_id,
run_dir=run_dir,
bootstrap=bootstrap,
port=_gateway_port
port=gateway_port
)
_runtime_state.gateway_process = process
# Wait briefly to check if process started successfully
await asyncio.sleep(2)
if not _is_gateway_running():
stdout, stderr = _gateway_process.communicate(timeout=1)
_gateway_process = None
stdout, stderr = process.communicate(timeout=1)
_runtime_state.gateway_process = None
raise HTTPException(
status_code=500,
detail=f"Gateway failed to start: {stderr.decode() if stderr else 'Unknown error'}"
@@ -354,16 +582,44 @@ async def start_runtime(
run_id=run_id,
status="started",
run_dir=str(run_dir),
gateway_port=_gateway_port,
message=f"Runtime started with run_id: {run_id}, Gateway on port: {_gateway_port}",
gateway_port=gateway_port,
message=f"Runtime started with run_id: {run_id}, Gateway on port: {gateway_port}",
)
@router.get("/config", response_model=RuntimeConfigResponse)
async def get_runtime_config() -> RuntimeConfigResponse:
"""Return the current runtime bootstrap and resolved settings."""
context = _get_current_runtime_context()
return _resolve_runtime_response(context["config_name"])
@router.put("/config", response_model=RuntimeConfigResponse)
async def update_runtime_config(
request: UpdateRuntimeConfigRequest,
) -> RuntimeConfigResponse:
"""Persist selected runtime configuration updates for the active run."""
context = _get_current_runtime_context()
run_id = context["config_name"]
updates = _normalize_runtime_config_updates(request)
updated = update_bootstrap_values_for_run(PROJECT_ROOT, run_id, updates)
manager = _runtime_state.runtime_manager
if manager is not None and getattr(manager, "config_name", None) == run_id:
manager.bootstrap.update(updates)
if getattr(manager, "context", None) is not None:
manager.context.bootstrap_values.update(updates)
if hasattr(manager, "_persist_snapshot"):
manager._persist_snapshot()
response = _resolve_runtime_response(run_id)
response.bootstrap = dict(updated.values)
return response
@router.post("/stop", response_model=StopResponse)
async def stop_runtime(force: bool = True) -> StopResponse:
"""Stop the current running runtime."""
global _gateway_process
was_running = _is_gateway_running()
if not was_running:
@@ -421,21 +677,25 @@ async def get_current_runtime():
"run_id": context.get("config_name"),
"run_dir": context.get("run_dir"),
"is_running": True,
"gateway_port": _gateway_port,
"gateway_port": _runtime_state.gateway_port,
"bootstrap": context.get("bootstrap_values", {}),
}
def register_runtime_manager(manager: TradingRuntimeManager) -> None:
def register_runtime_manager(manager: Any) -> None:
"""Allow other modules to expose the runtime manager to the API."""
global runtime_manager
runtime_manager = manager
# Also update the RuntimeState singleton for internal consistency
_runtime_state.runtime_manager = manager
def unregister_runtime_manager() -> None:
"""Drop the runtime manager reference."""
global runtime_manager
runtime_manager = None
# Also update the RuntimeState singleton for internal consistency
_runtime_state.runtime_manager = None
def _write_bootstrap_md(run_dir: Path, bootstrap: Dict[str, Any]) -> None: