Files
evotraders/backend/api/runtime.py
cillin 4b5ac86b83 feat: Add evaluation hooks, skill adaptation and team pipeline config
- Add EvaluationHook for post-execution agent evaluation
- Add SkillAdaptationHook for dynamic skill adaptation
- Add team/ directory with team coordination logic
- Add TEAM_PIPELINE.yaml for smoke_fullstack pipeline config
- Update RuntimeView, TraderView and RuntimeSettingsPanel UI
- Add runtimeApi and websocket services
- Add runtime_state.json to smoke_fullstack state

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 18:52:12 +08:00

461 lines
14 KiB
Python

# -*- coding: utf-8 -*-
"""Runtime API routes - Control Plane for managing Gateway processes."""
from __future__ import annotations
import asyncio
import json
import logging
import os
import signal
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
from fastapi import APIRouter, HTTPException, BackgroundTasks
from pydantic import BaseModel, Field
from backend.runtime.agent_runtime import AgentRuntimeState
from backend.runtime.manager import TradingRuntimeManager, get_global_runtime_manager
router = APIRouter(prefix="/api/runtime", tags=["runtime"])
runtime_manager: Optional[TradingRuntimeManager] = None
PROJECT_ROOT = Path(__file__).resolve().parents[2]
# Gateway process management
_gateway_process: Optional[subprocess.Popen] = None
_gateway_port: int = 8765
class RunContextResponse(BaseModel):
config_name: str
run_dir: str
bootstrap_values: Dict[str, Any]
class RuntimeAgentState(BaseModel):
agent_id: str
status: str
last_session: Optional[str] = None
last_updated: str
class RuntimeAgentsResponse(BaseModel):
agents: List[RuntimeAgentState]
class RuntimeEvent(BaseModel):
timestamp: str
event: str
details: Dict[str, Any]
session: Optional[str]
class RuntimeEventsResponse(BaseModel):
events: List[RuntimeEvent]
class LaunchConfig(BaseModel):
"""Configuration for launching a new trading task."""
tickers: List[str] = Field(default_factory=list, description="股票池")
schedule_mode: str = Field(default="daily", description="调度模式: daily, interval")
interval_minutes: int = Field(default=60, ge=1, description="间隔分钟数")
trigger_time: str = Field(default="09:30", description="触发时间 HH:MM")
max_comm_cycles: int = Field(default=2, ge=1, description="最大会商轮数")
initial_cash: float = Field(default=100000.0, gt=0, description="初始资金")
margin_requirement: float = Field(default=0.0, ge=0, description="保证金要求")
enable_memory: bool = Field(default=False, description="是否启用长期记忆")
mode: str = Field(default="live", description="运行模式: live, backtest")
start_date: Optional[str] = Field(default=None, description="回测开始日期 YYYY-MM-DD")
end_date: Optional[str] = Field(default=None, description="回测结束日期 YYYY-MM-DD")
poll_interval: int = Field(default=10, ge=1, le=300, description="市场数据轮询间隔(秒)")
enable_mock: bool = Field(default=False, description="是否启用模拟模式(使用模拟价格数据)")
class LaunchResponse(BaseModel):
run_id: str
status: str
run_dir: str
gateway_port: int
message: str
class StopResponse(BaseModel):
status: str
message: str
class GatewayStatusResponse(BaseModel):
is_running: bool
port: int
run_id: Optional[str] = None
def _generate_run_id() -> str:
"""Generate timestamp-based run ID: YYYYMMDD_HHMMSS"""
return datetime.now().strftime("%Y%m%d_%H%M%S")
def _get_run_dir(run_id: str) -> Path:
"""Return the run directory for a given run ID."""
return PROJECT_ROOT / "runs" / run_id
def _find_available_port(start_port: int = 8765, max_port: int = 9000) -> int:
"""Find an available port for Gateway."""
import socket
for port in range(start_port, max_port):
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
if s.connect_ex(('localhost', port)) != 0:
return port
raise RuntimeError("No available port found")
def _is_gateway_running() -> bool:
"""Check if Gateway process is running."""
global _gateway_process
if _gateway_process is None:
return False
return _gateway_process.poll() is None
def _stop_gateway() -> bool:
"""Stop the Gateway process."""
global _gateway_process
if _gateway_process is None:
return False
try:
# Try graceful shutdown first
_gateway_process.terminate()
try:
_gateway_process.wait(timeout=5)
except subprocess.TimeoutExpired:
# Force kill if graceful shutdown fails
_gateway_process.kill()
_gateway_process.wait()
except Exception as e:
logger.warning(f"Error during gateway shutdown: {e}")
finally:
_gateway_process = None
return True
def _start_gateway_process(
run_id: str,
run_dir: Path,
bootstrap: Dict[str, Any],
port: int
) -> subprocess.Popen:
"""Start Gateway as a separate process."""
# Prepare environment
env = os.environ.copy()
# Create command arguments
cmd = [
sys.executable,
"-m", "backend.gateway_server",
"--run-id", run_id,
"--run-dir", str(run_dir),
"--port", str(port),
"--bootstrap", json.dumps(bootstrap)
]
# Start process
process = subprocess.Popen(
cmd,
env=env,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=PROJECT_ROOT
)
return process
@router.get("/context", response_model=RunContextResponse)
async def get_run_context() -> RunContextResponse:
"""Return the most recent run context."""
snapshot_path = PROJECT_ROOT.glob("runs/*/state/runtime_state.json")
snapshots = sorted(snapshot_path, key=lambda p: p.stat().st_mtime, reverse=True)
if not snapshots:
raise HTTPException(status_code=404, detail="No run context available")
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
context = latest.get("context")
if context is None:
raise HTTPException(status_code=404, detail="Run context is not ready")
return RunContextResponse(
config_name=context["config_name"],
run_dir=context["run_dir"],
bootstrap_values=context["bootstrap_values"],
)
@router.get("/agents", response_model=RuntimeAgentsResponse)
async def get_runtime_agents() -> RuntimeAgentsResponse:
"""Return agent states from the most recent run."""
snapshot_path = PROJECT_ROOT.glob("runs/*/state/runtime_state.json")
snapshots = sorted(snapshot_path, key=lambda p: p.stat().st_mtime, reverse=True)
if not snapshots:
raise HTTPException(status_code=404, detail="No runtime state available")
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
agents = latest.get("agents", [])
return RuntimeAgentsResponse(
agents=[RuntimeAgentState(**a) for a in agents]
)
@router.get("/events", response_model=RuntimeEventsResponse)
async def get_runtime_events() -> RuntimeEventsResponse:
"""Return events from the most recent run."""
snapshot_path = PROJECT_ROOT.glob("runs/*/state/runtime_state.json")
snapshots = sorted(snapshot_path, key=lambda p: p.stat().st_mtime, reverse=True)
if not snapshots:
raise HTTPException(status_code=404, detail="No runtime state available")
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
events = latest.get("events", [])
return RuntimeEventsResponse(
events=[RuntimeEvent(**e) for e in events]
)
@router.get("/gateway/status", response_model=GatewayStatusResponse)
async def get_gateway_status() -> GatewayStatusResponse:
"""Get Gateway process status and port."""
global _gateway_port
is_running = _is_gateway_running()
run_id = None
if is_running:
# Try to find run_id from runtime state
snapshot_path = PROJECT_ROOT.glob("runs/*/state/runtime_state.json")
snapshots = sorted(snapshot_path, key=lambda p: p.stat().st_mtime, reverse=True)
if snapshots:
try:
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
run_id = latest.get("context", {}).get("config_name")
except Exception as e:
logger.warning(f"Failed to parse latest snapshot: {e}")
return GatewayStatusResponse(
is_running=is_running,
port=_gateway_port,
run_id=run_id
)
@router.get("/gateway/port")
async def get_gateway_port() -> Dict[str, Any]:
"""Get WebSocket Gateway port for frontend connection."""
global _gateway_port
return {
"port": _gateway_port,
"is_running": _is_gateway_running(),
"ws_url": f"ws://localhost:{_gateway_port}"
}
@router.post("/start", response_model=LaunchResponse)
async def start_runtime(
config: LaunchConfig,
background_tasks: BackgroundTasks
) -> LaunchResponse:
"""Start a new trading runtime with the given configuration.
1. Stop existing Gateway if running
2. Generate run ID and directory
3. Create runtime manager
4. Start Gateway as subprocess (Data Plane)
5. Return Gateway port for WebSocket connection
"""
global _gateway_process, _gateway_port
# 1. Stop existing Gateway
if _is_gateway_running():
_stop_gateway()
await asyncio.sleep(1) # Wait for port release
# 2. Generate run ID and directory
run_id = _generate_run_id()
run_dir = _get_run_dir(run_id)
# 3. Prepare bootstrap config
bootstrap = {
"tickers": config.tickers,
"schedule_mode": config.schedule_mode,
"interval_minutes": config.interval_minutes,
"trigger_time": config.trigger_time,
"max_comm_cycles": config.max_comm_cycles,
"initial_cash": config.initial_cash,
"margin_requirement": config.margin_requirement,
"enable_memory": config.enable_memory,
"mode": config.mode,
"start_date": config.start_date,
"end_date": config.end_date,
"poll_interval": config.poll_interval,
"enable_mock": config.enable_mock,
}
# 4. Create runtime manager
manager = TradingRuntimeManager(
config_name=run_id,
run_dir=run_dir,
bootstrap=bootstrap,
)
manager.prepare_run()
register_runtime_manager(manager)
# 5. Write BOOTSTRAP.md
_write_bootstrap_md(run_dir, bootstrap)
# 6. Find available port and start Gateway process
_gateway_port = _find_available_port(start_port=8765)
try:
_gateway_process = _start_gateway_process(
run_id=run_id,
run_dir=run_dir,
bootstrap=bootstrap,
port=_gateway_port
)
# Wait briefly to check if process started successfully
await asyncio.sleep(2)
if not _is_gateway_running():
stdout, stderr = _gateway_process.communicate(timeout=1)
_gateway_process = None
raise HTTPException(
status_code=500,
detail=f"Gateway failed to start: {stderr.decode() if stderr else 'Unknown error'}"
)
except Exception as e:
_stop_gateway()
raise HTTPException(status_code=500, detail=f"Failed to start Gateway: {str(e)}")
return LaunchResponse(
run_id=run_id,
status="started",
run_dir=str(run_dir),
gateway_port=_gateway_port,
message=f"Runtime started with run_id: {run_id}, Gateway on port: {_gateway_port}",
)
@router.post("/stop", response_model=StopResponse)
async def stop_runtime(force: bool = True) -> StopResponse:
"""Stop the current running runtime."""
global _gateway_process
was_running = _is_gateway_running()
if not was_running:
raise HTTPException(status_code=404, detail="No runtime is currently running")
# Stop Gateway process
_stop_gateway()
# Unregister runtime manager
unregister_runtime_manager()
return StopResponse(
status="stopped",
message="Runtime stopped successfully",
)
@router.post("/restart")
async def restart_runtime(
config: LaunchConfig,
background_tasks: BackgroundTasks
):
"""Restart the runtime with a new configuration."""
# Stop current runtime
await stop_runtime(force=True)
# Start new runtime
response = await start_runtime(config, background_tasks)
return {
"run_id": response.run_id,
"status": "restarted",
"gateway_port": response.gateway_port,
"message": f"Runtime restarted with run_id: {response.run_id}",
}
@router.get("/current")
async def get_current_runtime():
"""Get information about the currently running runtime."""
if not _is_gateway_running():
raise HTTPException(status_code=404, detail="No runtime is currently running")
# Find latest runtime state
snapshot_path = PROJECT_ROOT.glob("runs/*/state/runtime_state.json")
snapshots = sorted(snapshot_path, key=lambda p: p.stat().st_mtime, reverse=True)
if not snapshots:
raise HTTPException(status_code=404, detail="No runtime information available")
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
context = latest.get("context", {})
return {
"run_id": context.get("config_name"),
"run_dir": context.get("run_dir"),
"is_running": True,
"gateway_port": _gateway_port,
"bootstrap": context.get("bootstrap_values", {}),
}
def register_runtime_manager(manager: TradingRuntimeManager) -> None:
"""Allow other modules to expose the runtime manager to the API."""
global runtime_manager
runtime_manager = manager
def unregister_runtime_manager() -> None:
"""Drop the runtime manager reference."""
global runtime_manager
runtime_manager = None
def _write_bootstrap_md(run_dir: Path, bootstrap: Dict[str, Any]) -> None:
"""Write bootstrap configuration to BOOTSTRAP.md."""
try:
import yaml
except ImportError:
yaml = None
bootstrap_path = run_dir / "BOOTSTRAP.md"
bootstrap_path.parent.mkdir(parents=True, exist_ok=True)
# Filter out None values
values = {k: v for k, v in bootstrap.items() if v is not None}
if yaml:
front_matter = yaml.safe_dump(values, allow_unicode=True, sort_keys=False)
else:
front_matter = json.dumps(values, ensure_ascii=False, indent=2)
content = f"---\n{front_matter}---\n"
bootstrap_path.write_text(content, encoding="utf-8")