Files
evotraders/backend/api/runtime.py
cillin 3926a6bd07 feat: 架构修复 - P0/P1 问题全面修复
P0 修复:
- runtimeStore: 添加缺失的 lastDayHistory 字段
- Gateway/RuntimeService: 状态同步改为内存优先,消除 glob 竞态
- App.jsx: 从 3075 行重构到 ~500 行,提取 8 个独立文件

P1 修复:
- CORS: 4 个服务改为从环境变量读取允许 origins
- MarketStore: 改为模块级单例模式
- Domain 层: 删除 trading thin wrapper,保留 news 真实逻辑
- 测试: 补齐 77 个 gateway/runtime 测试

新增文件:
- backend/tests/test_gateway.py (43 tests)
- frontend/src/hooks/useWebSocketHandler.js
- frontend/src/hooks/useStockRequestCallbacks.js
- frontend/src/hooks/useAgentCallbacks.js
- frontend/src/hooks/useRuntimeCallbacks.js
- frontend/src/hooks/useWatchlistCallbacks.js
- frontend/src/components/TickerBar.jsx
- frontend/src/components/HeaderRight.jsx
- frontend/src/components/ChartTabs.jsx

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 18:45:57 +08:00

712 lines
23 KiB
Python

# -*- coding: utf-8 -*-
"""Runtime API routes - Control Plane for managing Gateway processes."""
from __future__ import annotations
import asyncio
import json
import logging
import os
import signal
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
from pydantic import BaseModel, Field
from backend.runtime.agent_runtime import AgentRuntimeState
from backend.config.bootstrap_config import (
resolve_runtime_config,
update_bootstrap_values_for_run,
)
router = APIRouter(prefix="/api/runtime", tags=["runtime"])
PROJECT_ROOT = Path(__file__).resolve().parents[2]
class RuntimeState:
"""Thread-safe singleton for managing runtime state.
Encapsulates runtime_manager, _gateway_process, and _gateway_port
with asyncio.Lock protection for concurrent access.
"""
_instance: Optional["RuntimeState"] = None
_lock: asyncio.Lock = asyncio.Lock()
def __new__(cls) -> "RuntimeState":
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._initialized = False
return cls._instance
def __init__(self) -> None:
if self._initialized:
return
self._runtime_manager: Optional[Any] = None
self._gateway_process: Optional[subprocess.Popen] = None
self._gateway_port: int = 8765
self._state_lock = asyncio.Lock()
self._initialized = True
@property
async def lock(self) -> asyncio.Lock:
"""Get the asyncio lock for state synchronization."""
return self._state_lock
@property
def runtime_manager(self) -> Optional[Any]:
"""Get the runtime manager (no lock - read only)."""
return self._runtime_manager
@runtime_manager.setter
def runtime_manager(self, value: Optional[Any]) -> None:
"""Set the runtime manager."""
self._runtime_manager = value
@property
def gateway_process(self) -> Optional[subprocess.Popen]:
"""Get the gateway process (no lock - read only)."""
return self._gateway_process
@gateway_process.setter
def gateway_process(self, value: Optional[subprocess.Popen]) -> None:
"""Set the gateway process."""
self._gateway_process = value
@property
def gateway_port(self) -> int:
"""Get the gateway port."""
return self._gateway_port
@gateway_port.setter
def gateway_port(self, value: int) -> None:
"""Set the gateway port."""
self._gateway_port = value
async def set_runtime_manager(self, manager: Any) -> None:
"""Set runtime manager with lock protection."""
async with self._state_lock:
self._runtime_manager = manager
async def get_runtime_manager(self) -> Optional[Any]:
"""Get runtime manager with lock protection."""
async with self._state_lock:
return self._runtime_manager
async def set_gateway_process(self, process: Optional[subprocess.Popen]) -> None:
"""Set gateway process with lock protection."""
async with self._state_lock:
self._gateway_process = process
async def get_gateway_process(self) -> Optional[subprocess.Popen]:
"""Get gateway process with lock protection."""
async with self._state_lock:
return self._gateway_process
async def set_gateway_port(self, port: int) -> None:
"""Set gateway port with lock protection."""
async with self._state_lock:
self._gateway_port = port
async def get_gateway_port(self) -> int:
"""Get gateway port with lock protection."""
async with self._state_lock:
return self._gateway_port
# Singleton instance
_runtime_state = RuntimeState()
def get_runtime_state() -> RuntimeState:
"""Get the RuntimeState singleton instance."""
return _runtime_state
# Backward compatibility: module-level runtime_manager for external imports
# This is set by register_runtime_manager() for backward compatibility
runtime_manager: Optional[Any] = None
class RunContextResponse(BaseModel):
config_name: str
run_dir: str
bootstrap_values: Dict[str, Any]
class RuntimeAgentState(BaseModel):
agent_id: str
status: str
last_session: Optional[str] = None
last_updated: str
class RuntimeAgentsResponse(BaseModel):
agents: List[RuntimeAgentState]
class RuntimeEvent(BaseModel):
timestamp: str
event: str
details: Dict[str, Any]
session: Optional[str]
class RuntimeEventsResponse(BaseModel):
events: List[RuntimeEvent]
class LaunchConfig(BaseModel):
"""Configuration for launching a new trading task."""
tickers: List[str] = Field(default_factory=list, description="股票池")
schedule_mode: str = Field(default="daily", description="调度模式: daily, interval")
interval_minutes: int = Field(default=60, ge=1, description="间隔分钟数")
trigger_time: str = Field(default="09:30", description="触发时间 HH:MM")
max_comm_cycles: int = Field(default=2, ge=1, description="最大会商轮数")
initial_cash: float = Field(default=100000.0, gt=0, description="初始资金")
margin_requirement: float = Field(default=0.0, ge=0, description="保证金要求")
enable_memory: bool = Field(default=False, description="是否启用长期记忆")
mode: str = Field(default="live", description="运行模式: live, backtest")
start_date: Optional[str] = Field(default=None, description="回测开始日期 YYYY-MM-DD")
end_date: Optional[str] = Field(default=None, description="回测结束日期 YYYY-MM-DD")
poll_interval: int = Field(default=10, ge=1, le=300, description="市场数据轮询间隔(秒)")
enable_mock: bool = Field(default=False, description="是否启用模拟模式(使用模拟价格数据)")
class LaunchResponse(BaseModel):
run_id: str
status: str
run_dir: str
gateway_port: int
message: str
class StopResponse(BaseModel):
status: str
message: str
class GatewayStatusResponse(BaseModel):
is_running: bool
port: int
run_id: Optional[str] = None
class RuntimeConfigResponse(BaseModel):
run_id: str
is_running: bool
gateway_port: int
bootstrap: Dict[str, Any]
resolved: Dict[str, Any]
class UpdateRuntimeConfigRequest(BaseModel):
schedule_mode: Optional[str] = None
interval_minutes: Optional[int] = Field(default=None, ge=1)
trigger_time: Optional[str] = None
max_comm_cycles: Optional[int] = Field(default=None, ge=1)
initial_cash: Optional[float] = Field(default=None, gt=0)
margin_requirement: Optional[float] = Field(default=None, ge=0)
enable_memory: Optional[bool] = None
def _generate_run_id() -> str:
"""Generate timestamp-based run ID: YYYYMMDD_HHMMSS"""
return datetime.now().strftime("%Y%m%d_%H%M%S")
def _get_run_dir(run_id: str) -> Path:
"""Return the run directory for a given run ID."""
return PROJECT_ROOT / "runs" / run_id
def _find_available_port(start_port: int = 8765, max_port: int = 9000) -> int:
"""Find an available port for Gateway."""
import socket
for port in range(start_port, max_port):
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
if s.connect_ex(('localhost', port)) != 0:
return port
raise RuntimeError("No available port found")
def _is_gateway_running() -> bool:
"""Check if Gateway process is running."""
process = _runtime_state.gateway_process
if process is None:
return False
return process.poll() is None
def _stop_gateway() -> bool:
"""Stop the Gateway process."""
process = _runtime_state.gateway_process
if process is None:
return False
try:
# Try graceful shutdown first
process.terminate()
try:
process.wait(timeout=5)
except subprocess.TimeoutExpired:
# Force kill if graceful shutdown fails
process.kill()
process.wait()
except Exception as e:
logger.warning(f"Error during gateway shutdown: {e}")
finally:
_runtime_state.gateway_process = None
return True
def _start_gateway_process(
run_id: str,
run_dir: Path,
bootstrap: Dict[str, Any],
port: int
) -> subprocess.Popen:
"""Start Gateway as a separate process."""
# Prepare environment
env = os.environ.copy()
# Create command arguments
cmd = [
sys.executable,
"-m", "backend.gateway_server",
"--run-id", run_id,
"--run-dir", str(run_dir),
"--port", str(port),
"--bootstrap", json.dumps(bootstrap)
]
# Start process
process = subprocess.Popen(
cmd,
env=env,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=PROJECT_ROOT
)
return process
@router.get("/context", response_model=RunContextResponse)
async def get_run_context() -> RunContextResponse:
"""Return the current run context from in-memory state (avoids glob race condition)."""
manager = _runtime_state.runtime_manager
if manager is None or manager.context is None:
raise HTTPException(status_code=404, detail="No run context available")
context = manager.context
return RunContextResponse(
config_name=context.config_name,
run_dir=str(context.run_dir),
bootstrap_values=context.bootstrap_values,
)
@router.get("/agents", response_model=RuntimeAgentsResponse)
async def get_runtime_agents() -> RuntimeAgentsResponse:
"""Return agent states from the in-memory runtime manager (avoids glob race condition)."""
manager = _runtime_state.runtime_manager
if manager is None:
raise HTTPException(status_code=404, detail="No runtime state available")
snapshot = manager.build_snapshot()
agents = snapshot.get("agents", [])
return RuntimeAgentsResponse(
agents=[RuntimeAgentState(**a) for a in agents]
)
@router.get("/events", response_model=RuntimeEventsResponse)
async def get_runtime_events() -> RuntimeEventsResponse:
"""Return events from the in-memory runtime manager (avoids glob race condition)."""
manager = _runtime_state.runtime_manager
if manager is None:
raise HTTPException(status_code=404, detail="No runtime state available")
snapshot = manager.build_snapshot()
events = snapshot.get("events", [])
return RuntimeEventsResponse(
events=[RuntimeEvent(**e) for e in events]
)
@router.get("/gateway/status", response_model=GatewayStatusResponse)
async def get_gateway_status() -> GatewayStatusResponse:
"""Get Gateway process status and port."""
is_running = _is_gateway_running()
run_id = None
if is_running:
# Get run_id from in-memory runtime manager (avoids glob race condition)
manager = _runtime_state.runtime_manager
if manager is not None and manager.context is not None:
run_id = manager.context.config_name
return GatewayStatusResponse(
is_running=is_running,
port=_runtime_state.gateway_port,
run_id=run_id
)
@router.get("/gateway/port")
async def get_gateway_port(request: Request) -> Dict[str, Any]:
"""Get WebSocket Gateway port for frontend connection."""
gateway_port = _runtime_state.gateway_port
return {
"port": gateway_port,
"is_running": _is_gateway_running(),
"ws_url": _build_gateway_ws_url(request, gateway_port),
}
def _build_gateway_ws_url(request: Request, port: int) -> str:
"""Build a proxy-safe Gateway WebSocket URL."""
forwarded_proto = request.headers.get("x-forwarded-proto", "").split(",")[0].strip()
scheme = forwarded_proto or request.url.scheme
ws_scheme = "wss" if scheme == "https" else "ws"
forwarded_host = request.headers.get("x-forwarded-host", "").split(",")[0].strip()
host = forwarded_host or request.url.hostname or "localhost"
if ":" in host and not host.startswith("["):
host = host.split(":", 1)[0]
return f"{ws_scheme}://{host}:{port}"
def _get_current_runtime_context() -> Dict[str, Any]:
"""Return the active runtime context from the in-memory manager (avoids glob race condition).
Falls back to file-based lookup only when the in-memory manager is not available
(e.g., after a service restart). File-based lookup is deprecated and exists
only for backward compatibility.
"""
if not _is_gateway_running():
raise HTTPException(status_code=404, detail="No runtime is currently running")
# Primary: use in-memory manager (always correct for current process)
manager = _runtime_state.runtime_manager
if manager is not None and manager.context is not None:
ctx = manager.context
return {
"config_name": ctx.config_name,
"run_dir": str(ctx.run_dir),
"bootstrap_values": ctx.bootstrap_values,
}
# Deprecated fallback: scan filesystem (only for backward compatibility
# after service restart without a restart of the runtime itself)
snapshots = sorted(
PROJECT_ROOT.glob("runs/*/state/runtime_state.json"),
key=lambda p: p.stat().st_mtime,
reverse=True,
)
if not snapshots:
raise HTTPException(status_code=404, detail="No runtime information available")
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
context = latest.get("context") or {}
if not context.get("config_name"):
raise HTTPException(status_code=404, detail="No runtime context available")
return context
def _resolve_runtime_response(run_id: str) -> RuntimeConfigResponse:
"""Build a normalized runtime config response for the active run."""
context = _get_current_runtime_context()
bootstrap = dict(context.get("bootstrap_values") or {})
resolved = resolve_runtime_config(
project_root=PROJECT_ROOT,
config_name=run_id,
enable_memory=bool(bootstrap.get("enable_memory", False)),
schedule_mode=str(bootstrap.get("schedule_mode", "daily")),
interval_minutes=int(bootstrap.get("interval_minutes", 60) or 60),
trigger_time=str(bootstrap.get("trigger_time", "09:30") or "09:30"),
)
return RuntimeConfigResponse(
run_id=run_id,
is_running=True,
gateway_port=_runtime_state.gateway_port,
bootstrap=bootstrap,
resolved=resolved,
)
def _normalize_runtime_config_updates(
request: UpdateRuntimeConfigRequest,
) -> Dict[str, Any]:
"""Validate and normalize runtime config updates."""
updates: Dict[str, Any] = {}
if request.schedule_mode is not None:
schedule_mode = str(request.schedule_mode).strip().lower()
if schedule_mode not in {"daily", "intraday"}:
raise HTTPException(
status_code=400,
detail="schedule_mode must be 'daily' or 'intraday'",
)
updates["schedule_mode"] = schedule_mode
if request.interval_minutes is not None:
updates["interval_minutes"] = int(request.interval_minutes)
if request.trigger_time is not None:
trigger_time = str(request.trigger_time).strip()
if trigger_time and trigger_time != "now":
try:
datetime.strptime(trigger_time, "%H:%M")
except ValueError as exc:
raise HTTPException(
status_code=400,
detail="trigger_time must use HH:MM or 'now'",
) from exc
updates["trigger_time"] = trigger_time or "09:30"
if request.max_comm_cycles is not None:
updates["max_comm_cycles"] = int(request.max_comm_cycles)
if request.initial_cash is not None:
updates["initial_cash"] = float(request.initial_cash)
if request.margin_requirement is not None:
updates["margin_requirement"] = float(request.margin_requirement)
if request.enable_memory is not None:
updates["enable_memory"] = bool(request.enable_memory)
if not updates:
raise HTTPException(status_code=400, detail="No runtime config updates provided")
return updates
@router.post("/start", response_model=LaunchResponse)
async def start_runtime(
config: LaunchConfig,
background_tasks: BackgroundTasks
) -> LaunchResponse:
"""Start a new trading runtime with the given configuration.
1. Stop existing Gateway if running
2. Generate run ID and directory
3. Create runtime manager
4. Start Gateway as subprocess (Data Plane)
5. Return Gateway port for WebSocket connection
"""
# Lazy import to avoid circular dependency
from backend.runtime.manager import TradingRuntimeManager
# 1. Stop existing Gateway
if _is_gateway_running():
_stop_gateway()
await asyncio.sleep(1) # Wait for port release
# 2. Generate run ID and directory
run_id = _generate_run_id()
run_dir = _get_run_dir(run_id)
# 3. Prepare bootstrap config
bootstrap = {
"tickers": config.tickers,
"schedule_mode": config.schedule_mode,
"interval_minutes": config.interval_minutes,
"trigger_time": config.trigger_time,
"max_comm_cycles": config.max_comm_cycles,
"initial_cash": config.initial_cash,
"margin_requirement": config.margin_requirement,
"enable_memory": config.enable_memory,
"mode": config.mode,
"start_date": config.start_date,
"end_date": config.end_date,
"poll_interval": config.poll_interval,
"enable_mock": config.enable_mock,
}
# 4. Create runtime manager
manager = TradingRuntimeManager(
config_name=run_id,
run_dir=run_dir,
bootstrap=bootstrap,
)
manager.prepare_run()
register_runtime_manager(manager)
# 5. Write BOOTSTRAP.md
_write_bootstrap_md(run_dir, bootstrap)
# 6. Find available port and start Gateway process
gateway_port = _find_available_port(start_port=8765)
_runtime_state.gateway_port = gateway_port
try:
process = _start_gateway_process(
run_id=run_id,
run_dir=run_dir,
bootstrap=bootstrap,
port=gateway_port
)
_runtime_state.gateway_process = process
# Wait briefly to check if process started successfully
await asyncio.sleep(2)
if not _is_gateway_running():
stdout, stderr = process.communicate(timeout=1)
_runtime_state.gateway_process = None
raise HTTPException(
status_code=500,
detail=f"Gateway failed to start: {stderr.decode() if stderr else 'Unknown error'}"
)
except Exception as e:
_stop_gateway()
raise HTTPException(status_code=500, detail=f"Failed to start Gateway: {str(e)}")
return LaunchResponse(
run_id=run_id,
status="started",
run_dir=str(run_dir),
gateway_port=gateway_port,
message=f"Runtime started with run_id: {run_id}, Gateway on port: {gateway_port}",
)
@router.get("/config", response_model=RuntimeConfigResponse)
async def get_runtime_config() -> RuntimeConfigResponse:
"""Return the current runtime bootstrap and resolved settings."""
context = _get_current_runtime_context()
return _resolve_runtime_response(context["config_name"])
@router.put("/config", response_model=RuntimeConfigResponse)
async def update_runtime_config(
request: UpdateRuntimeConfigRequest,
) -> RuntimeConfigResponse:
"""Persist selected runtime configuration updates for the active run."""
context = _get_current_runtime_context()
run_id = context["config_name"]
updates = _normalize_runtime_config_updates(request)
updated = update_bootstrap_values_for_run(PROJECT_ROOT, run_id, updates)
manager = _runtime_state.runtime_manager
if manager is not None and getattr(manager, "config_name", None) == run_id:
manager.bootstrap.update(updates)
if getattr(manager, "context", None) is not None:
manager.context.bootstrap_values.update(updates)
if hasattr(manager, "_persist_snapshot"):
manager._persist_snapshot()
response = _resolve_runtime_response(run_id)
response.bootstrap = dict(updated.values)
return response
@router.post("/stop", response_model=StopResponse)
async def stop_runtime(force: bool = True) -> StopResponse:
"""Stop the current running runtime."""
was_running = _is_gateway_running()
if not was_running:
raise HTTPException(status_code=404, detail="No runtime is currently running")
# Stop Gateway process
_stop_gateway()
# Unregister runtime manager
unregister_runtime_manager()
return StopResponse(
status="stopped",
message="Runtime stopped successfully",
)
@router.post("/restart")
async def restart_runtime(
config: LaunchConfig,
background_tasks: BackgroundTasks
):
"""Restart the runtime with a new configuration."""
# Stop current runtime
await stop_runtime(force=True)
# Start new runtime
response = await start_runtime(config, background_tasks)
return {
"run_id": response.run_id,
"status": "restarted",
"gateway_port": response.gateway_port,
"message": f"Runtime restarted with run_id: {response.run_id}",
}
@router.get("/current")
async def get_current_runtime():
"""Get information about the currently running runtime."""
if not _is_gateway_running():
raise HTTPException(status_code=404, detail="No runtime is currently running")
# Get context from in-memory manager (avoids glob race condition)
context = _get_current_runtime_context()
return {
"run_id": context.get("config_name"),
"run_dir": context.get("run_dir"),
"is_running": True,
"gateway_port": _runtime_state.gateway_port,
"bootstrap": context.get("bootstrap_values", {}),
}
def register_runtime_manager(manager: Any) -> None:
"""Allow other modules to expose the runtime manager to the API."""
global runtime_manager
runtime_manager = manager
# Also update the RuntimeState singleton for internal consistency
_runtime_state.runtime_manager = manager
def unregister_runtime_manager() -> None:
"""Drop the runtime manager reference."""
global runtime_manager
runtime_manager = None
# Also update the RuntimeState singleton for internal consistency
_runtime_state.runtime_manager = None
def _write_bootstrap_md(run_dir: Path, bootstrap: Dict[str, Any]) -> None:
"""Write bootstrap configuration to BOOTSTRAP.md."""
try:
import yaml
except ImportError:
yaml = None
bootstrap_path = run_dir / "BOOTSTRAP.md"
bootstrap_path.parent.mkdir(parents=True, exist_ok=True)
# Filter out None values
values = {k: v for k, v in bootstrap.items() if v is not None}
if yaml:
front_matter = yaml.safe_dump(values, allow_unicode=True, sort_keys=False)
else:
front_matter = json.dumps(values, ensure_ascii=False, indent=2)
content = f"---\n{front_matter}---\n"
bootstrap_path.write_text(content, encoding="utf-8")