# -*- coding: utf-8 -*-
"""Runtime API routes - Control Plane for managing Gateway processes."""
from __future__ import annotations

import asyncio
import json
import logging
import os
import signal
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

from fastapi import APIRouter, HTTPException, BackgroundTasks
from pydantic import BaseModel, Field

from backend.runtime.agent_runtime import AgentRuntimeState
from backend.runtime.manager import TradingRuntimeManager, get_global_runtime_manager

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/api/runtime", tags=["runtime"])

# Runtime manager registered by the most recent successful call to
# register_runtime_manager(); None when nothing is registered.
runtime_manager: Optional[TradingRuntimeManager] = None

PROJECT_ROOT = Path(__file__).resolve().parents[2]

# Gateway process management
_gateway_process: Optional[subprocess.Popen] = None
_gateway_port: int = 8765

# Glob (relative to PROJECT_ROOT) matching the state snapshot of every run.
_SNAPSHOT_GLOB = "runs/*/state/runtime_state.json"

# File inside the run directory that receives the Gateway's stdout/stderr.
_GATEWAY_LOG_NAME = "gateway.log"


class RunContextResponse(BaseModel):
    """Response body of ``GET /context``."""

    config_name: str
    run_dir: str
    bootstrap_values: Dict[str, Any]


class RuntimeAgentState(BaseModel):
    """State of a single agent as stored in the runtime snapshot."""

    agent_id: str
    status: str
    last_session: Optional[str] = None
    last_updated: str


class RuntimeAgentsResponse(BaseModel):
    """Response body of ``GET /agents``."""

    agents: List[RuntimeAgentState]


class RuntimeEvent(BaseModel):
    """A single event entry as stored in the runtime snapshot."""

    timestamp: str
    event: str
    details: Dict[str, Any]
    session: Optional[str]


class RuntimeEventsResponse(BaseModel):
    """Response body of ``GET /events``."""

    events: List[RuntimeEvent]


class LaunchConfig(BaseModel):
    """Configuration for launching a new trading task."""

    # Ticker universe.
    tickers: List[str] = Field(default_factory=list, description="股票池")
    # Scheduling mode: daily or interval.
    schedule_mode: str = Field(default="daily", description="调度模式: daily, interval")
    # Interval length in minutes.
    interval_minutes: int = Field(default=60, ge=1, description="间隔分钟数")
    # Trigger time, HH:MM.
    trigger_time: str = Field(default="09:30", description="触发时间 HH:MM")
    # Maximum number of communication cycles.
    max_comm_cycles: int = Field(default=2, ge=1, description="最大会商轮数")
    # Initial cash.
    initial_cash: float = Field(default=100000.0, gt=0, description="初始资金")
    # Margin requirement.
    margin_requirement: float = Field(default=0.0, ge=0, description="保证金要求")
    # Whether long-term memory is enabled.
    enable_memory: bool = Field(default=False, description="是否启用长期记忆")
    # Run mode: live or backtest.
    mode: str = Field(default="live", description="运行模式: live, backtest")
    # Backtest start date, YYYY-MM-DD.
    start_date: Optional[str] = Field(default=None, description="回测开始日期 YYYY-MM-DD")
    # Backtest end date, YYYY-MM-DD.
    end_date: Optional[str] = Field(default=None, description="回测结束日期 YYYY-MM-DD")
    # Market-data polling interval in seconds.
    poll_interval: int = Field(default=10, ge=1, le=300, description="市场数据轮询间隔(秒)")
    # Whether mock mode (simulated price data) is enabled.
    enable_mock: bool = Field(default=False, description="是否启用模拟模式(使用模拟价格数据)")


class LaunchResponse(BaseModel):
    """Response body of ``POST /start``."""

    run_id: str
    status: str
    run_dir: str
    gateway_port: int
    message: str


class StopResponse(BaseModel):
    """Response body of ``POST /stop``."""

    status: str
    message: str


class GatewayStatusResponse(BaseModel):
    """Response body of ``GET /gateway/status``."""

    is_running: bool
    port: int
    run_id: Optional[str] = None


def _generate_run_id() -> str:
    """Generate timestamp-based run ID: YYYYMMDD_HHMMSS"""
    return datetime.now().strftime("%Y%m%d_%H%M%S")


def _get_run_dir(run_id: str) -> Path:
    """Return the run directory for a given run ID."""
    return PROJECT_ROOT / "runs" / run_id


def _latest_snapshot_path() -> Optional[Path]:
    """Return the most recently modified runtime snapshot, or None if there is none."""
    candidates = list(PROJECT_ROOT.glob(_SNAPSHOT_GLOB))
    if not candidates:
        return None
    return max(candidates, key=lambda p: p.stat().st_mtime)


def _load_latest_snapshot() -> Optional[Dict[str, Any]]:
    """Parse and return the most recent runtime snapshot.

    Returns:
        The decoded JSON document, or None when no snapshot file exists.

    Raises:
        OSError / json.JSONDecodeError: if the snapshot cannot be read or parsed.
    """
    path = _latest_snapshot_path()
    if path is None:
        return None
    return json.loads(path.read_text(encoding="utf-8"))


def _find_available_port(start_port: int = 8765, max_port: int = 9000) -> int:
    """Find an available port for Gateway.

    Probes ``localhost`` ports in ``[start_port, max_port)`` and returns the
    first one on which a TCP connection attempt fails (nothing is listening).

    Raises:
        RuntimeError: if every port in the range accepts a connection.
    """
    import socket

    for port in range(start_port, max_port):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            if s.connect_ex(('localhost', port)) != 0:
                return port
    raise RuntimeError("No available port found")


def _is_gateway_running() -> bool:
    """Check if Gateway process is running."""
    if _gateway_process is None:
        return False
    # poll() returns None while the child has not exited yet.
    return _gateway_process.poll() is None


def _stop_gateway() -> bool:
    """Stop the Gateway process.

    Sends terminate, waits up to 5 seconds, then kills the process if it is
    still alive. Errors are logged, not raised.

    Returns:
        False if no process handle was held, True otherwise.
    """
    global _gateway_process

    if _gateway_process is None:
        return False

    try:
        # Try graceful shutdown first
        _gateway_process.terminate()
        try:
            _gateway_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # Force kill if graceful shutdown fails
            _gateway_process.kill()
            _gateway_process.wait()
    except Exception as e:
        logger.warning("Error during gateway shutdown: %s", e)
    finally:
        # The handle is dropped even when shutdown raised.
        _gateway_process = None

    return True


def _gateway_log_path(run_dir: Path) -> Path:
    """Return the file that captures the Gateway's stdout/stderr for a run."""
    return run_dir / _GATEWAY_LOG_NAME


def _read_gateway_log_tail(run_dir: Path, max_bytes: int = 4096) -> str:
    """Return up to the last ``max_bytes`` of the Gateway log, or "" if unreadable."""
    try:
        with open(_gateway_log_path(run_dir), "rb") as fh:
            fh.seek(0, os.SEEK_END)
            size = fh.tell()
            fh.seek(max(0, size - max_bytes))
            data = fh.read()
    except OSError:
        return ""
    return data.decode("utf-8", errors="replace").strip()


def _start_gateway_process(
    run_id: str,
    run_dir: Path,
    bootstrap: Dict[str, Any],
    port: int
) -> subprocess.Popen:
    """Start Gateway as a separate process.

    Runs ``python -m backend.gateway_server`` with the run ID, run directory,
    port and JSON-encoded bootstrap as command-line arguments, using
    PROJECT_ROOT as the working directory.

    The child's stdout and stderr are appended to ``<run_dir>/gateway.log``.
    They are deliberately not connected to pipes: nothing in this module
    drains them while the Gateway runs, so a pipe would eventually fill and
    block the child.

    Returns:
        The Popen handle of the started process.
    """
    # Prepare environment
    env = os.environ.copy()

    # Create command arguments
    cmd = [
        sys.executable,
        "-m", "backend.gateway_server",
        "--run-id", run_id,
        "--run-dir", str(run_dir),
        "--port", str(port),
        "--bootstrap", json.dumps(bootstrap),
    ]

    log_path = _gateway_log_path(run_dir)
    log_path.parent.mkdir(parents=True, exist_ok=True)

    # The child inherits its own duplicate of the descriptor, so the parent's
    # handle can be closed as soon as Popen returns.
    with open(log_path, "ab") as log_file:
        process = subprocess.Popen(
            cmd,
            env=env,
            stdout=log_file,
            stderr=subprocess.STDOUT,
            cwd=PROJECT_ROOT,
        )

    return process


@router.get("/context", response_model=RunContextResponse)
async def get_run_context() -> RunContextResponse:
    """Return the most recent run context.

    Raises:
        HTTPException(404): if no snapshot exists or it has no ``context``.
    """
    latest = _load_latest_snapshot()
    if latest is None:
        raise HTTPException(status_code=404, detail="No run context available")

    context = latest.get("context")
    if context is None:
        raise HTTPException(status_code=404, detail="Run context is not ready")

    return RunContextResponse(
        config_name=context["config_name"],
        run_dir=context["run_dir"],
        bootstrap_values=context["bootstrap_values"],
    )


@router.get("/agents", response_model=RuntimeAgentsResponse)
async def get_runtime_agents() -> RuntimeAgentsResponse:
    """Return agent states from the most recent run.

    Raises:
        HTTPException(404): if no snapshot exists.
    """
    latest = _load_latest_snapshot()
    if latest is None:
        raise HTTPException(status_code=404, detail="No runtime state available")

    agents = latest.get("agents", [])
    return RuntimeAgentsResponse(
        agents=[RuntimeAgentState(**a) for a in agents]
    )


@router.get("/events", response_model=RuntimeEventsResponse)
async def get_runtime_events() -> RuntimeEventsResponse:
    """Return events from the most recent run.

    Raises:
        HTTPException(404): if no snapshot exists.
    """
    latest = _load_latest_snapshot()
    if latest is None:
        raise HTTPException(status_code=404, detail="No runtime state available")

    events = latest.get("events", [])
    return RuntimeEventsResponse(
        events=[RuntimeEvent(**e) for e in events]
    )


@router.get("/gateway/status", response_model=GatewayStatusResponse)
async def get_gateway_status() -> GatewayStatusResponse:
    """Get Gateway process status and port.

    ``run_id`` is taken from ``context.config_name`` of the most recent
    snapshot and is only filled in while the Gateway is running; a snapshot
    that cannot be read is logged and leaves ``run_id`` as None.
    """
    is_running = _is_gateway_running()
    run_id = None

    if is_running:
        # Try to find run_id from runtime state
        try:
            latest = _load_latest_snapshot()
            if latest is not None:
                # "or {}" also covers a snapshot whose context is null.
                run_id = (latest.get("context") or {}).get("config_name")
        except Exception as e:
            logger.warning("Failed to parse latest snapshot: %s", e)

    return GatewayStatusResponse(
        is_running=is_running,
        port=_gateway_port,
        run_id=run_id
    )


@router.get("/gateway/port")
async def get_gateway_port() -> Dict[str, Any]:
    """Get WebSocket Gateway port for frontend connection."""
    return {
        "port": _gateway_port,
        "is_running": _is_gateway_running(),
        "ws_url": f"ws://localhost:{_gateway_port}"
    }


@router.post("/start", response_model=LaunchResponse)
async def start_runtime(
    config: LaunchConfig,
    background_tasks: BackgroundTasks
) -> LaunchResponse:
    """Start a new trading runtime with the given configuration.

    1. Stop existing Gateway if running
    2. Generate run ID and directory
    3. Prepare bootstrap config
    4. Create and register runtime manager
    5. Write BOOTSTRAP.md
    6. Start Gateway as subprocess (Data Plane) and return its port for
       WebSocket connection

    Raises:
        HTTPException(500): if the Gateway cannot be spawned or exits within
            the two-second startup check.
    """
    global _gateway_process, _gateway_port

    # 1. Stop existing Gateway
    if _is_gateway_running():
        _stop_gateway()
        await asyncio.sleep(1)  # Wait for port release

    # 2. Generate run ID and directory
    run_id = _generate_run_id()
    run_dir = _get_run_dir(run_id)

    # 3. Prepare bootstrap config
    bootstrap = {
        "tickers": config.tickers,
        "schedule_mode": config.schedule_mode,
        "interval_minutes": config.interval_minutes,
        "trigger_time": config.trigger_time,
        "max_comm_cycles": config.max_comm_cycles,
        "initial_cash": config.initial_cash,
        "margin_requirement": config.margin_requirement,
        "enable_memory": config.enable_memory,
        "mode": config.mode,
        "start_date": config.start_date,
        "end_date": config.end_date,
        "poll_interval": config.poll_interval,
        "enable_mock": config.enable_mock,
    }

    # 4. Create runtime manager
    manager = TradingRuntimeManager(
        config_name=run_id,
        run_dir=run_dir,
        bootstrap=bootstrap,
    )
    manager.prepare_run()
    register_runtime_manager(manager)

    # 5. Write BOOTSTRAP.md
    _write_bootstrap_md(run_dir, bootstrap)

    # 6. Find available port and start Gateway process
    _gateway_port = _find_available_port(start_port=8765)

    try:
        _gateway_process = _start_gateway_process(
            run_id=run_id,
            run_dir=run_dir,
            bootstrap=bootstrap,
            port=_gateway_port
        )

        # Wait briefly to check if process started successfully
        await asyncio.sleep(2)

        if not _is_gateway_running():
            _gateway_process = None
            error_output = _read_gateway_log_tail(run_dir)
            raise HTTPException(
                status_code=500,
                detail=f"Gateway failed to start: {error_output or 'Unknown error'}"
            )

    except HTTPException:
        # Already a well-formed API error; do not wrap it a second time.
        raise
    except Exception as e:
        _stop_gateway()
        raise HTTPException(
            status_code=500,
            detail=f"Failed to start Gateway: {str(e)}"
        ) from e

    return LaunchResponse(
        run_id=run_id,
        status="started",
        run_dir=str(run_dir),
        gateway_port=_gateway_port,
        message=f"Runtime started with run_id: {run_id}, Gateway on port: {_gateway_port}",
    )


@router.post("/stop", response_model=StopResponse)
async def stop_runtime(force: bool = True) -> StopResponse:
    """Stop the current running runtime.

    Args:
        force: Accepted for API compatibility; not consulted by this function.

    Raises:
        HTTPException(404): if no Gateway process is running.
    """
    if not _is_gateway_running():
        raise HTTPException(status_code=404, detail="No runtime is currently running")

    # Stop Gateway process
    _stop_gateway()

    # Unregister runtime manager
    unregister_runtime_manager()

    return StopResponse(
        status="stopped",
        message="Runtime stopped successfully",
    )


@router.post("/restart")
async def restart_runtime(
    config: LaunchConfig,
    background_tasks: BackgroundTasks
):
    """Restart the runtime with a new configuration.

    Raises:
        HTTPException(404): propagated from stop_runtime() when nothing is
            running; in that case no new runtime is started.
        HTTPException(500): propagated from start_runtime().
    """
    # Stop current runtime
    await stop_runtime(force=True)

    # Start new runtime
    response = await start_runtime(config, background_tasks)

    return {
        "run_id": response.run_id,
        "status": "restarted",
        "gateway_port": response.gateway_port,
        "message": f"Runtime restarted with run_id: {response.run_id}",
    }


@router.get("/current")
async def get_current_runtime():
    """Get information about the currently running runtime.

    Raises:
        HTTPException(404): if the Gateway is not running or no snapshot exists.
    """
    if not _is_gateway_running():
        raise HTTPException(status_code=404, detail="No runtime is currently running")

    # Find latest runtime state
    latest = _load_latest_snapshot()
    if latest is None:
        raise HTTPException(status_code=404, detail="No runtime information available")

    # "or {}" also covers a snapshot whose context is null.
    context = latest.get("context") or {}

    return {
        "run_id": context.get("config_name"),
        "run_dir": context.get("run_dir"),
        "is_running": True,
        "gateway_port": _gateway_port,
        "bootstrap": context.get("bootstrap_values", {}),
    }


def register_runtime_manager(manager: TradingRuntimeManager) -> None:
    """Allow other modules to expose the runtime manager to the API."""
    global runtime_manager
    runtime_manager = manager


def unregister_runtime_manager() -> None:
    """Drop the runtime manager reference."""
    global runtime_manager
    runtime_manager = None


def _write_bootstrap_md(run_dir: Path, bootstrap: Dict[str, Any]) -> None:
    """Write bootstrap configuration to BOOTSTRAP.md.

    Entries whose value is None are omitted. The remaining values are written
    between ``---`` delimiter lines, serialized as YAML when PyYAML can be
    imported and as indented JSON otherwise.
    """
    try:
        import yaml
    except ImportError:
        yaml = None

    bootstrap_path = run_dir / "BOOTSTRAP.md"
    bootstrap_path.parent.mkdir(parents=True, exist_ok=True)

    # Filter out None values
    values = {k: v for k, v in bootstrap.items() if v is not None}

    if yaml:
        front_matter = yaml.safe_dump(values, allow_unicode=True, sort_keys=False)
    else:
        front_matter = json.dumps(values, ensure_ascii=False, indent=2)

    # json.dumps emits no trailing newline; without one the closing "---"
    # would be glued onto the last line of the payload.
    if not front_matter.endswith("\n"):
        front_matter += "\n"

    content = f"---\n{front_matter}---\n"
    bootstrap_path.write_text(content, encoding="utf-8")