970 lines
32 KiB
Python
970 lines
32 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""Runtime API routes - Control Plane for managing Gateway processes."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import json
|
||
import logging
|
||
import os
|
||
import signal
|
||
import shutil
|
||
import subprocess
|
||
import sys
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
|
||
from pydantic import BaseModel, Field
|
||
|
||
from backend.runtime.agent_runtime import AgentRuntimeState
|
||
from backend.config.bootstrap_config import (
|
||
resolve_runtime_config,
|
||
update_bootstrap_values_for_run,
|
||
)
|
||
|
||
# Every route declared in this module is mounted under /api/runtime and
# tagged "runtime".
router = APIRouter(prefix="/api/runtime", tags=["runtime"])

# Two directory levels above the directory that contains this file.  The
# helpers below resolve the "runs" directory against it and use it as the
# working directory of the Gateway subprocess.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
||
|
||
|
||
class RuntimeState:
    """Process-wide singleton holding the control plane's mutable state.

    Stores the registered runtime manager, the Gateway subprocess handle and
    the Gateway port.  Instance creation is serialized with a ``threading``
    lock.  Only the ``async`` ``get_*``/``set_*`` accessors acquire the
    internal ``asyncio.Lock``; the plain properties read and write the
    underlying fields directly, without taking it.
    """

    _instance: Optional["RuntimeState"] = None
    _lock: "threading.Lock" = __import__("threading").Lock()

    def __new__(cls) -> "RuntimeState":
        # Create the single instance on first use; later calls return it.
        with cls._lock:
            instance = cls._instance
            if instance is None:
                instance = super().__new__(cls)
                # __init__ runs on every RuntimeState() call, so this flag
                # tells it whether the fields still need to be populated.
                instance._initialized = False
                cls._instance = instance
        return instance

    def __init__(self) -> None:
        if not self._initialized:
            self._runtime_manager: Optional[Any] = None
            self._gateway_process: Optional[subprocess.Popen] = None
            self._gateway_port: int = 8765
            self._state_lock = asyncio.Lock()
            self._initialized = True

    @property
    async def lock(self) -> asyncio.Lock:
        """Return the asyncio lock used by the async accessors.

        The getter is a coroutine function, so ``state.lock`` evaluates to a
        coroutine that has to be awaited to obtain the lock object.
        """
        return self._state_lock

    # --- direct (unlocked) attribute access ---------------------------------

    @property
    def runtime_manager(self) -> Optional[Any]:
        """Currently registered runtime manager, read without the lock."""
        return self._runtime_manager

    @runtime_manager.setter
    def runtime_manager(self, value: Optional[Any]) -> None:
        """Replace the runtime manager without taking the lock."""
        self._runtime_manager = value

    @property
    def gateway_process(self) -> Optional[subprocess.Popen]:
        """Handle of the managed Gateway subprocess, read without the lock."""
        return self._gateway_process

    @gateway_process.setter
    def gateway_process(self, value: Optional[subprocess.Popen]) -> None:
        """Replace the Gateway subprocess handle without taking the lock."""
        self._gateway_process = value

    @property
    def gateway_port(self) -> int:
        """Port recorded for the Gateway (8765 until changed)."""
        return self._gateway_port

    @gateway_port.setter
    def gateway_port(self, value: int) -> None:
        """Record a new Gateway port without taking the lock."""
        self._gateway_port = value

    # --- lock-protected accessors -------------------------------------------

    async def set_runtime_manager(self, manager: Any) -> None:
        """Store ``manager`` while holding the state lock."""
        async with self._state_lock:
            self._runtime_manager = manager

    async def get_runtime_manager(self) -> Optional[Any]:
        """Return the runtime manager while holding the state lock."""
        async with self._state_lock:
            return self._runtime_manager

    async def set_gateway_process(self, process: Optional[subprocess.Popen]) -> None:
        """Store the Gateway subprocess handle while holding the state lock."""
        async with self._state_lock:
            self._gateway_process = process

    async def get_gateway_process(self) -> Optional[subprocess.Popen]:
        """Return the Gateway subprocess handle while holding the state lock."""
        async with self._state_lock:
            return self._gateway_process

    async def set_gateway_port(self, port: int) -> None:
        """Store the Gateway port while holding the state lock."""
        async with self._state_lock:
            self._gateway_port = port

    async def get_gateway_port(self) -> int:
        """Return the Gateway port while holding the state lock."""
        async with self._state_lock:
            return self._gateway_port
|
||
|
||
|
||
# Singleton instance, created when this module is imported.  The helpers and
# route handlers in this module read and update Gateway state through it.
_runtime_state = RuntimeState()
|
||
|
||
|
||
def get_runtime_state() -> RuntimeState:
    """Return the module-level ``RuntimeState`` object shared by this module."""
    return _runtime_state
|
||
|
||
|
||
# Backward compatibility: module-level runtime_manager for external imports.
# register_runtime_manager() assigns it and unregister_runtime_manager()
# resets it to None; both also mirror the value into the RuntimeState singleton.
runtime_manager: Optional[Any] = None
|
||
|
||
|
||
# Response body of GET /api/runtime/context.  The handler copies the three
# fields from the "context" entry of a runtime snapshot.
class RunContextResponse(BaseModel):
    config_name: str  # snapshot context "config_name"; other code here uses it as the run id
    run_dir: str  # snapshot context "run_dir"
    bootstrap_values: Dict[str, Any]  # snapshot context "bootstrap_values"
|
||
|
||
|
||
# One entry of a runtime snapshot's "agents" list; instances are built with
# RuntimeAgentState(**entry), so the snapshot keys must match these fields.
class RuntimeAgentState(BaseModel):
    agent_id: str
    status: str
    last_session: Optional[str] = None  # may be omitted from the snapshot entry
    last_updated: str  # NOTE(review): presumably a timestamp string - confirm against the snapshot writer
|
||
|
||
|
||
# Response body of GET /api/runtime/agents.
class RuntimeAgentsResponse(BaseModel):
    agents: List[RuntimeAgentState]  # one item per entry in the snapshot's "agents" list
|
||
|
||
|
||
# One entry of a runtime snapshot's "events" list; instances are built with
# RuntimeEvent(**entry), so the snapshot keys must match these fields.
class RuntimeEvent(BaseModel):
    timestamp: str  # _list_runs reports the last event's timestamp as a run's "updated_at"
    event: str
    details: Dict[str, Any]
    # No default is declared here.  NOTE(review): whether an entry without a
    # "session" key validates depends on the installed pydantic major version - confirm.
    session: Optional[str]
|
||
|
||
|
||
# Response body of GET /api/runtime/events.
class RuntimeEventsResponse(BaseModel):
    events: List[RuntimeEvent]  # one item per entry in the snapshot's "events" list
|
||
|
||
|
||
# Request body of POST /api/runtime/start and POST /api/runtime/restart.
# The English comments below translate the Chinese field descriptions, which
# are runtime strings and are left untouched.
class LaunchConfig(BaseModel):
    """Configuration for launching a new trading task."""
    # Launch mode: fresh, restore
    launch_mode: str = Field(default="fresh", description="启动形式: fresh, restore")
    # run_id of a historical task, used for a restore launch
    restore_run_id: Optional[str] = Field(default=None, description="历史任务 run_id,用于恢复启动")
    # Stock pool (list of tickers)
    tickers: List[str] = Field(default_factory=list, description="股票池")
    # Schedule mode: daily, interval.
    # NOTE(review): _normalize_runtime_config_updates accepts "daily"/"intraday" - confirm which set is intended.
    schedule_mode: str = Field(default="daily", description="调度模式: daily, interval")
    # Interval in minutes
    interval_minutes: int = Field(default=60, ge=1, description="间隔分钟数")
    # Trigger time HH:MM
    trigger_time: str = Field(default="09:30", description="触发时间 HH:MM")
    # Maximum number of consultation rounds
    max_comm_cycles: int = Field(default=2, ge=1, description="最大会商轮数")
    # Initial cash
    initial_cash: float = Field(default=100000.0, gt=0, description="初始资金")
    # Margin requirement
    margin_requirement: float = Field(default=0.0, ge=0, description="保证金要求")
    # Whether to enable long-term memory
    enable_memory: bool = Field(default=False, description="是否启用长期记忆")
    # Run mode: live, backtest
    mode: str = Field(default="live", description="运行模式: live, backtest")
    # Backtest start date YYYY-MM-DD
    start_date: Optional[str] = Field(default=None, description="回测开始日期 YYYY-MM-DD")
    # Backtest end date YYYY-MM-DD
    end_date: Optional[str] = Field(default=None, description="回测结束日期 YYYY-MM-DD")
    # Market data polling interval (seconds)
    poll_interval: int = Field(default=10, ge=1, le=300, description="市场数据轮询间隔(秒)")
|
||
|
||
|
||
# Response body of POST /api/runtime/start.
class LaunchResponse(BaseModel):
    run_id: str  # id of the run that was started (new timestamp id, or the restored run's id)
    status: str  # start_runtime sets this to "started"
    run_dir: str  # string form of the run directory path
    gateway_port: int  # port chosen for the Gateway subprocess
    message: str  # human-readable summary
|
||
|
||
|
||
# Summary of one directory under <PROJECT_ROOT>/runs, as assembled by _list_runs.
class RuntimeHistoryItem(BaseModel):
    run_id: str  # name of the run directory
    run_dir: str  # string form of the run directory path
    updated_at: Optional[str] = None  # timestamp of the last snapshot event, when available
    total_trades: int = 0  # "totalTrades" from team_dashboard/summary.json
    total_asset_value: Optional[float] = None  # "totalAssetValue" from team_dashboard/summary.json
    bootstrap: Dict[str, Any] = Field(default_factory=dict)  # snapshot context "bootstrap_values"
|
||
|
||
|
||
# Response body of GET /api/runtime/history.
class RuntimeHistoryResponse(BaseModel):
    runs: List[RuntimeHistoryItem]  # ordered by directory modification time, newest first
|
||
|
||
|
||
# Response body of POST /api/runtime/stop.
class StopResponse(BaseModel):
    status: str  # stop_runtime sets this to "stopped"
    message: str  # human-readable summary
|
||
|
||
|
||
# Response body of POST /api/runtime/cleanup.
class CleanupResponse(BaseModel):
    status: str  # cleanup_old_runs sets this to "ok"
    kept: int  # the retention count that was applied (at least 1)
    pruned_run_ids: List[str]  # names of the run directories passed to the delete step
|
||
|
||
|
||
# Response body of GET /api/runtime/gateway/status.
class GatewayStatusResponse(BaseModel):
    is_running: bool  # result of _is_gateway_running()
    port: int  # Gateway port currently recorded in the RuntimeState singleton
    run_id: Optional[str] = None  # active run id; stays None when stopped or when it cannot be resolved
|
||
|
||
|
||
# Response body of GET and PUT /api/runtime/config.
class RuntimeConfigResponse(BaseModel):
    run_id: str
    is_running: bool  # _resolve_runtime_response always sets this to True
    gateway_port: int
    bootstrap: Dict[str, Any]  # bootstrap values of the active run
    resolved: Dict[str, Any]  # value returned by resolve_runtime_config()
|
||
|
||
|
||
# Response body of GET /api/runtime/logs.
class RuntimeLogResponse(BaseModel):
    run_id: Optional[str] = None  # run whose log was read; None when no run context is available
    is_running: bool
    log_path: Optional[str] = None  # string form of the gateway.log path, when a run id is known
    content: str = ""  # tail of the log file; empty when the file is missing
|
||
|
||
|
||
# Request body of PUT /api/runtime/config.  Every field is optional; fields
# left as None are ignored by _normalize_runtime_config_updates.
class UpdateRuntimeConfigRequest(BaseModel):
    schedule_mode: Optional[str] = None  # validated later against "daily" / "intraday"
    interval_minutes: Optional[int] = Field(default=None, ge=1)
    trigger_time: Optional[str] = None  # validated later as HH:MM or the literal "now"
    max_comm_cycles: Optional[int] = Field(default=None, ge=1)
    initial_cash: Optional[float] = Field(default=None, gt=0)
    margin_requirement: Optional[float] = Field(default=None, ge=0)
    enable_memory: Optional[bool] = None
|
||
|
||
|
||
def _generate_run_id() -> str:
    """Build a run id from the current local time, formatted YYYYMMDD_HHMMSS."""
    return f"{datetime.now():%Y%m%d_%H%M%S}"
|
||
|
||
|
||
def _get_run_dir(run_id: str) -> Path:
    """Map a run id to its directory, ``<PROJECT_ROOT>/runs/<run_id>``."""
    return PROJECT_ROOT.joinpath("runs", run_id)
|
||
|
||
|
||
def _load_run_snapshot(run_id: str) -> Dict[str, Any]:
    """Read and parse ``state/runtime_state.json`` of the given run.

    Raises:
        HTTPException: 404 when the snapshot file does not exist.
    """
    snapshot_file = _get_run_dir(run_id).joinpath("state", "runtime_state.json")
    if snapshot_file.exists():
        return json.loads(snapshot_file.read_text(encoding="utf-8"))
    raise HTTPException(status_code=404, detail=f"Run snapshot not found: {run_id}")
|
||
|
||
|
||
def _copy_path_if_exists(src: Path, dst: Path) -> None:
    """Copy ``src`` to ``dst``; do nothing when ``src`` does not exist.

    A directory is copied recursively and merged into an existing ``dst``.
    For a file, the parent directory of ``dst`` is created first and the file
    is copied with ``shutil.copy2``.
    """
    if src.is_dir():
        shutil.copytree(src, dst, dirs_exist_ok=True)
    elif src.exists():
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)
|
||
|
||
|
||
def _restore_run_assets(source_run_id: str, target_run_dir: Path) -> None:
    """Copy a fixed set of assets from a historical run into ``target_run_dir``.

    Entries that do not exist in the source run are skipped.

    Raises:
        HTTPException: 404 when the source run directory does not exist.
    """
    source_run_dir = _get_run_dir(source_run_id)
    if not source_run_dir.exists():
        raise HTTPException(status_code=404, detail=f"Source run not found: {source_run_id}")

    # Paths are relative to the run directory; both directories and single
    # files are listed.
    restorable = (
        "team_dashboard",
        "agents",
        "skills",
        "memory",
        "state/server_state.json",
        "state/runtime.db",
        "state/research.db",
    )
    for relative in restorable:
        _copy_path_if_exists(source_run_dir / relative, target_run_dir / relative)
|
||
|
||
|
||
def _list_runs(limit: int = 50) -> list[RuntimeHistoryItem]:
    """Summarize the most recently modified run directories.

    Directories under ``<PROJECT_ROOT>/runs`` are ordered by modification
    time, newest first, and at most ``max(1, limit)`` of them are reported.
    Bootstrap values and the last event timestamp come from
    ``state/runtime_state.json``; trade count and asset value come from
    ``team_dashboard/summary.json``.  A file that is missing or cannot be
    parsed leaves the corresponding fields at their defaults.
    """
    runs_root = PROJECT_ROOT / "runs"
    if not runs_root.exists():
        return []

    newest_first = [entry for entry in runs_root.iterdir() if entry.is_dir()]
    newest_first.sort(key=lambda entry: entry.stat().st_mtime, reverse=True)

    history: list[RuntimeHistoryItem] = []
    for run_dir in newest_first[: max(1, int(limit))]:
        state_file = run_dir / "state" / "runtime_state.json"
        summary_file = run_dir / "team_dashboard" / "summary.json"

        bootstrap: Dict[str, Any] = {}
        updated_at: Optional[str] = None
        if state_file.exists():
            try:
                snapshot = json.loads(state_file.read_text(encoding="utf-8"))
                context = snapshot.get("context") or {}
                bootstrap = dict(context.get("bootstrap_values") or {})
                events = snapshot.get("events")
                if events:
                    updated_at = events[-1].get("timestamp")
            except Exception:
                # Best effort: an unreadable snapshot only costs the bootstrap data.
                bootstrap = {}

        total_trades = 0
        total_asset_value: Optional[float] = None
        if summary_file.exists():
            try:
                summary = json.loads(summary_file.read_text(encoding="utf-8"))
                total_trades = int(summary.get("totalTrades") or 0)
                raw_asset_value = summary.get("totalAssetValue")
                if raw_asset_value is not None:
                    total_asset_value = float(raw_asset_value)
            except Exception:
                # Best effort: fall back to the defaults for both figures.
                total_trades = 0
                total_asset_value = None

        history.append(
            RuntimeHistoryItem(
                run_id=run_dir.name,
                run_dir=str(run_dir),
                updated_at=updated_at,
                total_trades=total_trades,
                total_asset_value=total_asset_value,
                bootstrap=bootstrap,
            )
        )

    return history
|
||
|
||
|
||
def _is_timestamped_run_dir(path: Path) -> bool:
    """Tell whether the final path component parses as YYYYMMDD_HHMMSS."""
    try:
        datetime.strptime(path.name, "%Y%m%d_%H%M%S")
    except ValueError:
        return False
    return True
|
||
|
||
|
||
def _prune_old_timestamped_runs(*, keep: int = 20, exclude_run_ids: Optional[set[str]] = None) -> list[str]:
    """Delete old timestamp-named run directories.

    Only directories whose name parses as YYYYMMDD_HHMMSS and is not listed
    in ``exclude_run_ids`` are considered.  They are ordered by name,
    newest first; the first ``keep`` are preserved and the rest are removed.
    Removal errors are ignored.

    Returns:
        Names of the directories passed to the delete step.
    """
    protected = exclude_run_ids or set()
    runs_root = PROJECT_ROOT / "runs"
    if not runs_root.exists():
        return []

    eligible = [
        entry
        for entry in runs_root.iterdir()
        if entry.is_dir() and _is_timestamped_run_dir(entry) and entry.name not in protected
    ]
    # The timestamp format sorts chronologically as plain text.
    eligible.sort(key=lambda entry: entry.name, reverse=True)

    removed: list[str] = []
    for stale in eligible[max(0, keep):]:
        shutil.rmtree(stale, ignore_errors=True)
        removed.append(stale.name)
    return removed
|
||
|
||
|
||
def _find_available_port(start_port: int = 8765, max_port: int = 9000) -> int:
    """Return the first port in ``[start_port, max_port)`` that nothing accepts on.

    A port counts as available when a TCP connect to ``localhost`` on that
    port does not succeed.

    Raises:
        RuntimeError: when every port in the range accepts a connection, or
            the range is empty.
    """
    import socket

    for candidate in range(start_port, max_port):
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            nothing_listening = probe.connect_ex(('localhost', candidate)) != 0
        finally:
            probe.close()
        if nothing_listening:
            return candidate
    raise RuntimeError("No available port found")
|
||
|
||
|
||
def _is_gateway_running() -> bool:
    """Report whether a Gateway appears to be running.

    The managed subprocess is checked first.  When there is none, or it has
    exited, a TCP connection to 127.0.0.1 on the recorded Gateway port is
    attempted (1 second timeout) so that an externally started Gateway is
    detected as well.
    """
    managed = _runtime_state.gateway_process
    if managed is not None and managed.poll() is None:
        return True

    # Fallback: probe the gateway port (covers an externally started gateway)
    import socket

    try:
        with socket.create_connection(("127.0.0.1", _runtime_state.gateway_port), timeout=1):
            pass
    except OSError:
        return False
    else:
        return True
|
||
|
||
|
||
def _stop_gateway() -> bool:
    """Terminate the managed Gateway subprocess.

    Returns:
        False when no subprocess handle is recorded, True otherwise.  The
        handle is cleared in every case where one was recorded, even if
        shutting the process down raised an error (which is logged).
    """
    gateway = _runtime_state.gateway_process
    if gateway is None:
        return False

    try:
        # Ask politely first and give the process five seconds to exit.
        gateway.terminate()
        try:
            gateway.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # Still alive: kill it and reap the exit status.
            gateway.kill()
            gateway.wait()
    except Exception as e:
        logger.warning(f"Error during gateway shutdown: {e}")
    finally:
        _runtime_state.gateway_process = None

    return True
|
||
|
||
|
||
def _start_gateway_process(
    run_id: str,
    run_dir: Path,
    bootstrap: Dict[str, Any],
    port: int
) -> subprocess.Popen:
    """Launch ``backend.gateway_server`` as a child process.

    The child runs with the current interpreter, a copy of the current
    environment and ``PROJECT_ROOT`` as working directory.  Its stdout and
    stderr are appended to ``<run_dir>/logs/gateway.log``.  The bootstrap
    mapping is passed on the command line as JSON.

    Returns:
        The ``subprocess.Popen`` handle of the started process.
    """
    log_path = run_dir / "logs" / "gateway.log"
    log_path.parent.mkdir(parents=True, exist_ok=True)

    command = [sys.executable, "-m", "backend.gateway_server"]
    command += ["--run-id", run_id]
    command += ["--run-dir", str(run_dir)]
    command += ["--port", str(port)]
    command += ["--bootstrap", json.dumps(bootstrap)]

    child_env = os.environ.copy()

    # The child receives the log file descriptor at spawn time, so this
    # process closes its own handle as soon as Popen returns.
    with log_path.open("ab") as log_file:
        process = subprocess.Popen(
            command,
            env=child_env,
            stdout=log_file,
            stderr=subprocess.STDOUT,
            cwd=PROJECT_ROOT,
        )

    return process
|
||
|
||
|
||
@router.get("/context", response_model=RunContextResponse)
async def get_run_context() -> RunContextResponse:
    """Return active runtime context, or latest persisted context when stopped."""
    # Live snapshot while a Gateway is up, newest snapshot on disk otherwise.
    if _is_gateway_running():
        snapshot = _get_active_runtime_snapshot()
    else:
        snapshot = _load_latest_runtime_snapshot()

    context = snapshot.get("context")
    if context is None:
        # The snapshot exists but carries no "context" entry yet.
        raise HTTPException(status_code=404, detail="Run context is not ready")

    config_name = context["config_name"]
    run_dir = context["run_dir"]
    bootstrap_values = context["bootstrap_values"]
    return RunContextResponse(
        config_name=config_name,
        run_dir=run_dir,
        bootstrap_values=bootstrap_values,
    )
|
||
|
||
|
||
@router.get("/agents", response_model=RuntimeAgentsResponse)
async def get_runtime_agents() -> RuntimeAgentsResponse:
    """Return agent states from the active runtime, or latest persisted run."""
    if _is_gateway_running():
        snapshot = _get_active_runtime_snapshot()
    else:
        snapshot = _load_latest_runtime_snapshot()

    # Each snapshot entry is expanded into the response model's keyword arguments.
    agent_states = []
    for entry in snapshot.get("agents", []):
        agent_states.append(RuntimeAgentState(**entry))

    return RuntimeAgentsResponse(agents=agent_states)
|
||
|
||
|
||
@router.get("/events", response_model=RuntimeEventsResponse)
async def get_runtime_events() -> RuntimeEventsResponse:
    """Return events from the active runtime, or latest persisted run."""
    if _is_gateway_running():
        snapshot = _get_active_runtime_snapshot()
    else:
        snapshot = _load_latest_runtime_snapshot()

    # Each snapshot entry is expanded into the response model's keyword arguments.
    runtime_events = []
    for entry in snapshot.get("events", []):
        runtime_events.append(RuntimeEvent(**entry))

    return RuntimeEventsResponse(events=runtime_events)
|
||
|
||
|
||
@router.get("/history", response_model=RuntimeHistoryResponse)
async def get_runtime_history(limit: int = 20) -> RuntimeHistoryResponse:
    """List recent historical runs for restore/start selection."""
    # ``limit`` is a query parameter; _list_runs reports at least one run directory.
    recent_runs = _list_runs(limit=limit)
    return RuntimeHistoryResponse(runs=recent_runs)
|
||
|
||
|
||
@router.get("/gateway/status", response_model=GatewayStatusResponse)
async def get_gateway_status() -> GatewayStatusResponse:
    """Get Gateway process status and port."""
    running = _is_gateway_running()

    active_run_id = None
    if running:
        # Resolving the run id is best effort: a failure is logged and the
        # status is still reported, with run_id left as None.
        try:
            active_run_id = _get_active_runtime_context().get("config_name")
        except Exception as e:
            logger.warning(f"Failed to resolve active runtime context: {e}")

    return GatewayStatusResponse(
        is_running=running,
        port=_runtime_state.gateway_port,
        run_id=active_run_id,
    )
|
||
|
||
|
||
@router.get("/gateway/port")
async def get_gateway_port(request: Request) -> Dict[str, Any]:
    """Get WebSocket Gateway port for frontend connection."""
    port = _runtime_state.gateway_port
    running = _is_gateway_running()
    # The URL is derived from the incoming request (including forwarded
    # headers) so that it matches the address the client used.
    ws_url = _build_gateway_ws_url(request, port)
    return dict(port=port, is_running=running, ws_url=ws_url)
|
||
|
||
|
||
@router.get("/logs", response_model=RuntimeLogResponse)
async def get_runtime_logs() -> RuntimeLogResponse:
    """Return current runtime log tail, or the latest run log if runtime is stopped."""
    try:
        if _is_gateway_running():
            context = _get_active_runtime_context()
        else:
            context = _get_runtime_context_from_latest_snapshot()
    except HTTPException:
        # No usable run context anywhere: answer with an empty log instead of an error.
        return RuntimeLogResponse(is_running=False, content="")

    # A blank or missing config_name is treated as "no run id".
    run_id = str(context.get("config_name") or "").strip() or None
    if run_id:
        log_path = _get_gateway_log_path_for_run(run_id)
        content = _read_log_tail(log_path)
    else:
        log_path = None
        content = ""

    return RuntimeLogResponse(
        run_id=run_id,
        is_running=_is_gateway_running(),
        log_path=None if log_path is None else str(log_path),
        content=content,
    )
|
||
|
||
|
||
def _build_gateway_ws_url(request: Request, port: int) -> str:
    """Build a proxy-safe Gateway WebSocket URL.

    The scheme and host are taken from the first entry of the
    ``x-forwarded-proto`` / ``x-forwarded-host`` headers when present, and
    from the request URL otherwise.  Any port carried by the host value is
    dropped and replaced by ``port``.

    Args:
        request: Incoming request; only ``headers`` and ``url.scheme`` /
            ``url.hostname`` are read.
        port: Gateway port to place in the URL.

    Returns:
        ``ws://host:port`` or ``wss://host:port``.  IPv6 literals are always
        emitted in brackets.
    """
    forwarded_proto = request.headers.get("x-forwarded-proto", "").split(",")[0].strip()
    # Header values are not guaranteed to be lower-case; a proxy may also
    # already report the WebSocket scheme itself.
    scheme = (forwarded_proto or request.url.scheme or "").lower()
    ws_scheme = "wss" if scheme in {"https", "wss"} else "ws"

    forwarded_host = request.headers.get("x-forwarded-host", "").split(",")[0].strip()
    host = forwarded_host or request.url.hostname or "localhost"

    if host.startswith("["):
        # Bracketed IPv6 literal, optionally followed by ":port".
        closing = host.find("]")
        if closing != -1:
            host = host[: closing + 1]
    elif host.count(":") == 1:
        # "name:port" or "a.b.c.d:port" - drop the port.
        host = host.split(":", 1)[0] or "localhost"
    elif ":" in host:
        # Bare IPv6 literal (e.g. "::1"); it must be bracketed before a port
        # can be appended.
        host = f"[{host}]"

    return f"{ws_scheme}://{host}:{port}"
|
||
|
||
|
||
def _load_latest_runtime_snapshot() -> Dict[str, Any]:
    """Parse the most recently modified ``runs/*/state/runtime_state.json``.

    Raises:
        HTTPException: 404 when no run has a snapshot file.
    """
    newest = max(
        PROJECT_ROOT.glob("runs/*/state/runtime_state.json"),
        key=lambda candidate: candidate.stat().st_mtime,
        default=None,
    )
    if newest is None:
        raise HTTPException(status_code=404, detail="No runtime information available")
    return json.loads(newest.read_text(encoding="utf-8"))
|
||
|
||
|
||
def _get_active_runtime_snapshot() -> Dict[str, Any]:
    """Return the snapshot of the running runtime.

    The registered manager's ``build_snapshot()`` result is used when it
    carries a ``config_name`` in its context; otherwise the newest snapshot
    on disk is loaded.

    Raises:
        HTTPException: 404 when no Gateway is running (or, via the disk
            fallback, when no snapshot file exists).
    """
    if not _is_gateway_running():
        raise HTTPException(status_code=404, detail="No runtime is currently running")

    manager = _runtime_state.runtime_manager
    if manager is None or not hasattr(manager, "build_snapshot"):
        return _load_latest_runtime_snapshot()

    live_snapshot = manager.build_snapshot()
    live_context = live_snapshot.get("context") or {}
    if live_context.get("config_name"):
        return live_snapshot

    # The in-memory snapshot has no usable context yet; use the persisted one.
    return _load_latest_runtime_snapshot()
|
||
|
||
|
||
def _get_runtime_context_from_latest_snapshot() -> Dict[str, Any]:
    """Return the ``context`` of the newest snapshot on disk.

    The Gateway state is not consulted.

    Raises:
        HTTPException: 404 when the context is missing or has no
            ``config_name`` (or when no snapshot file exists).
    """
    persisted_context = _load_latest_runtime_snapshot().get("context") or {}
    if persisted_context.get("config_name"):
        return persisted_context
    raise HTTPException(status_code=404, detail="No runtime context available")
|
||
|
||
|
||
def _get_gateway_log_path_for_run(run_id: str) -> Path:
    """Return ``<run dir>/logs/gateway.log`` for the given run id."""
    return _get_run_dir(run_id).joinpath("logs", "gateway.log")
|
||
|
||
|
||
def _read_log_tail(path: Path, max_chars: int = 120_000) -> str:
    """Return at most the last ``max_chars`` characters of a UTF-8 text file.

    Undecodable bytes are replaced and line endings are normalized to
    ``"\\n"``, exactly as text-mode reading does.  For files larger than the
    requested tail only a bounded window at the end of the file is read, so
    the cost of a call does not grow with the size of the log.

    Args:
        path: File to read.
        max_chars: Maximum number of characters to return.

    Returns:
        The tail of the file, or ``""`` when ``path`` does not exist or is
        not a regular file.
    """
    if not path.exists() or not path.is_file():
        return ""

    if max_chars > 0:
        # A UTF-8 character occupies at most 4 bytes, so this window always
        # decodes to more than max_chars characters.  The 4 spare bytes absorb
        # a multi-byte character or a "\r\n" pair cut at the window start.
        window = 4 * max_chars + 4
        with path.open("rb") as handle:
            handle.seek(0, os.SEEK_END)
            if handle.tell() > window:
                handle.seek(-window, os.SEEK_END)
                tail = handle.read().decode("utf-8", errors="replace")
                # Same newline translation that text-mode reading applies.
                tail = tail.replace("\r\n", "\n").replace("\r", "\n")
                return tail[-max_chars:]

    # Small file (or a non-positive max_chars): read it whole.
    text = path.read_text(encoding="utf-8", errors="replace")
    if len(text) <= max_chars:
        return text
    return text[-max_chars:]
|
||
|
||
|
||
def _get_current_runtime_context() -> Dict[str, Any]:
    """Return the ``context`` of the running runtime's snapshot.

    Raises:
        HTTPException: 404 when no Gateway is running, or when the snapshot
            context is missing or has no ``config_name``.
    """
    if not _is_gateway_running():
        raise HTTPException(status_code=404, detail="No runtime is currently running")

    active_context = _get_active_runtime_snapshot().get("context") or {}
    if active_context.get("config_name"):
        return active_context
    raise HTTPException(status_code=404, detail="No runtime context available")
|
||
|
||
|
||
def _get_active_runtime_context() -> Dict[str, Any]:
    """Alias of ``_get_current_runtime_context``; returns the running runtime's context."""
    active_context = _get_current_runtime_context()
    return active_context
|
||
|
||
|
||
def _resolve_runtime_response(run_id: str) -> RuntimeConfigResponse:
    """Assemble the config response for the running runtime.

    Bootstrap values come from the active runtime context.  The scheduling
    and memory settings among them (with defaults for missing or empty
    values) are passed to ``resolve_runtime_config`` to obtain the
    ``resolved`` section.

    Raises:
        HTTPException: 404 when no runtime is running or it has no context.
    """
    active_context = _get_current_runtime_context()
    bootstrap = dict(active_context.get("bootstrap_values") or {})

    enable_memory = bool(bootstrap.get("enable_memory", False))
    schedule_mode = str(bootstrap.get("schedule_mode", "daily"))
    # ``or`` also replaces falsy stored values (0, "", None) with the default.
    interval_minutes = int(bootstrap.get("interval_minutes", 60) or 60)
    trigger_time = str(bootstrap.get("trigger_time", "09:30") or "09:30")

    resolved = resolve_runtime_config(
        project_root=PROJECT_ROOT,
        config_name=run_id,
        enable_memory=enable_memory,
        schedule_mode=schedule_mode,
        interval_minutes=interval_minutes,
        trigger_time=trigger_time,
    )

    return RuntimeConfigResponse(
        run_id=run_id,
        is_running=True,
        gateway_port=_runtime_state.gateway_port,
        bootstrap=bootstrap,
        resolved=resolved,
    )
|
||
|
||
|
||
def _normalize_runtime_config_updates(
    request: UpdateRuntimeConfigRequest,
) -> Dict[str, Any]:
    """Turn an update request into a dict of validated bootstrap values.

    Only fields that are not ``None`` are included.  ``schedule_mode`` is
    trimmed and lower-cased and must be ``daily`` or ``intraday``.
    ``trigger_time`` is trimmed and must be ``HH:MM`` or ``now``; a blank
    value becomes ``09:30``.  The remaining fields are cast to their target
    type.

    Raises:
        HTTPException: 400 for an invalid ``schedule_mode`` or
            ``trigger_time``, or when the request contains no updates.
    """
    updates: Dict[str, Any] = {}

    raw_mode = request.schedule_mode
    if raw_mode is not None:
        schedule_mode = str(raw_mode).strip().lower()
        if schedule_mode not in {"daily", "intraday"}:
            raise HTTPException(
                status_code=400,
                detail="schedule_mode must be 'daily' or 'intraday'",
            )
        updates["schedule_mode"] = schedule_mode

    if request.interval_minutes is not None:
        updates["interval_minutes"] = int(request.interval_minutes)

    raw_trigger = request.trigger_time
    if raw_trigger is not None:
        trigger_time = str(raw_trigger).strip()
        needs_format_check = bool(trigger_time) and trigger_time != "now"
        if needs_format_check:
            try:
                datetime.strptime(trigger_time, "%H:%M")
            except ValueError as exc:
                raise HTTPException(
                    status_code=400,
                    detail="trigger_time must use HH:MM or 'now'",
                ) from exc
        updates["trigger_time"] = trigger_time or "09:30"

    # The remaining fields need a type cast only.
    plain_casts = (
        ("max_comm_cycles", int),
        ("initial_cash", float),
        ("margin_requirement", float),
        ("enable_memory", bool),
    )
    for field_name, cast in plain_casts:
        raw_value = getattr(request, field_name)
        if raw_value is not None:
            updates[field_name] = cast(raw_value)

    if not updates:
        raise HTTPException(status_code=400, detail="No runtime config updates provided")

    return updates
|
||
|
||
|
||
@router.post("/start", response_model=LaunchResponse)
async def start_runtime(
    config: LaunchConfig,
    background_tasks: BackgroundTasks
) -> LaunchResponse:
    """Start a new trading runtime with the given configuration.

    1. Stop existing Gateway if running
    2. Generate run ID and directory
    3. Create runtime manager
    4. Start Gateway as subprocess (Data Plane)
    5. Return Gateway port for WebSocket connection
    """
    # Errors raised as HTTPException:
    #   400 - unknown launch_mode, or a missing/invalid restore_run_id
    #   404 - the run to restore has no snapshot or no context
    #   500 - the Gateway subprocess could not be started
    # ``background_tasks`` is accepted for interface compatibility and is not used.

    # Lazy import to avoid circular dependency
    from backend.runtime.manager import TradingRuntimeManager

    # 1. Stop existing Gateway
    if _is_gateway_running():
        _stop_gateway()
        await asyncio.sleep(1)  # Wait for port release

    launch_mode = str(config.launch_mode or "fresh").strip().lower()
    if launch_mode not in {"fresh", "restore"}:
        raise HTTPException(status_code=400, detail="launch_mode must be 'fresh' or 'restore'")

    # 2. Resolve run ID, directory, and bootstrap
    if launch_mode == "restore":
        restore_run_id = str(config.restore_run_id or "").strip()
        if not restore_run_id:
            raise HTTPException(status_code=400, detail="restore_run_id is required when launch_mode=restore")
        # The id comes from the request body and is joined into a filesystem
        # path; accept a single directory name only, so it cannot point
        # outside the runs directory.
        if restore_run_id in {".", ".."} or any(sep in restore_run_id for sep in ("/", "\\")):
            raise HTTPException(status_code=400, detail="restore_run_id must be a plain run directory name")
        snapshot = _load_run_snapshot(restore_run_id)
        context = snapshot.get("context") or {}
        if not context.get("config_name"):
            raise HTTPException(status_code=404, detail=f"Run context not found: {restore_run_id}")
        # A restored run continues in its original directory, with the
        # bootstrap values recorded in its snapshot.
        run_id = restore_run_id
        run_dir = _get_run_dir(run_id)
        bootstrap = dict(context.get("bootstrap_values") or {})
        bootstrap["launch_mode"] = "restore"
        bootstrap["restore_run_id"] = restore_run_id
    else:
        run_id = _generate_run_id()
        run_dir = _get_run_dir(run_id)
        bootstrap = {
            "launch_mode": "fresh",
            "restore_run_id": None,
            "tickers": config.tickers,
            "schedule_mode": config.schedule_mode,
            "interval_minutes": config.interval_minutes,
            "trigger_time": config.trigger_time,
            "max_comm_cycles": config.max_comm_cycles,
            "initial_cash": config.initial_cash,
            "margin_requirement": config.margin_requirement,
            "enable_memory": config.enable_memory,
            "mode": config.mode,
            "start_date": config.start_date,
            "end_date": config.end_date,
            "poll_interval": config.poll_interval,
        }

    # 3. Prune old timestamped runs; the run being started is never pruned
    retention_keep = max(1, int(os.getenv("RUNS_RETENTION_COUNT", "20") or "20"))
    pruned_run_ids = _prune_old_timestamped_runs(
        keep=retention_keep,
        exclude_run_ids={run_id},
    )
    if pruned_run_ids:
        logger.info("Pruned old run directories: %s", ", ".join(pruned_run_ids))

    # 4. Create runtime manager
    manager = TradingRuntimeManager(
        config_name=run_id,
        run_dir=run_dir,
        bootstrap=bootstrap,
    )
    manager.prepare_run()
    register_runtime_manager(manager)

    # 5. Write BOOTSTRAP.md
    _write_bootstrap_md(run_dir, bootstrap)

    # 6. Find available port and start Gateway process
    gateway_port = _find_available_port(start_port=8765)
    _runtime_state.gateway_port = gateway_port

    try:
        process = _start_gateway_process(
            run_id=run_id,
            run_dir=run_dir,
            bootstrap=bootstrap,
            port=gateway_port
        )
        _runtime_state.gateway_process = process

        # Wait briefly to check if process started successfully
        await asyncio.sleep(2)

        if not _is_gateway_running():
            _runtime_state.gateway_process = None
            log_path = _get_gateway_log_path_for_run(run_id)
            log_tail = _read_log_tail(log_path, max_chars=4000)
            raise HTTPException(
                status_code=500,
                detail=f"Gateway failed to start: {log_tail or 'Unknown error'}"
            )

    except HTTPException:
        # Already a well-formed API error (it carries the log tail): clean up
        # and pass it through unchanged instead of wrapping it a second time.
        _stop_gateway()
        raise
    except Exception as e:
        _stop_gateway()
        raise HTTPException(status_code=500, detail=f"Failed to start Gateway: {str(e)}") from e

    return LaunchResponse(
        run_id=run_id,
        status="started",
        run_dir=str(run_dir),
        gateway_port=gateway_port,
        message=f"Runtime started with run_id: {run_id}, Gateway on port: {gateway_port}",
    )
|
||
|
||
|
||
@router.get("/config", response_model=RuntimeConfigResponse)
async def get_runtime_config() -> RuntimeConfigResponse:
    """Return the current runtime bootstrap and resolved settings."""
    # Raises 404 (from the context lookup) when no runtime is running.
    active_run_id = _get_current_runtime_context()["config_name"]
    return _resolve_runtime_response(active_run_id)
|
||
|
||
|
||
@router.put("/config", response_model=RuntimeConfigResponse)
async def update_runtime_config(
    request: UpdateRuntimeConfigRequest,
) -> RuntimeConfigResponse:
    """Persist selected runtime configuration updates for the active run."""
    run_id = _get_current_runtime_context()["config_name"]
    updates = _normalize_runtime_config_updates(request)
    updated = update_bootstrap_values_for_run(PROJECT_ROOT, run_id, updates)

    # Mirror the change into the in-memory manager, but only when it belongs
    # to the run that was just updated.
    manager = _runtime_state.runtime_manager
    manager_owns_run = manager is not None and getattr(manager, "config_name", None) == run_id
    if manager_owns_run:
        manager.bootstrap.update(updates)
        if getattr(manager, "context", None) is not None:
            manager.context.bootstrap_values.update(updates)
        if hasattr(manager, "_persist_snapshot"):
            manager._persist_snapshot()

    response = _resolve_runtime_response(run_id)
    # Report the values returned by the persistence call rather than the
    # ones read back from the snapshot.
    response.bootstrap = dict(updated.values)
    return response
|
||
|
||
|
||
@router.post("/stop", response_model=StopResponse)
async def stop_runtime(force: bool = True) -> StopResponse:
    """Stop the current running runtime."""
    # ``force`` is accepted but not consulted by this handler.
    if not _is_gateway_running():
        raise HTTPException(status_code=404, detail="No runtime is currently running")

    # Terminate the managed Gateway process, then drop the manager reference.
    _stop_gateway()
    unregister_runtime_manager()

    return StopResponse(status="stopped", message="Runtime stopped successfully")
|
||
|
||
|
||
@router.post("/cleanup", response_model=CleanupResponse)
async def cleanup_old_runs(keep: int = 20) -> CleanupResponse:
    """Prune old timestamped run directories while preserving named runs."""
    keep_count = max(1, int(keep))

    # The run that is currently active must never be pruned.
    protected: set[str] = set()
    if _is_gateway_running():
        try:
            active_run_id = str(_get_active_runtime_context().get("config_name") or "").strip()
        except HTTPException:
            # No resolvable context: nothing to protect.
            active_run_id = ""
        if active_run_id:
            protected.add(active_run_id)

    pruned = _prune_old_timestamped_runs(keep=keep_count, exclude_run_ids=protected)
    return CleanupResponse(status="ok", kept=keep_count, pruned_run_ids=pruned)
|
||
|
||
|
||
@router.post("/restart")
async def restart_runtime(
    config: LaunchConfig,
    background_tasks: BackgroundTasks
):
    """Restart the runtime with a new configuration."""
    # stop_runtime raises 404 when nothing is running, so a restart requires
    # an active runtime.
    await stop_runtime(force=True)

    response = await start_runtime(config, background_tasks)

    return dict(
        run_id=response.run_id,
        status="restarted",
        gateway_port=response.gateway_port,
        message=f"Runtime restarted with run_id: {response.run_id}",
    )
|
||
|
||
|
||
@router.get("/current")
async def get_current_runtime():
    """Get information about the currently running runtime."""
    if not _is_gateway_running():
        raise HTTPException(status_code=404, detail="No runtime is currently running")

    active_context = _get_active_runtime_context()

    return dict(
        run_id=active_context.get("config_name"),
        run_dir=active_context.get("run_dir"),
        is_running=True,
        gateway_port=_runtime_state.gateway_port,
        bootstrap=active_context.get("bootstrap_values", {}),
    )
|
||
|
||
|
||
def register_runtime_manager(manager: Any) -> None:
    """Publish ``manager`` as the active runtime manager.

    The reference is stored both on the RuntimeState singleton and in the
    module-level ``runtime_manager`` name kept for external imports.
    """
    global runtime_manager
    _runtime_state.runtime_manager = manager
    # Keep the legacy module attribute in step with the singleton.
    runtime_manager = manager
|
||
|
||
|
||
def unregister_runtime_manager() -> None:
    """Clear the active runtime manager.

    Both the RuntimeState singleton and the module-level ``runtime_manager``
    name are reset to ``None``.
    """
    global runtime_manager
    _runtime_state.runtime_manager = None
    # Keep the legacy module attribute in step with the singleton.
    runtime_manager = None
|
||
|
||
|
||
def _write_bootstrap_md(run_dir: Path, bootstrap: Dict[str, Any]) -> None:
    """Write the bootstrap values to ``<run_dir>/BOOTSTRAP.md`` as front matter.

    Entries whose value is ``None`` are left out.  The values are serialized
    as YAML when PyYAML is importable and as indented JSON otherwise, and
    are enclosed between two ``---`` delimiter lines.

    Args:
        run_dir: Run directory; created if it does not exist.
        bootstrap: Bootstrap values to record.
    """
    try:
        import yaml
    except ImportError:
        yaml = None

    bootstrap_path = run_dir / "BOOTSTRAP.md"
    bootstrap_path.parent.mkdir(parents=True, exist_ok=True)

    # Filter out None values
    values = {k: v for k, v in bootstrap.items() if v is not None}

    if yaml:
        front_matter = yaml.safe_dump(values, allow_unicode=True, sort_keys=False)
    else:
        front_matter = json.dumps(values, ensure_ascii=False, indent=2)

    # json.dumps output has no trailing newline; without one the closing
    # delimiter would end up on the same line as the last value.
    if not front_matter.endswith("\n"):
        front_matter += "\n"

    content = f"---\n{front_matter}---\n"
    bootstrap_path.write_text(content, encoding="utf-8")
|