Initial commit of integrated agent system
This commit is contained in:
969
backend/api/runtime.py
Normal file
969
backend/api/runtime.py
Normal file
@@ -0,0 +1,969 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Runtime API routes - Control Plane for managing Gateway processes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from backend.runtime.agent_runtime import AgentRuntimeState
|
||||
from backend.config.bootstrap_config import (
|
||||
resolve_runtime_config,
|
||||
update_bootstrap_values_for_run,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/api/runtime", tags=["runtime"])
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
||||
|
||||
|
||||
class RuntimeState:
|
||||
"""Thread-safe singleton for managing runtime state.
|
||||
|
||||
Encapsulates runtime_manager, _gateway_process, and _gateway_port
|
||||
with asyncio.Lock protection for concurrent access.
|
||||
"""
|
||||
|
||||
_instance: Optional["RuntimeState"] = None
|
||||
_lock: "threading.Lock" = __import__("threading").Lock()
|
||||
|
||||
def __new__(cls) -> "RuntimeState":
|
||||
with cls._lock:
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
cls._instance._initialized = False
|
||||
return cls._instance
|
||||
|
||||
def __init__(self) -> None:
|
||||
if self._initialized:
|
||||
return
|
||||
self._runtime_manager: Optional[Any] = None
|
||||
self._gateway_process: Optional[subprocess.Popen] = None
|
||||
self._gateway_port: int = 8765
|
||||
self._state_lock = asyncio.Lock()
|
||||
self._initialized = True
|
||||
|
||||
@property
|
||||
async def lock(self) -> asyncio.Lock:
|
||||
"""Get the asyncio lock for state synchronization."""
|
||||
return self._state_lock
|
||||
|
||||
@property
|
||||
def runtime_manager(self) -> Optional[Any]:
|
||||
"""Get the runtime manager (no lock - read only)."""
|
||||
return self._runtime_manager
|
||||
|
||||
@runtime_manager.setter
|
||||
def runtime_manager(self, value: Optional[Any]) -> None:
|
||||
"""Set the runtime manager."""
|
||||
self._runtime_manager = value
|
||||
|
||||
@property
|
||||
def gateway_process(self) -> Optional[subprocess.Popen]:
|
||||
"""Get the gateway process (no lock - read only)."""
|
||||
return self._gateway_process
|
||||
|
||||
@gateway_process.setter
|
||||
def gateway_process(self, value: Optional[subprocess.Popen]) -> None:
|
||||
"""Set the gateway process."""
|
||||
self._gateway_process = value
|
||||
|
||||
@property
|
||||
def gateway_port(self) -> int:
|
||||
"""Get the gateway port."""
|
||||
return self._gateway_port
|
||||
|
||||
@gateway_port.setter
|
||||
def gateway_port(self, value: int) -> None:
|
||||
"""Set the gateway port."""
|
||||
self._gateway_port = value
|
||||
|
||||
async def set_runtime_manager(self, manager: Any) -> None:
|
||||
"""Set runtime manager with lock protection."""
|
||||
async with self._state_lock:
|
||||
self._runtime_manager = manager
|
||||
|
||||
async def get_runtime_manager(self) -> Optional[Any]:
|
||||
"""Get runtime manager with lock protection."""
|
||||
async with self._state_lock:
|
||||
return self._runtime_manager
|
||||
|
||||
async def set_gateway_process(self, process: Optional[subprocess.Popen]) -> None:
|
||||
"""Set gateway process with lock protection."""
|
||||
async with self._state_lock:
|
||||
self._gateway_process = process
|
||||
|
||||
async def get_gateway_process(self) -> Optional[subprocess.Popen]:
|
||||
"""Get gateway process with lock protection."""
|
||||
async with self._state_lock:
|
||||
return self._gateway_process
|
||||
|
||||
async def set_gateway_port(self, port: int) -> None:
|
||||
"""Set gateway port with lock protection."""
|
||||
async with self._state_lock:
|
||||
self._gateway_port = port
|
||||
|
||||
async def get_gateway_port(self) -> int:
|
||||
"""Get gateway port with lock protection."""
|
||||
async with self._state_lock:
|
||||
return self._gateway_port
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_runtime_state = RuntimeState()
|
||||
|
||||
|
||||
def get_runtime_state() -> RuntimeState:
|
||||
"""Get the RuntimeState singleton instance."""
|
||||
return _runtime_state
|
||||
|
||||
|
||||
# Backward compatibility: module-level runtime_manager for external imports
|
||||
# This is set by register_runtime_manager() for backward compatibility
|
||||
runtime_manager: Optional[Any] = None
|
||||
|
||||
|
||||
class RunContextResponse(BaseModel):
|
||||
config_name: str
|
||||
run_dir: str
|
||||
bootstrap_values: Dict[str, Any]
|
||||
|
||||
|
||||
class RuntimeAgentState(BaseModel):
|
||||
agent_id: str
|
||||
status: str
|
||||
last_session: Optional[str] = None
|
||||
last_updated: str
|
||||
|
||||
|
||||
class RuntimeAgentsResponse(BaseModel):
|
||||
agents: List[RuntimeAgentState]
|
||||
|
||||
|
||||
class RuntimeEvent(BaseModel):
|
||||
timestamp: str
|
||||
event: str
|
||||
details: Dict[str, Any]
|
||||
session: Optional[str]
|
||||
|
||||
|
||||
class RuntimeEventsResponse(BaseModel):
|
||||
events: List[RuntimeEvent]
|
||||
|
||||
|
||||
class LaunchConfig(BaseModel):
|
||||
"""Configuration for launching a new trading task."""
|
||||
launch_mode: str = Field(default="fresh", description="启动形式: fresh, restore")
|
||||
restore_run_id: Optional[str] = Field(default=None, description="历史任务 run_id,用于恢复启动")
|
||||
tickers: List[str] = Field(default_factory=list, description="股票池")
|
||||
schedule_mode: str = Field(default="daily", description="调度模式: daily, interval")
|
||||
interval_minutes: int = Field(default=60, ge=1, description="间隔分钟数")
|
||||
trigger_time: str = Field(default="09:30", description="触发时间 HH:MM")
|
||||
max_comm_cycles: int = Field(default=2, ge=1, description="最大会商轮数")
|
||||
initial_cash: float = Field(default=100000.0, gt=0, description="初始资金")
|
||||
margin_requirement: float = Field(default=0.0, ge=0, description="保证金要求")
|
||||
enable_memory: bool = Field(default=False, description="是否启用长期记忆")
|
||||
mode: str = Field(default="live", description="运行模式: live, backtest")
|
||||
start_date: Optional[str] = Field(default=None, description="回测开始日期 YYYY-MM-DD")
|
||||
end_date: Optional[str] = Field(default=None, description="回测结束日期 YYYY-MM-DD")
|
||||
poll_interval: int = Field(default=10, ge=1, le=300, description="市场数据轮询间隔(秒)")
|
||||
|
||||
|
||||
class LaunchResponse(BaseModel):
|
||||
run_id: str
|
||||
status: str
|
||||
run_dir: str
|
||||
gateway_port: int
|
||||
message: str
|
||||
|
||||
|
||||
class RuntimeHistoryItem(BaseModel):
|
||||
run_id: str
|
||||
run_dir: str
|
||||
updated_at: Optional[str] = None
|
||||
total_trades: int = 0
|
||||
total_asset_value: Optional[float] = None
|
||||
bootstrap: Dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class RuntimeHistoryResponse(BaseModel):
|
||||
runs: List[RuntimeHistoryItem]
|
||||
|
||||
|
||||
class StopResponse(BaseModel):
|
||||
status: str
|
||||
message: str
|
||||
|
||||
|
||||
class CleanupResponse(BaseModel):
|
||||
status: str
|
||||
kept: int
|
||||
pruned_run_ids: List[str]
|
||||
|
||||
|
||||
class GatewayStatusResponse(BaseModel):
|
||||
is_running: bool
|
||||
port: int
|
||||
run_id: Optional[str] = None
|
||||
|
||||
|
||||
class RuntimeConfigResponse(BaseModel):
|
||||
run_id: str
|
||||
is_running: bool
|
||||
gateway_port: int
|
||||
bootstrap: Dict[str, Any]
|
||||
resolved: Dict[str, Any]
|
||||
|
||||
|
||||
class RuntimeLogResponse(BaseModel):
|
||||
run_id: Optional[str] = None
|
||||
is_running: bool
|
||||
log_path: Optional[str] = None
|
||||
content: str = ""
|
||||
|
||||
|
||||
class UpdateRuntimeConfigRequest(BaseModel):
|
||||
schedule_mode: Optional[str] = None
|
||||
interval_minutes: Optional[int] = Field(default=None, ge=1)
|
||||
trigger_time: Optional[str] = None
|
||||
max_comm_cycles: Optional[int] = Field(default=None, ge=1)
|
||||
initial_cash: Optional[float] = Field(default=None, gt=0)
|
||||
margin_requirement: Optional[float] = Field(default=None, ge=0)
|
||||
enable_memory: Optional[bool] = None
|
||||
|
||||
|
||||
def _generate_run_id() -> str:
|
||||
"""Generate timestamp-based run ID: YYYYMMDD_HHMMSS"""
|
||||
return datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
|
||||
def _get_run_dir(run_id: str) -> Path:
|
||||
"""Return the run directory for a given run ID."""
|
||||
return PROJECT_ROOT / "runs" / run_id
|
||||
|
||||
|
||||
def _load_run_snapshot(run_id: str) -> Dict[str, Any]:
|
||||
"""Load a specific run snapshot by run_id."""
|
||||
snapshot_path = _get_run_dir(run_id) / "state" / "runtime_state.json"
|
||||
if not snapshot_path.exists():
|
||||
raise HTTPException(status_code=404, detail=f"Run snapshot not found: {run_id}")
|
||||
return json.loads(snapshot_path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def _copy_path_if_exists(src: Path, dst: Path) -> None:
|
||||
if not src.exists():
|
||||
return
|
||||
if src.is_dir():
|
||||
shutil.copytree(src, dst, dirs_exist_ok=True)
|
||||
else:
|
||||
dst.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(src, dst)
|
||||
|
||||
|
||||
def _restore_run_assets(source_run_id: str, target_run_dir: Path) -> None:
|
||||
"""Seed a fresh run directory from a historical run snapshot."""
|
||||
source_run_dir = _get_run_dir(source_run_id)
|
||||
if not source_run_dir.exists():
|
||||
raise HTTPException(status_code=404, detail=f"Source run not found: {source_run_id}")
|
||||
|
||||
for relative in [
|
||||
"team_dashboard",
|
||||
"agents",
|
||||
"skills",
|
||||
"memory",
|
||||
"state/server_state.json",
|
||||
"state/runtime.db",
|
||||
"state/research.db",
|
||||
]:
|
||||
_copy_path_if_exists(source_run_dir / relative, target_run_dir / relative)
|
||||
|
||||
|
||||
def _list_runs(limit: int = 50) -> list[RuntimeHistoryItem]:
|
||||
runs_root = PROJECT_ROOT / "runs"
|
||||
if not runs_root.exists():
|
||||
return []
|
||||
|
||||
items: list[RuntimeHistoryItem] = []
|
||||
run_dirs = sorted(
|
||||
[path for path in runs_root.iterdir() if path.is_dir()],
|
||||
key=lambda path: path.stat().st_mtime,
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
for run_dir in run_dirs[: max(1, int(limit))]:
|
||||
run_id = run_dir.name
|
||||
runtime_state_path = run_dir / "state" / "runtime_state.json"
|
||||
summary_path = run_dir / "team_dashboard" / "summary.json"
|
||||
|
||||
bootstrap: Dict[str, Any] = {}
|
||||
updated_at: Optional[str] = None
|
||||
total_trades = 0
|
||||
total_asset_value: Optional[float] = None
|
||||
|
||||
if runtime_state_path.exists():
|
||||
try:
|
||||
snapshot = json.loads(runtime_state_path.read_text(encoding="utf-8"))
|
||||
context = snapshot.get("context") or {}
|
||||
bootstrap = dict(context.get("bootstrap_values") or {})
|
||||
updated_at = snapshot.get("events", [{}])[-1].get("timestamp") if snapshot.get("events") else None
|
||||
except Exception:
|
||||
bootstrap = {}
|
||||
|
||||
if summary_path.exists():
|
||||
try:
|
||||
summary = json.loads(summary_path.read_text(encoding="utf-8"))
|
||||
total_trades = int(summary.get("totalTrades") or 0)
|
||||
total_asset_value = float(summary.get("totalAssetValue")) if summary.get("totalAssetValue") is not None else None
|
||||
except Exception:
|
||||
total_trades = 0
|
||||
total_asset_value = None
|
||||
|
||||
items.append(
|
||||
RuntimeHistoryItem(
|
||||
run_id=run_id,
|
||||
run_dir=str(run_dir),
|
||||
updated_at=updated_at,
|
||||
total_trades=total_trades,
|
||||
total_asset_value=total_asset_value,
|
||||
bootstrap=bootstrap,
|
||||
)
|
||||
)
|
||||
|
||||
return items
|
||||
|
||||
|
||||
def _is_timestamped_run_dir(path: Path) -> bool:
|
||||
try:
|
||||
datetime.strptime(path.name, "%Y%m%d_%H%M%S")
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def _prune_old_timestamped_runs(*, keep: int = 20, exclude_run_ids: Optional[set[str]] = None) -> list[str]:
|
||||
"""Prune old timestamped run directories, preserving the newest N and excluded ids."""
|
||||
exclude = exclude_run_ids or set()
|
||||
runs_root = PROJECT_ROOT / "runs"
|
||||
if not runs_root.exists():
|
||||
return []
|
||||
|
||||
candidates = sorted(
|
||||
[
|
||||
path
|
||||
for path in runs_root.iterdir()
|
||||
if path.is_dir() and _is_timestamped_run_dir(path) and path.name not in exclude
|
||||
],
|
||||
key=lambda path: path.name,
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
pruned: list[str] = []
|
||||
for path in candidates[max(0, keep):]:
|
||||
shutil.rmtree(path, ignore_errors=True)
|
||||
pruned.append(path.name)
|
||||
return pruned
|
||||
|
||||
|
||||
def _find_available_port(start_port: int = 8765, max_port: int = 9000) -> int:
|
||||
"""Find an available port for Gateway."""
|
||||
import socket
|
||||
for port in range(start_port, max_port):
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
if s.connect_ex(('localhost', port)) != 0:
|
||||
return port
|
||||
raise RuntimeError("No available port found")
|
||||
|
||||
|
||||
def _is_gateway_running() -> bool:
|
||||
"""Check if Gateway process is running.
|
||||
|
||||
Checks both the internally-managed gateway process and falls back to
|
||||
port availability (for externally-managed gateway processes).
|
||||
"""
|
||||
process = _runtime_state.gateway_process
|
||||
if process is not None and process.poll() is None:
|
||||
return True
|
||||
# Fallback: check if the gateway port is in use (for externally started gateway)
|
||||
import socket
|
||||
try:
|
||||
with socket.create_connection(("127.0.0.1", _runtime_state.gateway_port), timeout=1):
|
||||
return True
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
|
||||
def _stop_gateway() -> bool:
|
||||
"""Stop the Gateway process."""
|
||||
process = _runtime_state.gateway_process
|
||||
if process is None:
|
||||
return False
|
||||
|
||||
try:
|
||||
# Try graceful shutdown first
|
||||
process.terminate()
|
||||
try:
|
||||
process.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
# Force kill if graceful shutdown fails
|
||||
process.kill()
|
||||
process.wait()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error during gateway shutdown: {e}")
|
||||
finally:
|
||||
_runtime_state.gateway_process = None
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _start_gateway_process(
|
||||
run_id: str,
|
||||
run_dir: Path,
|
||||
bootstrap: Dict[str, Any],
|
||||
port: int
|
||||
) -> subprocess.Popen:
|
||||
"""Start Gateway as a separate process."""
|
||||
# Prepare environment
|
||||
env = os.environ.copy()
|
||||
|
||||
# Create command arguments
|
||||
cmd = [
|
||||
sys.executable,
|
||||
"-m", "backend.gateway_server",
|
||||
"--run-id", run_id,
|
||||
"--run-dir", str(run_dir),
|
||||
"--port", str(port),
|
||||
"--bootstrap", json.dumps(bootstrap)
|
||||
]
|
||||
|
||||
log_path = run_dir / "logs" / "gateway.log"
|
||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
log_file = log_path.open("ab")
|
||||
try:
|
||||
process = subprocess.Popen(
|
||||
cmd,
|
||||
env=env,
|
||||
stdout=log_file,
|
||||
stderr=subprocess.STDOUT,
|
||||
cwd=PROJECT_ROOT
|
||||
)
|
||||
finally:
|
||||
log_file.close()
|
||||
|
||||
return process
|
||||
|
||||
|
||||
@router.get("/context", response_model=RunContextResponse)
|
||||
async def get_run_context() -> RunContextResponse:
|
||||
"""Return active runtime context, or latest persisted context when stopped."""
|
||||
snapshot = _get_active_runtime_snapshot() if _is_gateway_running() else _load_latest_runtime_snapshot()
|
||||
context = snapshot.get("context")
|
||||
if context is None:
|
||||
raise HTTPException(status_code=404, detail="Run context is not ready")
|
||||
|
||||
return RunContextResponse(
|
||||
config_name=context["config_name"],
|
||||
run_dir=context["run_dir"],
|
||||
bootstrap_values=context["bootstrap_values"],
|
||||
)
|
||||
|
||||
|
||||
@router.get("/agents", response_model=RuntimeAgentsResponse)
|
||||
async def get_runtime_agents() -> RuntimeAgentsResponse:
|
||||
"""Return agent states from the active runtime, or latest persisted run."""
|
||||
snapshot = _get_active_runtime_snapshot() if _is_gateway_running() else _load_latest_runtime_snapshot()
|
||||
agents = snapshot.get("agents", [])
|
||||
|
||||
return RuntimeAgentsResponse(
|
||||
agents=[RuntimeAgentState(**a) for a in agents]
|
||||
)
|
||||
|
||||
|
||||
@router.get("/events", response_model=RuntimeEventsResponse)
|
||||
async def get_runtime_events() -> RuntimeEventsResponse:
|
||||
"""Return events from the active runtime, or latest persisted run."""
|
||||
snapshot = _get_active_runtime_snapshot() if _is_gateway_running() else _load_latest_runtime_snapshot()
|
||||
events = snapshot.get("events", [])
|
||||
|
||||
return RuntimeEventsResponse(
|
||||
events=[RuntimeEvent(**e) for e in events]
|
||||
)
|
||||
|
||||
|
||||
@router.get("/history", response_model=RuntimeHistoryResponse)
|
||||
async def get_runtime_history(limit: int = 20) -> RuntimeHistoryResponse:
|
||||
"""List recent historical runs for restore/start selection."""
|
||||
return RuntimeHistoryResponse(runs=_list_runs(limit=limit))
|
||||
|
||||
|
||||
@router.get("/gateway/status", response_model=GatewayStatusResponse)
|
||||
async def get_gateway_status() -> GatewayStatusResponse:
|
||||
"""Get Gateway process status and port."""
|
||||
is_running = _is_gateway_running()
|
||||
run_id = None
|
||||
|
||||
if is_running:
|
||||
try:
|
||||
run_id = _get_active_runtime_context().get("config_name")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to resolve active runtime context: {e}")
|
||||
|
||||
return GatewayStatusResponse(
|
||||
is_running=is_running,
|
||||
port=_runtime_state.gateway_port,
|
||||
run_id=run_id
|
||||
)
|
||||
|
||||
|
||||
@router.get("/gateway/port")
|
||||
async def get_gateway_port(request: Request) -> Dict[str, Any]:
|
||||
"""Get WebSocket Gateway port for frontend connection."""
|
||||
gateway_port = _runtime_state.gateway_port
|
||||
return {
|
||||
"port": gateway_port,
|
||||
"is_running": _is_gateway_running(),
|
||||
"ws_url": _build_gateway_ws_url(request, gateway_port),
|
||||
}
|
||||
|
||||
|
||||
@router.get("/logs", response_model=RuntimeLogResponse)
|
||||
async def get_runtime_logs() -> RuntimeLogResponse:
|
||||
"""Return current runtime log tail, or the latest run log if runtime is stopped."""
|
||||
try:
|
||||
context = _get_active_runtime_context() if _is_gateway_running() else _get_runtime_context_from_latest_snapshot()
|
||||
except HTTPException:
|
||||
return RuntimeLogResponse(is_running=False, content="")
|
||||
|
||||
run_id = str(context.get("config_name") or "").strip() or None
|
||||
log_path = _get_gateway_log_path_for_run(run_id) if run_id else None
|
||||
content = _read_log_tail(log_path) if log_path else ""
|
||||
|
||||
return RuntimeLogResponse(
|
||||
run_id=run_id,
|
||||
is_running=_is_gateway_running(),
|
||||
log_path=str(log_path) if log_path else None,
|
||||
content=content,
|
||||
)
|
||||
|
||||
|
||||
def _build_gateway_ws_url(request: Request, port: int) -> str:
|
||||
"""Build a proxy-safe Gateway WebSocket URL."""
|
||||
forwarded_proto = request.headers.get("x-forwarded-proto", "").split(",")[0].strip()
|
||||
scheme = forwarded_proto or request.url.scheme
|
||||
ws_scheme = "wss" if scheme == "https" else "ws"
|
||||
|
||||
forwarded_host = request.headers.get("x-forwarded-host", "").split(",")[0].strip()
|
||||
host = forwarded_host or request.url.hostname or "localhost"
|
||||
if ":" in host and not host.startswith("["):
|
||||
host = host.split(":", 1)[0]
|
||||
|
||||
return f"{ws_scheme}://{host}:{port}"
|
||||
|
||||
|
||||
def _load_latest_runtime_snapshot() -> Dict[str, Any]:
|
||||
"""Load the latest persisted runtime snapshot."""
|
||||
snapshots = sorted(
|
||||
PROJECT_ROOT.glob("runs/*/state/runtime_state.json"),
|
||||
key=lambda p: p.stat().st_mtime,
|
||||
reverse=True,
|
||||
)
|
||||
if not snapshots:
|
||||
raise HTTPException(status_code=404, detail="No runtime information available")
|
||||
return json.loads(snapshots[0].read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def _get_active_runtime_snapshot() -> Dict[str, Any]:
|
||||
"""Return the active runtime snapshot, preferring in-memory manager state."""
|
||||
if not _is_gateway_running():
|
||||
raise HTTPException(status_code=404, detail="No runtime is currently running")
|
||||
|
||||
manager = _runtime_state.runtime_manager
|
||||
if manager is not None and hasattr(manager, "build_snapshot"):
|
||||
snapshot = manager.build_snapshot()
|
||||
context = snapshot.get("context") or {}
|
||||
if context.get("config_name"):
|
||||
return snapshot
|
||||
|
||||
return _load_latest_runtime_snapshot()
|
||||
|
||||
|
||||
def _get_runtime_context_from_latest_snapshot() -> Dict[str, Any]:
|
||||
"""Return the latest persisted runtime context regardless of active process state."""
|
||||
latest = _load_latest_runtime_snapshot()
|
||||
context = latest.get("context") or {}
|
||||
if not context.get("config_name"):
|
||||
raise HTTPException(status_code=404, detail="No runtime context available")
|
||||
return context
|
||||
|
||||
|
||||
def _get_gateway_log_path_for_run(run_id: str) -> Path:
|
||||
return _get_run_dir(run_id) / "logs" / "gateway.log"
|
||||
|
||||
|
||||
def _read_log_tail(path: Path, max_chars: int = 120_000) -> str:
|
||||
if not path.exists() or not path.is_file():
|
||||
return ""
|
||||
text = path.read_text(encoding="utf-8", errors="replace")
|
||||
if len(text) <= max_chars:
|
||||
return text
|
||||
return text[-max_chars:]
|
||||
|
||||
|
||||
def _get_current_runtime_context() -> Dict[str, Any]:
|
||||
"""Return the active runtime context from the latest snapshot."""
|
||||
if not _is_gateway_running():
|
||||
raise HTTPException(status_code=404, detail="No runtime is currently running")
|
||||
snapshot = _get_active_runtime_snapshot()
|
||||
context = snapshot.get("context") or {}
|
||||
if not context.get("config_name"):
|
||||
raise HTTPException(status_code=404, detail="No runtime context available")
|
||||
return context
|
||||
|
||||
|
||||
def _get_active_runtime_context() -> Dict[str, Any]:
|
||||
"""Return the active runtime context, preferring in-memory runtime manager state."""
|
||||
return _get_current_runtime_context()
|
||||
|
||||
|
||||
def _resolve_runtime_response(run_id: str) -> RuntimeConfigResponse:
|
||||
"""Build a normalized runtime config response for the active run."""
|
||||
context = _get_current_runtime_context()
|
||||
bootstrap = dict(context.get("bootstrap_values") or {})
|
||||
resolved = resolve_runtime_config(
|
||||
project_root=PROJECT_ROOT,
|
||||
config_name=run_id,
|
||||
enable_memory=bool(bootstrap.get("enable_memory", False)),
|
||||
schedule_mode=str(bootstrap.get("schedule_mode", "daily")),
|
||||
interval_minutes=int(bootstrap.get("interval_minutes", 60) or 60),
|
||||
trigger_time=str(bootstrap.get("trigger_time", "09:30") or "09:30"),
|
||||
)
|
||||
return RuntimeConfigResponse(
|
||||
run_id=run_id,
|
||||
is_running=True,
|
||||
gateway_port=_runtime_state.gateway_port,
|
||||
bootstrap=bootstrap,
|
||||
resolved=resolved,
|
||||
)
|
||||
|
||||
|
||||
def _normalize_runtime_config_updates(
|
||||
request: UpdateRuntimeConfigRequest,
|
||||
) -> Dict[str, Any]:
|
||||
"""Validate and normalize runtime config updates."""
|
||||
updates: Dict[str, Any] = {}
|
||||
|
||||
if request.schedule_mode is not None:
|
||||
schedule_mode = str(request.schedule_mode).strip().lower()
|
||||
if schedule_mode not in {"daily", "intraday"}:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="schedule_mode must be 'daily' or 'intraday'",
|
||||
)
|
||||
updates["schedule_mode"] = schedule_mode
|
||||
|
||||
if request.interval_minutes is not None:
|
||||
updates["interval_minutes"] = int(request.interval_minutes)
|
||||
|
||||
if request.trigger_time is not None:
|
||||
trigger_time = str(request.trigger_time).strip()
|
||||
if trigger_time and trigger_time != "now":
|
||||
try:
|
||||
datetime.strptime(trigger_time, "%H:%M")
|
||||
except ValueError as exc:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="trigger_time must use HH:MM or 'now'",
|
||||
) from exc
|
||||
updates["trigger_time"] = trigger_time or "09:30"
|
||||
|
||||
if request.max_comm_cycles is not None:
|
||||
updates["max_comm_cycles"] = int(request.max_comm_cycles)
|
||||
|
||||
if request.initial_cash is not None:
|
||||
updates["initial_cash"] = float(request.initial_cash)
|
||||
|
||||
if request.margin_requirement is not None:
|
||||
updates["margin_requirement"] = float(request.margin_requirement)
|
||||
|
||||
if request.enable_memory is not None:
|
||||
updates["enable_memory"] = bool(request.enable_memory)
|
||||
|
||||
if not updates:
|
||||
raise HTTPException(status_code=400, detail="No runtime config updates provided")
|
||||
|
||||
return updates
|
||||
|
||||
|
||||
@router.post("/start", response_model=LaunchResponse)
|
||||
async def start_runtime(
|
||||
config: LaunchConfig,
|
||||
background_tasks: BackgroundTasks
|
||||
) -> LaunchResponse:
|
||||
"""Start a new trading runtime with the given configuration.
|
||||
|
||||
1. Stop existing Gateway if running
|
||||
2. Generate run ID and directory
|
||||
3. Create runtime manager
|
||||
4. Start Gateway as subprocess (Data Plane)
|
||||
5. Return Gateway port for WebSocket connection
|
||||
"""
|
||||
# Lazy import to avoid circular dependency
|
||||
from backend.runtime.manager import TradingRuntimeManager
|
||||
|
||||
# 1. Stop existing Gateway
|
||||
if _is_gateway_running():
|
||||
_stop_gateway()
|
||||
await asyncio.sleep(1) # Wait for port release
|
||||
|
||||
launch_mode = str(config.launch_mode or "fresh").strip().lower()
|
||||
if launch_mode not in {"fresh", "restore"}:
|
||||
raise HTTPException(status_code=400, detail="launch_mode must be 'fresh' or 'restore'")
|
||||
|
||||
# 2. Resolve run ID, directory, and bootstrap
|
||||
if launch_mode == "restore":
|
||||
restore_run_id = str(config.restore_run_id or "").strip()
|
||||
if not restore_run_id:
|
||||
raise HTTPException(status_code=400, detail="restore_run_id is required when launch_mode=restore")
|
||||
snapshot = _load_run_snapshot(restore_run_id)
|
||||
context = snapshot.get("context") or {}
|
||||
if not context.get("config_name"):
|
||||
raise HTTPException(status_code=404, detail=f"Run context not found: {restore_run_id}")
|
||||
run_id = restore_run_id
|
||||
run_dir = _get_run_dir(run_id)
|
||||
bootstrap = dict(context.get("bootstrap_values") or {})
|
||||
bootstrap["launch_mode"] = "restore"
|
||||
bootstrap["restore_run_id"] = restore_run_id
|
||||
else:
|
||||
run_id = _generate_run_id()
|
||||
run_dir = _get_run_dir(run_id)
|
||||
bootstrap = {
|
||||
"launch_mode": "fresh",
|
||||
"restore_run_id": None,
|
||||
"tickers": config.tickers,
|
||||
"schedule_mode": config.schedule_mode,
|
||||
"interval_minutes": config.interval_minutes,
|
||||
"trigger_time": config.trigger_time,
|
||||
"max_comm_cycles": config.max_comm_cycles,
|
||||
"initial_cash": config.initial_cash,
|
||||
"margin_requirement": config.margin_requirement,
|
||||
"enable_memory": config.enable_memory,
|
||||
"mode": config.mode,
|
||||
"start_date": config.start_date,
|
||||
"end_date": config.end_date,
|
||||
"poll_interval": config.poll_interval,
|
||||
}
|
||||
|
||||
retention_keep = max(1, int(os.getenv("RUNS_RETENTION_COUNT", "20") or "20"))
|
||||
pruned_run_ids = _prune_old_timestamped_runs(
|
||||
keep=retention_keep,
|
||||
exclude_run_ids={run_id},
|
||||
)
|
||||
if pruned_run_ids:
|
||||
logger.info("Pruned old run directories: %s", ", ".join(pruned_run_ids))
|
||||
|
||||
# 4. Create runtime manager
|
||||
manager = TradingRuntimeManager(
|
||||
config_name=run_id,
|
||||
run_dir=run_dir,
|
||||
bootstrap=bootstrap,
|
||||
)
|
||||
manager.prepare_run()
|
||||
register_runtime_manager(manager)
|
||||
|
||||
# 5. Write BOOTSTRAP.md
|
||||
_write_bootstrap_md(run_dir, bootstrap)
|
||||
|
||||
# 6. Find available port and start Gateway process
|
||||
gateway_port = _find_available_port(start_port=8765)
|
||||
_runtime_state.gateway_port = gateway_port
|
||||
|
||||
try:
|
||||
process = _start_gateway_process(
|
||||
run_id=run_id,
|
||||
run_dir=run_dir,
|
||||
bootstrap=bootstrap,
|
||||
port=gateway_port
|
||||
)
|
||||
_runtime_state.gateway_process = process
|
||||
|
||||
# Wait briefly to check if process started successfully
|
||||
await asyncio.sleep(2)
|
||||
|
||||
if not _is_gateway_running():
|
||||
_runtime_state.gateway_process = None
|
||||
log_path = _get_gateway_log_path_for_run(run_id)
|
||||
log_tail = _read_log_tail(log_path, max_chars=4000)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Gateway failed to start: {log_tail or 'Unknown error'}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
_stop_gateway()
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start Gateway: {str(e)}")
|
||||
|
||||
return LaunchResponse(
|
||||
run_id=run_id,
|
||||
status="started",
|
||||
run_dir=str(run_dir),
|
||||
gateway_port=gateway_port,
|
||||
message=f"Runtime started with run_id: {run_id}, Gateway on port: {gateway_port}",
|
||||
)
|
||||
|
||||
|
||||
@router.get("/config", response_model=RuntimeConfigResponse)
|
||||
async def get_runtime_config() -> RuntimeConfigResponse:
|
||||
"""Return the current runtime bootstrap and resolved settings."""
|
||||
context = _get_current_runtime_context()
|
||||
return _resolve_runtime_response(context["config_name"])
|
||||
|
||||
|
||||
@router.put("/config", response_model=RuntimeConfigResponse)
|
||||
async def update_runtime_config(
|
||||
request: UpdateRuntimeConfigRequest,
|
||||
) -> RuntimeConfigResponse:
|
||||
"""Persist selected runtime configuration updates for the active run."""
|
||||
context = _get_current_runtime_context()
|
||||
run_id = context["config_name"]
|
||||
updates = _normalize_runtime_config_updates(request)
|
||||
updated = update_bootstrap_values_for_run(PROJECT_ROOT, run_id, updates)
|
||||
|
||||
manager = _runtime_state.runtime_manager
|
||||
if manager is not None and getattr(manager, "config_name", None) == run_id:
|
||||
manager.bootstrap.update(updates)
|
||||
if getattr(manager, "context", None) is not None:
|
||||
manager.context.bootstrap_values.update(updates)
|
||||
if hasattr(manager, "_persist_snapshot"):
|
||||
manager._persist_snapshot()
|
||||
|
||||
response = _resolve_runtime_response(run_id)
|
||||
response.bootstrap = dict(updated.values)
|
||||
return response
|
||||
|
||||
|
||||
@router.post("/stop", response_model=StopResponse)
|
||||
async def stop_runtime(force: bool = True) -> StopResponse:
|
||||
"""Stop the current running runtime."""
|
||||
was_running = _is_gateway_running()
|
||||
|
||||
if not was_running:
|
||||
raise HTTPException(status_code=404, detail="No runtime is currently running")
|
||||
|
||||
# Stop Gateway process
|
||||
_stop_gateway()
|
||||
|
||||
# Unregister runtime manager
|
||||
unregister_runtime_manager()
|
||||
|
||||
return StopResponse(
|
||||
status="stopped",
|
||||
message="Runtime stopped successfully",
|
||||
)
|
||||
|
||||
|
||||
@router.post("/cleanup", response_model=CleanupResponse)
|
||||
async def cleanup_old_runs(keep: int = 20) -> CleanupResponse:
|
||||
"""Prune old timestamped run directories while preserving named runs."""
|
||||
keep_count = max(1, int(keep))
|
||||
exclude: set[str] = set()
|
||||
|
||||
if _is_gateway_running():
|
||||
try:
|
||||
active_context = _get_active_runtime_context()
|
||||
active_run_id = str(active_context.get("config_name") or "").strip()
|
||||
if active_run_id:
|
||||
exclude.add(active_run_id)
|
||||
except HTTPException:
|
||||
pass
|
||||
|
||||
pruned = _prune_old_timestamped_runs(keep=keep_count, exclude_run_ids=exclude)
|
||||
return CleanupResponse(status="ok", kept=keep_count, pruned_run_ids=pruned)
|
||||
|
||||
|
||||
@router.post("/restart")
|
||||
async def restart_runtime(
|
||||
config: LaunchConfig,
|
||||
background_tasks: BackgroundTasks
|
||||
):
|
||||
"""Restart the runtime with a new configuration."""
|
||||
# Stop current runtime
|
||||
await stop_runtime(force=True)
|
||||
|
||||
# Start new runtime
|
||||
response = await start_runtime(config, background_tasks)
|
||||
|
||||
return {
|
||||
"run_id": response.run_id,
|
||||
"status": "restarted",
|
||||
"gateway_port": response.gateway_port,
|
||||
"message": f"Runtime restarted with run_id: {response.run_id}",
|
||||
}
|
||||
|
||||
|
||||
@router.get("/current")
|
||||
async def get_current_runtime():
|
||||
"""Get information about the currently running runtime."""
|
||||
if not _is_gateway_running():
|
||||
raise HTTPException(status_code=404, detail="No runtime is currently running")
|
||||
|
||||
context = _get_active_runtime_context()
|
||||
|
||||
return {
|
||||
"run_id": context.get("config_name"),
|
||||
"run_dir": context.get("run_dir"),
|
||||
"is_running": True,
|
||||
"gateway_port": _runtime_state.gateway_port,
|
||||
"bootstrap": context.get("bootstrap_values", {}),
|
||||
}
|
||||
|
||||
|
||||
def register_runtime_manager(manager: Any) -> None:
|
||||
"""Allow other modules to expose the runtime manager to the API."""
|
||||
global runtime_manager
|
||||
runtime_manager = manager
|
||||
# Also update the RuntimeState singleton for internal consistency
|
||||
_runtime_state.runtime_manager = manager
|
||||
|
||||
|
||||
def unregister_runtime_manager() -> None:
|
||||
"""Drop the runtime manager reference."""
|
||||
global runtime_manager
|
||||
runtime_manager = None
|
||||
# Also update the RuntimeState singleton for internal consistency
|
||||
_runtime_state.runtime_manager = None
|
||||
|
||||
|
||||
def _write_bootstrap_md(run_dir: Path, bootstrap: Dict[str, Any]) -> None:
|
||||
"""Write bootstrap configuration to BOOTSTRAP.md."""
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
bootstrap_path = run_dir / "BOOTSTRAP.md"
|
||||
bootstrap_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Filter out None values
|
||||
values = {k: v for k, v in bootstrap.items() if v is not None}
|
||||
|
||||
if yaml:
|
||||
front_matter = yaml.safe_dump(values, allow_unicode=True, sort_keys=False)
|
||||
else:
|
||||
front_matter = json.dumps(values, ensure_ascii=False, indent=2)
|
||||
|
||||
content = f"---\n{front_matter}---\n"
|
||||
bootstrap_path.write_text(content, encoding="utf-8")
|
||||
Reference in New Issue
Block a user