Refine runtime data flow and UI layering
This commit is contained in:
@@ -8,6 +8,7 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
@@ -194,6 +195,12 @@ class StopResponse(BaseModel):
|
||||
message: str
|
||||
|
||||
|
||||
class CleanupResponse(BaseModel):
|
||||
status: str
|
||||
kept: int
|
||||
pruned_run_ids: List[str]
|
||||
|
||||
|
||||
class GatewayStatusResponse(BaseModel):
|
||||
is_running: bool
|
||||
port: int
|
||||
@@ -235,6 +242,38 @@ def _get_run_dir(run_id: str) -> Path:
|
||||
return PROJECT_ROOT / "runs" / run_id
|
||||
|
||||
|
||||
def _is_timestamped_run_dir(path: Path) -> bool:
|
||||
try:
|
||||
datetime.strptime(path.name, "%Y%m%d_%H%M%S")
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def _prune_old_timestamped_runs(*, keep: int = 20, exclude_run_ids: Optional[set[str]] = None) -> list[str]:
|
||||
"""Prune old timestamped run directories, preserving the newest N and excluded ids."""
|
||||
exclude = exclude_run_ids or set()
|
||||
runs_root = PROJECT_ROOT / "runs"
|
||||
if not runs_root.exists():
|
||||
return []
|
||||
|
||||
candidates = sorted(
|
||||
[
|
||||
path
|
||||
for path in runs_root.iterdir()
|
||||
if path.is_dir() and _is_timestamped_run_dir(path) and path.name not in exclude
|
||||
],
|
||||
key=lambda path: path.name,
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
pruned: list[str] = []
|
||||
for path in candidates[max(0, keep):]:
|
||||
shutil.rmtree(path, ignore_errors=True)
|
||||
pruned.append(path.name)
|
||||
return pruned
|
||||
|
||||
|
||||
def _find_available_port(start_port: int = 8765, max_port: int = 9000) -> int:
|
||||
"""Find an available port for Gateway."""
|
||||
import socket
|
||||
@@ -316,15 +355,9 @@ def _start_gateway_process(
|
||||
|
||||
@router.get("/context", response_model=RunContextResponse)
|
||||
async def get_run_context() -> RunContextResponse:
|
||||
"""Return the most recent run context."""
|
||||
snapshot_path = PROJECT_ROOT.glob("runs/*/state/runtime_state.json")
|
||||
snapshots = sorted(snapshot_path, key=lambda p: p.stat().st_mtime, reverse=True)
|
||||
|
||||
if not snapshots:
|
||||
raise HTTPException(status_code=404, detail="No run context available")
|
||||
|
||||
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
|
||||
context = latest.get("context")
|
||||
"""Return active runtime context, or latest persisted context when stopped."""
|
||||
snapshot = _get_active_runtime_snapshot() if _is_gateway_running() else _load_latest_runtime_snapshot()
|
||||
context = snapshot.get("context")
|
||||
if context is None:
|
||||
raise HTTPException(status_code=404, detail="Run context is not ready")
|
||||
|
||||
@@ -337,15 +370,9 @@ async def get_run_context() -> RunContextResponse:
|
||||
|
||||
@router.get("/agents", response_model=RuntimeAgentsResponse)
|
||||
async def get_runtime_agents() -> RuntimeAgentsResponse:
|
||||
"""Return agent states from the most recent run."""
|
||||
snapshot_path = PROJECT_ROOT.glob("runs/*/state/runtime_state.json")
|
||||
snapshots = sorted(snapshot_path, key=lambda p: p.stat().st_mtime, reverse=True)
|
||||
|
||||
if not snapshots:
|
||||
raise HTTPException(status_code=404, detail="No runtime state available")
|
||||
|
||||
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
|
||||
agents = latest.get("agents", [])
|
||||
"""Return agent states from the active runtime, or latest persisted run."""
|
||||
snapshot = _get_active_runtime_snapshot() if _is_gateway_running() else _load_latest_runtime_snapshot()
|
||||
agents = snapshot.get("agents", [])
|
||||
|
||||
return RuntimeAgentsResponse(
|
||||
agents=[RuntimeAgentState(**a) for a in agents]
|
||||
@@ -354,15 +381,9 @@ async def get_runtime_agents() -> RuntimeAgentsResponse:
|
||||
|
||||
@router.get("/events", response_model=RuntimeEventsResponse)
|
||||
async def get_runtime_events() -> RuntimeEventsResponse:
|
||||
"""Return events from the most recent run."""
|
||||
snapshot_path = PROJECT_ROOT.glob("runs/*/state/runtime_state.json")
|
||||
snapshots = sorted(snapshot_path, key=lambda p: p.stat().st_mtime, reverse=True)
|
||||
|
||||
if not snapshots:
|
||||
raise HTTPException(status_code=404, detail="No runtime state available")
|
||||
|
||||
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
|
||||
events = latest.get("events", [])
|
||||
"""Return events from the active runtime, or latest persisted run."""
|
||||
snapshot = _get_active_runtime_snapshot() if _is_gateway_running() else _load_latest_runtime_snapshot()
|
||||
events = snapshot.get("events", [])
|
||||
|
||||
return RuntimeEventsResponse(
|
||||
events=[RuntimeEvent(**e) for e in events]
|
||||
@@ -376,15 +397,10 @@ async def get_gateway_status() -> GatewayStatusResponse:
|
||||
run_id = None
|
||||
|
||||
if is_running:
|
||||
# Try to find run_id from runtime state
|
||||
snapshot_path = PROJECT_ROOT.glob("runs/*/state/runtime_state.json")
|
||||
snapshots = sorted(snapshot_path, key=lambda p: p.stat().st_mtime, reverse=True)
|
||||
if snapshots:
|
||||
try:
|
||||
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
|
||||
run_id = latest.get("context", {}).get("config_name")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to parse latest snapshot: {e}")
|
||||
try:
|
||||
run_id = _get_active_runtime_context().get("config_name")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to resolve active runtime context: {e}")
|
||||
|
||||
return GatewayStatusResponse(
|
||||
is_running=is_running,
|
||||
@@ -408,7 +424,7 @@ async def get_gateway_port(request: Request) -> Dict[str, Any]:
|
||||
async def get_runtime_logs() -> RuntimeLogResponse:
|
||||
"""Return current runtime log tail, or the latest run log if runtime is stopped."""
|
||||
try:
|
||||
context = _get_runtime_context_from_latest_snapshot()
|
||||
context = _get_active_runtime_context() if _is_gateway_running() else _get_runtime_context_from_latest_snapshot()
|
||||
except HTTPException:
|
||||
return RuntimeLogResponse(is_running=False, content="")
|
||||
|
||||
@@ -450,6 +466,21 @@ def _load_latest_runtime_snapshot() -> Dict[str, Any]:
|
||||
return json.loads(snapshots[0].read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def _get_active_runtime_snapshot() -> Dict[str, Any]:
|
||||
"""Return the active runtime snapshot, preferring in-memory manager state."""
|
||||
if not _is_gateway_running():
|
||||
raise HTTPException(status_code=404, detail="No runtime is currently running")
|
||||
|
||||
manager = _runtime_state.runtime_manager
|
||||
if manager is not None and hasattr(manager, "build_snapshot"):
|
||||
snapshot = manager.build_snapshot()
|
||||
context = snapshot.get("context") or {}
|
||||
if context.get("config_name"):
|
||||
return snapshot
|
||||
|
||||
return _load_latest_runtime_snapshot()
|
||||
|
||||
|
||||
def _get_runtime_context_from_latest_snapshot() -> Dict[str, Any]:
|
||||
"""Return the latest persisted runtime context regardless of active process state."""
|
||||
latest = _load_latest_runtime_snapshot()
|
||||
@@ -476,7 +507,16 @@ def _get_current_runtime_context() -> Dict[str, Any]:
|
||||
"""Return the active runtime context from the latest snapshot."""
|
||||
if not _is_gateway_running():
|
||||
raise HTTPException(status_code=404, detail="No runtime is currently running")
|
||||
return _get_runtime_context_from_latest_snapshot()
|
||||
snapshot = _get_active_runtime_snapshot()
|
||||
context = snapshot.get("context") or {}
|
||||
if not context.get("config_name"):
|
||||
raise HTTPException(status_code=404, detail="No runtime context available")
|
||||
return context
|
||||
|
||||
|
||||
def _get_active_runtime_context() -> Dict[str, Any]:
|
||||
"""Return the active runtime context, preferring in-memory runtime manager state."""
|
||||
return _get_current_runtime_context()
|
||||
|
||||
|
||||
def _resolve_runtime_response(run_id: str) -> RuntimeConfigResponse:
|
||||
@@ -573,6 +613,14 @@ async def start_runtime(
|
||||
run_id = _generate_run_id()
|
||||
run_dir = _get_run_dir(run_id)
|
||||
|
||||
retention_keep = max(1, int(os.getenv("RUNS_RETENTION_COUNT", "20") or "20"))
|
||||
pruned_run_ids = _prune_old_timestamped_runs(
|
||||
keep=retention_keep,
|
||||
exclude_run_ids={run_id},
|
||||
)
|
||||
if pruned_run_ids:
|
||||
logger.info("Pruned old run directories: %s", ", ".join(pruned_run_ids))
|
||||
|
||||
# 3. Prepare bootstrap config
|
||||
bootstrap = {
|
||||
"tickers": config.tickers,
|
||||
@@ -690,6 +738,25 @@ async def stop_runtime(force: bool = True) -> StopResponse:
|
||||
)
|
||||
|
||||
|
||||
@router.post("/cleanup", response_model=CleanupResponse)
|
||||
async def cleanup_old_runs(keep: int = 20) -> CleanupResponse:
|
||||
"""Prune old timestamped run directories while preserving named runs."""
|
||||
keep_count = max(1, int(keep))
|
||||
exclude: set[str] = set()
|
||||
|
||||
if _is_gateway_running():
|
||||
try:
|
||||
active_context = _get_active_runtime_context()
|
||||
active_run_id = str(active_context.get("config_name") or "").strip()
|
||||
if active_run_id:
|
||||
exclude.add(active_run_id)
|
||||
except HTTPException:
|
||||
pass
|
||||
|
||||
pruned = _prune_old_timestamped_runs(keep=keep_count, exclude_run_ids=exclude)
|
||||
return CleanupResponse(status="ok", kept=keep_count, pruned_run_ids=pruned)
|
||||
|
||||
|
||||
@router.post("/restart")
|
||||
async def restart_runtime(
|
||||
config: LaunchConfig,
|
||||
@@ -716,15 +783,7 @@ async def get_current_runtime():
|
||||
if not _is_gateway_running():
|
||||
raise HTTPException(status_code=404, detail="No runtime is currently running")
|
||||
|
||||
# Find latest runtime state
|
||||
snapshot_path = PROJECT_ROOT.glob("runs/*/state/runtime_state.json")
|
||||
snapshots = sorted(snapshot_path, key=lambda p: p.stat().st_mtime, reverse=True)
|
||||
|
||||
if not snapshots:
|
||||
raise HTTPException(status_code=404, detail="No runtime information available")
|
||||
|
||||
latest = json.loads(snapshots[0].read_text(encoding="utf-8"))
|
||||
context = latest.get("context", {})
|
||||
context = _get_active_runtime_context()
|
||||
|
||||
return {
|
||||
"run_id": context.get("config_name"),
|
||||
|
||||
Reference in New Issue
Block a user