feat(agent): complete EvoAgent integration for all 6 agent roles
Migrate all agent roles from Legacy to EvoAgent architecture:
- fundamentals_analyst, technical_analyst, sentiment_analyst, valuation_analyst
- risk_manager, portfolio_manager

Key changes:
- EvoAgent now supports Portfolio Manager compatibility methods (_make_decision, get_decisions, get_portfolio_state, load_portfolio_state, update_portfolio)
- Add UnifiedAgentFactory for centralized agent creation
- ToolGuard with batch approval API and WebSocket broadcast
- Legacy agents marked deprecated (AnalystAgent, RiskAgent, PMAgent)
- Remove backend/agents/compat.py migration shim
- Add run_id alongside workspace_id for semantic clarity
- Complete integration test coverage (13 tests)
- All smoke tests passing for 6 agent roles

Constraint: Must maintain backward compatibility with existing run configs
Constraint: Memory support must work with EvoAgent (no fallback to Legacy)
Rejected: Separate PM implementation for EvoAgent | unified approach cleaner
Confidence: high
Scope-risk: broad
Directive: EVO_AGENT_IDS env var still respected but defaults to all roles
Not-tested: Kubernetes sandbox mode for skill execution
This commit is contained in:
@@ -2,7 +2,10 @@
|
||||
"""
|
||||
Agent API Routes
|
||||
|
||||
Provides REST API endpoints for agent management within workspaces.
|
||||
Provides REST API endpoints for both:
|
||||
|
||||
- design-time agent management under `workspaces/`
|
||||
- run-scoped agent asset access under `runs/<run_id>/`
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
@@ -24,6 +27,30 @@ from backend.llm.models import get_agent_model_info
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/workspaces/{workspace_id}/agents", tags=["agents"])
|
||||
DESIGN_SCOPE = "design_workspace"
|
||||
RUNTIME_SCOPE = "runtime_run"
|
||||
RUNTIME_SCOPE_NOTE = (
|
||||
"For profile, skills, and editable agent files, `workspace_id` is treated "
|
||||
"as the active run id under `runs/<run_id>/`, not as the design-time "
|
||||
"`workspaces/` registry."
|
||||
)
|
||||
|
||||
|
||||
def _runtime_scope_fields() -> dict[str, str]:
    """Return the scope metadata merged into run-scoped responses.

    Returns:
        A new dict with ``scope_type`` set to ``RUNTIME_SCOPE`` and
        ``scope_note`` set to ``RUNTIME_SCOPE_NOTE``.
    """
    return dict(scope_type=RUNTIME_SCOPE, scope_note=RUNTIME_SCOPE_NOTE)
|
||||
|
||||
|
||||
def _design_scope_fields() -> dict[str, str]:
    """Return the scope metadata merged into design-time responses.

    Returns:
        A new dict with ``scope_type`` set to ``DESIGN_SCOPE`` and a
        ``scope_note`` explaining how ``workspace_id`` is interpreted.
    """
    note = (
        "For design-time CRUD routes on this surface, `workspace_id` refers "
        "to the persistent registry under `workspaces/`."
    )
    return {"scope_type": DESIGN_SCOPE, "scope_note": note}
|
||||
|
||||
|
||||
# Request/Response Models
|
||||
@@ -68,30 +95,40 @@ class AgentResponse(BaseModel):
|
||||
config_path: str
|
||||
agent_dir: str
|
||||
status: str = "inactive"
|
||||
scope_type: str = DESIGN_SCOPE
|
||||
scope_note: Optional[str] = None
|
||||
|
||||
|
||||
class AgentFileResponse(BaseModel):
|
||||
"""Agent file content response."""
|
||||
filename: str
|
||||
content: str
|
||||
scope_type: str = RUNTIME_SCOPE
|
||||
scope_note: Optional[str] = None
|
||||
|
||||
|
||||
class AgentProfileResponse(BaseModel):
|
||||
agent_id: str
|
||||
workspace_id: str
|
||||
profile: Dict[str, Any]
|
||||
scope_type: str = RUNTIME_SCOPE
|
||||
scope_note: Optional[str] = None
|
||||
|
||||
|
||||
class AgentSkillsResponse(BaseModel):
|
||||
agent_id: str
|
||||
workspace_id: str
|
||||
skills: List[Dict[str, Any]]
|
||||
scope_type: str = RUNTIME_SCOPE
|
||||
scope_note: Optional[str] = None
|
||||
|
||||
|
||||
class SkillDetailResponse(BaseModel):
|
||||
agent_id: str
|
||||
workspace_id: str
|
||||
skill: Dict[str, Any]
|
||||
scope_type: str = RUNTIME_SCOPE
|
||||
scope_note: Optional[str] = None
|
||||
|
||||
|
||||
# Dependencies
|
||||
@@ -101,7 +138,7 @@ def get_agent_factory():
|
||||
|
||||
|
||||
def get_workspace_manager():
|
||||
"""Get run-scoped workspace manager instance."""
|
||||
"""Get run-scoped asset manager for one runtime workspace/run id."""
|
||||
return RunWorkspaceManager()
|
||||
|
||||
|
||||
@@ -119,7 +156,7 @@ async def create_agent(
|
||||
registry = Depends(get_registry),
|
||||
):
|
||||
"""
|
||||
Create a new agent in a workspace.
|
||||
Create a new agent in a design-time workspace registry entry.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
@@ -162,6 +199,7 @@ async def create_agent(
|
||||
config_path=str(agent.config_path),
|
||||
agent_dir=str(agent.agent_dir),
|
||||
status="inactive",
|
||||
**_design_scope_fields(),
|
||||
)
|
||||
|
||||
except ValueError as e:
|
||||
@@ -174,7 +212,7 @@ async def list_agents(
|
||||
factory: AgentFactory = Depends(get_agent_factory),
|
||||
):
|
||||
"""
|
||||
List all agents in a workspace.
|
||||
List all agents in a design-time workspace registry entry.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
@@ -192,6 +230,7 @@ async def list_agents(
|
||||
config_path=agent["config_path"],
|
||||
agent_dir=str(Path(agent["config_path"]).parent),
|
||||
status="inactive",
|
||||
**_design_scope_fields(),
|
||||
)
|
||||
for agent in agents_data
|
||||
]
|
||||
@@ -206,7 +245,7 @@ async def get_agent(
|
||||
registry = Depends(get_registry),
|
||||
):
|
||||
"""
|
||||
Get agent details.
|
||||
Get design-time agent details from the persistent workspace registry.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
@@ -227,6 +266,7 @@ async def get_agent(
|
||||
config_path=agent_info.config_path,
|
||||
agent_dir=agent_info.agent_dir,
|
||||
status=agent_info.status,
|
||||
**_design_scope_fields(),
|
||||
)
|
||||
|
||||
|
||||
@@ -275,6 +315,7 @@ async def get_agent_profile(
|
||||
"enabled_skills": agent_config.enabled_skills,
|
||||
"disabled_skills": agent_config.disabled_skills,
|
||||
},
|
||||
**_runtime_scope_fields(),
|
||||
)
|
||||
|
||||
|
||||
@@ -310,7 +351,12 @@ async def get_agent_skills(
|
||||
"status": status,
|
||||
})
|
||||
|
||||
return AgentSkillsResponse(agent_id=agent_id, workspace_id=workspace_id, skills=payload)
|
||||
return AgentSkillsResponse(
|
||||
agent_id=agent_id,
|
||||
workspace_id=workspace_id,
|
||||
skills=payload,
|
||||
**_runtime_scope_fields(),
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{agent_id}/skills/{skill_name}", response_model=SkillDetailResponse)
|
||||
@@ -329,7 +375,12 @@ async def get_agent_skill_detail(
|
||||
except FileNotFoundError:
|
||||
raise HTTPException(status_code=404, detail=f"Unknown skill: {skill_name}")
|
||||
|
||||
return SkillDetailResponse(agent_id=agent_id, workspace_id=workspace_id, skill=detail)
|
||||
return SkillDetailResponse(
|
||||
agent_id=agent_id,
|
||||
workspace_id=workspace_id,
|
||||
skill=detail,
|
||||
**_runtime_scope_fields(),
|
||||
)
|
||||
|
||||
|
||||
@router.delete("/{agent_id}")
|
||||
@@ -416,6 +467,7 @@ async def update_agent(
|
||||
config_path=agent_info.config_path,
|
||||
agent_dir=agent_info.agent_dir,
|
||||
status=agent_info.status,
|
||||
**_design_scope_fields(),
|
||||
)
|
||||
|
||||
|
||||
@@ -656,7 +708,7 @@ async def get_agent_file(
|
||||
workspace_manager: RunWorkspaceManager = Depends(get_workspace_manager),
|
||||
):
|
||||
"""
|
||||
Read an agent's workspace file.
|
||||
Read an agent file from the run-scoped asset tree under `runs/<run_id>/`.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
@@ -672,7 +724,11 @@ async def get_agent_file(
|
||||
agent_id=agent_id,
|
||||
filename=filename,
|
||||
)
|
||||
return AgentFileResponse(filename=filename, content=content)
|
||||
return AgentFileResponse(
|
||||
filename=filename,
|
||||
content=content,
|
||||
**_runtime_scope_fields(),
|
||||
)
|
||||
except FileNotFoundError:
|
||||
raise HTTPException(status_code=404, detail=f"File '{filename}' not found")
|
||||
|
||||
@@ -686,7 +742,7 @@ async def update_agent_file(
|
||||
workspace_manager: RunWorkspaceManager = Depends(get_workspace_manager),
|
||||
):
|
||||
"""
|
||||
Update an agent's workspace file.
|
||||
Update an agent file in the run-scoped asset tree under `runs/<run_id>/`.
|
||||
|
||||
Args:
|
||||
workspace_id: Workspace identifier
|
||||
@@ -704,6 +760,10 @@ async def update_agent_file(
|
||||
filename=filename,
|
||||
content=content,
|
||||
)
|
||||
return AgentFileResponse(filename=filename, content=content)
|
||||
return AgentFileResponse(
|
||||
filename=filename,
|
||||
content=content,
|
||||
**_runtime_scope_fields(),
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@@ -7,7 +7,7 @@ Provides REST API endpoints for tool guard operations.
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
from datetime import datetime
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -29,7 +29,7 @@ class ToolCallRequest(BaseModel):
|
||||
tool_name: str = Field(..., description="Name of the tool")
|
||||
tool_input: Dict[str, Any] = Field(default_factory=dict, description="Tool parameters")
|
||||
agent_id: str = Field(..., description="Agent making the request")
|
||||
workspace_id: str = Field(..., description="Workspace context")
|
||||
workspace_id: str = Field(..., description="Run context; historical field name retained for compatibility")
|
||||
session_id: Optional[str] = Field(None, description="Session identifier")
|
||||
|
||||
|
||||
@@ -46,6 +46,21 @@ class DenyRequest(BaseModel):
|
||||
reason: Optional[str] = Field(None, description="Reason for denial")
|
||||
|
||||
|
||||
class BatchApprovalRequest(BaseModel):
    """Request body for approving several pending tool calls in one call."""
    # Required: IDs of the approval requests to approve.
    approval_ids: List[str] = Field(..., description="List of approval request IDs")
    # Defaults to True.
    # NOTE(review): the batch approve handler visible in this file only reads
    # `approval_ids`; confirm where (or whether) `one_time` is consumed.
    one_time: bool = Field(True, description="Whether these are one-time approvals")
|
||||
|
||||
|
||||
class BatchApprovalResponse(BaseModel):
    """Result of a batch approval: per-item successes, per-item failures, and counts."""
    # One ApprovalResponse per request that was approved.
    approved: List[ApprovalResponse] = Field(default_factory=list, description="Successfully approved")
    # One dict per request that was not approved; the batch handler in this
    # file fills the "approval_id" and "error" keys.
    failed: List[Dict[str, Any]] = Field(default_factory=list, description="Failed approvals with errors")
    # Required counts; the batch handler derives them with len() from the
    # requested ID list and the two lists above.
    total_requested: int
    total_approved: int
    total_failed: int
|
||||
|
||||
|
||||
class ToolFinding(BaseModel):
|
||||
"""Tool guard finding."""
|
||||
severity: SeverityLevel
|
||||
@@ -61,11 +76,17 @@ class ApprovalResponse(BaseModel):
|
||||
tool_input: Dict[str, Any]
|
||||
agent_id: str
|
||||
workspace_id: str
|
||||
run_id: str
|
||||
session_id: Optional[str] = None
|
||||
findings: List[ToolFinding] = Field(default_factory=list)
|
||||
created_at: str
|
||||
resolved_at: Optional[str] = None
|
||||
resolved_by: Optional[str] = None
|
||||
scope_type: str = "runtime_run"
|
||||
scope_note: str = (
|
||||
"Approvals are scoped to the active runtime run. `workspace_id` is "
|
||||
"retained as a compatibility field name; prefer `run_id` for display."
|
||||
)
|
||||
|
||||
|
||||
class PendingApprovalsResponse(BaseModel):
|
||||
@@ -91,6 +112,7 @@ def _to_response(record: ApprovalRecord) -> ApprovalResponse:
|
||||
tool_input=record.tool_input,
|
||||
agent_id=record.agent_id,
|
||||
workspace_id=record.workspace_id,
|
||||
run_id=record.workspace_id,
|
||||
session_id=record.session_id,
|
||||
findings=[ToolFinding(**f.to_dict()) for f in record.findings],
|
||||
created_at=record.created_at.isoformat(),
|
||||
@@ -124,7 +146,7 @@ async def check_tool_call(
|
||||
|
||||
if request.tool_name in SAFE_TOOLS:
|
||||
record.status = ApprovalStatus.APPROVED
|
||||
record.resolved_at = datetime.utcnow()
|
||||
record.resolved_at = datetime.now(UTC)
|
||||
record.resolved_by = "system"
|
||||
STORE.set_status(
|
||||
record.approval_id,
|
||||
@@ -156,9 +178,12 @@ async def approve_tool_call(
|
||||
if record.status != ApprovalStatus.PENDING:
|
||||
raise HTTPException(status_code=400, detail=f"Approval already {record.status}")
|
||||
|
||||
record.status = ApprovalStatus.APPROVED
|
||||
record.resolved_at = datetime.utcnow()
|
||||
record.resolved_by = "user"
|
||||
record = STORE.set_status(
|
||||
request.approval_id,
|
||||
ApprovalStatus.APPROVED,
|
||||
resolved_by="user",
|
||||
notify_request=True,
|
||||
)
|
||||
|
||||
return _to_response(record)
|
||||
|
||||
@@ -183,9 +208,12 @@ async def deny_tool_call(
|
||||
if record.status != ApprovalStatus.PENDING:
|
||||
raise HTTPException(status_code=400, detail=f"Approval already {record.status}")
|
||||
|
||||
record.status = ApprovalStatus.DENIED
|
||||
record.resolved_at = datetime.utcnow()
|
||||
record.resolved_by = "user"
|
||||
record = STORE.set_status(
|
||||
request.approval_id,
|
||||
ApprovalStatus.DENIED,
|
||||
resolved_by="user",
|
||||
notify_request=True,
|
||||
)
|
||||
record.metadata["denial_reason"] = request.reason
|
||||
|
||||
return _to_response(record)
|
||||
@@ -200,7 +228,7 @@ async def list_pending_approvals(
|
||||
List pending tool approval requests.
|
||||
|
||||
Args:
|
||||
workspace_id: Filter by workspace
|
||||
workspace_id: Filter by run id (historical query parameter name retained)
|
||||
agent_id: Filter by agent
|
||||
|
||||
Returns:
|
||||
@@ -255,3 +283,58 @@ async def cancel_approval(
|
||||
|
||||
STORE.cancel(approval_id)
|
||||
return _to_response(record)
|
||||
|
||||
|
||||
@router.post("/approve/batch", response_model=BatchApprovalResponse)
async def batch_approve_tool_calls(
    request: BatchApprovalRequest,
):
    """
    Approve every pending tool call named in the request, item by item.

    Each ID is handled independently: an unknown ID, an approval that is no
    longer pending, or an error raised while updating the store is recorded
    as a failure for that ID and processing continues with the next one.

    Args:
        request: Batch approval parameters with list of approval IDs

    Returns:
        Batch approval results with successful and failed approvals
    """
    succeeded: List[ApprovalResponse] = []
    rejected: List[Dict[str, Any]] = []

    def _reject(item_id: str, reason: str) -> None:
        # Record one per-item failure in the shape the response model expects.
        rejected.append({"approval_id": item_id, "error": reason})

    for approval_id in request.approval_ids:
        existing = STORE.get(approval_id)

        if not existing:
            _reject(approval_id, "Approval request not found")
        elif existing.status != ApprovalStatus.PENDING:
            _reject(approval_id, f"Approval already {existing.status}")
        else:
            try:
                updated = STORE.set_status(
                    approval_id,
                    ApprovalStatus.APPROVED,
                    resolved_by="user",
                    notify_request=True,
                )
                succeeded.append(_to_response(updated))
            except Exception as e:
                # Best-effort batch: one failing item must not abort the rest.
                _reject(approval_id, str(e))

    return BatchApprovalResponse(
        approved=succeeded,
        failed=rejected,
        total_requested=len(request.approval_ids),
        total_approved=len(succeeded),
        total_failed=len(rejected),
    )
|
||||
|
||||
@@ -219,6 +219,22 @@ class GatewayStatusResponse(BaseModel):
|
||||
is_running: bool
|
||||
port: int
|
||||
run_id: Optional[str] = None
|
||||
process_status: Optional[str] = None
|
||||
pid: Optional[int] = None
|
||||
|
||||
|
||||
class GatewayHealthResponse(BaseModel):
    """Response model for the Gateway health endpoint."""
    # Overall status; the health helper in this file produces "healthy",
    # "degraded" or "unhealthy".
    status: str
    # Per-check results; the health helper fills the "process", "port" and
    # "configuration" entries, each with "status" and "details".
    checks: Dict[str, Any]
    # ISO-8601 string; the health helper uses datetime.now().isoformat().
    timestamp: str
|
||||
|
||||
|
||||
class RuntimeModeResponse(BaseModel):
    """Response model describing the current runtime mode."""
    # The mode endpoint in this file reports "stopped", "unknown", or the
    # bootstrap "mode" value (defaulting to "live").
    mode: str
    # True only when mode equals "backtest".
    is_backtest: bool
    # Optional; None when no run is reported.
    run_id: Optional[str] = None
    # Optional; taken from the bootstrap "schedule_mode" value when running.
    schedule_mode: Optional[str] = None
    is_running: bool
|
||||
|
||||
|
||||
class RuntimeConfigResponse(BaseModel):
|
||||
@@ -264,6 +280,49 @@ def _load_run_snapshot(run_id: str) -> Dict[str, Any]:
|
||||
return json.loads(snapshot_path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def _load_run_server_state(run_dir: Path) -> Dict[str, Any]:
    """Load persisted runtime server state if present.

    Reads ``<run_dir>/state/server_state.json``. This is a best-effort
    loader: any problem with the file yields an empty dict rather than an
    exception.

    Args:
        run_dir: Directory of a single run.

    Returns:
        The parsed JSON object, or ``{}`` when the file is missing,
        unreadable, not valid JSON, or its top-level value is not an object.
    """
    server_state_path = run_dir / "state" / "server_state.json"
    if not server_state_path.exists():
        return {}
    try:
        loaded = json.loads(server_state_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: unreadable file. ValueError covers both JSONDecodeError
        # and UnicodeDecodeError.
        return {}
    # Valid JSON may still be a list or scalar; callers rely on dict methods.
    return loaded if isinstance(loaded, dict) else {}
|
||||
|
||||
|
||||
def _extract_history_metrics(run_dir: Path) -> tuple[int, Optional[float]]:
    """Prefer runtime state files over dashboard exports for history summaries.

    The runtime server state is consulted first. The dashboard export
    (``team_dashboard/summary.json``) is only read when the runtime state
    yields neither trades nor a total value.

    Args:
        run_dir: Directory of a single run.

    Returns:
        ``(total_trades, total_asset_value)``. Falls back to ``(0, None)``
        when neither source provides usable data.
    """
    server_state = _load_run_server_state(run_dir)
    # Hand-edited or corrupted state may hold unexpected JSON types; treat
    # anything that is not an object as absent instead of raising.
    if not isinstance(server_state, dict):
        server_state = {}
    portfolio = server_state.get("portfolio")
    if not isinstance(portfolio, dict):
        portfolio = {}

    trades = server_state.get("trades")
    total_trades = len(trades) if isinstance(trades, list) else 0

    total_asset_value: Optional[float] = None
    raw_total = portfolio.get("total_value")
    if raw_total is not None:
        try:
            total_asset_value = float(raw_total)
        except (TypeError, ValueError):
            total_asset_value = None

    if total_trades or total_asset_value is not None:
        return total_trades, total_asset_value

    summary_path = run_dir / "team_dashboard" / "summary.json"
    if not summary_path.exists():
        return 0, None
    try:
        summary = json.loads(summary_path.read_text(encoding="utf-8"))
        raw_summary_total = summary.get("totalAssetValue")
        return (
            int(summary.get("totalTrades") or 0),
            float(raw_summary_total) if raw_summary_total is not None else None,
        )
    except Exception:
        # Deliberate best-effort: a malformed export must not break the
        # history listing.
        return 0, None
|
||||
|
||||
|
||||
def _copy_path_if_exists(src: Path, dst: Path) -> None:
|
||||
if not src.exists():
|
||||
return
|
||||
@@ -281,7 +340,7 @@ def _restore_run_assets(source_run_id: str, target_run_dir: Path) -> None:
|
||||
raise HTTPException(status_code=404, detail=f"Source run not found: {source_run_id}")
|
||||
|
||||
for relative in [
|
||||
"team_dashboard",
|
||||
"team_dashboard/_internal_state.json",
|
||||
"agents",
|
||||
"skills",
|
||||
"memory",
|
||||
@@ -307,12 +366,10 @@ def _list_runs(limit: int = 50) -> list[RuntimeHistoryItem]:
|
||||
for run_dir in run_dirs[: max(1, int(limit))]:
|
||||
run_id = run_dir.name
|
||||
runtime_state_path = run_dir / "state" / "runtime_state.json"
|
||||
summary_path = run_dir / "team_dashboard" / "summary.json"
|
||||
|
||||
bootstrap: Dict[str, Any] = {}
|
||||
updated_at: Optional[str] = None
|
||||
total_trades = 0
|
||||
total_asset_value: Optional[float] = None
|
||||
total_trades, total_asset_value = _extract_history_metrics(run_dir)
|
||||
|
||||
if runtime_state_path.exists():
|
||||
try:
|
||||
@@ -323,15 +380,6 @@ def _list_runs(limit: int = 50) -> list[RuntimeHistoryItem]:
|
||||
except Exception:
|
||||
bootstrap = {}
|
||||
|
||||
if summary_path.exists():
|
||||
try:
|
||||
summary = json.loads(summary_path.read_text(encoding="utf-8"))
|
||||
total_trades = int(summary.get("totalTrades") or 0)
|
||||
total_asset_value = float(summary.get("totalAssetValue")) if summary.get("totalAssetValue") is not None else None
|
||||
except Exception:
|
||||
total_trades = 0
|
||||
total_asset_value = None
|
||||
|
||||
items.append(
|
||||
RuntimeHistoryItem(
|
||||
run_id=run_id,
|
||||
@@ -436,6 +484,14 @@ def _start_gateway_process(
|
||||
port: int
|
||||
) -> subprocess.Popen:
|
||||
"""Start Gateway as a separate process."""
|
||||
# Validate configuration before starting
|
||||
validation_errors = _validate_gateway_config(bootstrap)
|
||||
if validation_errors:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Gateway configuration validation failed: {'; '.join(validation_errors)}"
|
||||
)
|
||||
|
||||
# Prepare environment
|
||||
env = os.environ.copy()
|
||||
|
||||
@@ -467,6 +523,168 @@ def _start_gateway_process(
|
||||
return process
|
||||
|
||||
|
||||
def _validate_gateway_config(bootstrap: Dict[str, Any]) -> List[str]:
    """Validate Gateway bootstrap configuration.

    Checks the run mode, the environment variables the mode requires, the
    ticker list, the numeric settings, the backtest date range, and the
    schedule mode. All problems are collected; nothing is raised.

    Args:
        bootstrap: Bootstrap values for the run. Keys read here: ``mode``,
            ``tickers``, ``initial_cash``, ``margin_requirement``,
            ``start_date``, ``end_date``, ``schedule_mode``.

    Returns:
        A list of validation error messages. Empty list means valid.
    """
    import math
    from datetime import datetime

    errors: List[str] = []

    mode = bootstrap.get("mode", "live")
    is_backtest = mode == "backtest"

    # Validate mode
    if mode not in ("live", "backtest"):
        errors.append(f"Invalid mode '{mode}': must be 'live' or 'backtest'")

    # Any non-backtest mode is held to the live-mode requirements.
    if not is_backtest:
        if not os.getenv("FINNHUB_API_KEY"):
            errors.append("FINNHUB_API_KEY environment variable is required for live mode")

    # Check LLM configuration
    if not os.getenv("MODEL_NAME"):
        errors.append("MODEL_NAME environment variable is not set")
    if not os.getenv("OPENAI_API_KEY"):
        errors.append("OPENAI_API_KEY environment variable is not set")

    # Validate tickers
    tickers = bootstrap.get("tickers", [])
    if not tickers:
        errors.append("No tickers specified in configuration")
    elif not isinstance(tickers, list):
        errors.append("Tickers must be a list")

    # Validate numeric values. NaN compares False against every bound, so it
    # must be rejected explicitly; OverflowError covers oversized integers.
    try:
        initial_cash = float(bootstrap.get("initial_cash", 0))
        if not math.isfinite(initial_cash):
            errors.append("initial_cash must be a valid number")
        elif initial_cash <= 0:
            errors.append("initial_cash must be greater than 0")
    except (TypeError, ValueError, OverflowError):
        errors.append("initial_cash must be a valid number")

    try:
        margin_requirement = float(bootstrap.get("margin_requirement", 0))
        if not math.isfinite(margin_requirement):
            errors.append("margin_requirement must be a valid number")
        elif margin_requirement < 0 or margin_requirement > 1:
            errors.append("margin_requirement must be between 0 and 1")
    except (TypeError, ValueError, OverflowError):
        errors.append("margin_requirement must be a valid number")

    # Validate backtest dates
    if is_backtest:
        start_date = bootstrap.get("start_date")
        end_date = bootstrap.get("end_date")
        if not start_date:
            errors.append("start_date is required for backtest mode")
        if not end_date:
            errors.append("end_date is required for backtest mode")
        if start_date and end_date:
            try:
                start = datetime.strptime(start_date, "%Y-%m-%d")
                end = datetime.strptime(end_date, "%Y-%m-%d")
                if start >= end:
                    errors.append("start_date must be before end_date")
            except (TypeError, ValueError):
                # TypeError: a non-string date (e.g. an int) reached strptime.
                errors.append("Dates must be in YYYY-MM-DD format")

    # Validate schedule mode
    schedule_mode = bootstrap.get("schedule_mode", "daily")
    if schedule_mode not in ("daily", "intraday"):
        errors.append(f"Invalid schedule_mode '{schedule_mode}': must be 'daily' or 'intraday'")

    return errors
|
||||
|
||||
|
||||
def _get_gateway_process_details() -> Dict[str, Any]:
    """Describe the tracked Gateway process.

    Returns:
        A dict with ``pid``, ``status`` and ``returncode``. ``status`` is
        ``"not_running"`` when no process is tracked, ``"running"`` when
        ``poll()`` reports no exit code, and ``"exited"`` otherwise.
    """
    gateway = _runtime_state.gateway_process
    if gateway is None:
        return {"pid": None, "status": "not_running", "returncode": None}

    pid = gateway.pid
    exit_code = gateway.poll()
    return {
        "pid": pid,
        "status": "running" if exit_code is None else "exited",
        "returncode": exit_code,
    }
|
||||
|
||||
|
||||
def _check_gateway_health() -> Dict[str, Any]:
    """Run the process, port and configuration checks for the Gateway.

    Returns:
        A dict with the overall ``status`` (``"unhealthy"`` if any check is
        unhealthy, ``"healthy"`` if all are healthy, otherwise
        ``"degraded"``), the per-check ``checks`` mapping, and an ISO-format
        ``timestamp``.
    """
    import socket

    # Process check: map the tracked process state onto a health status.
    proc_info = _get_gateway_process_details()
    proc_state = proc_info["status"]
    if proc_state == "running":
        process_check = {"status": "healthy", "details": proc_info}
    elif proc_state == "exited":
        proc_info["error"] = f"Process exited with code {proc_info['returncode']}"
        process_check = {"status": "unhealthy", "details": proc_info}
    else:
        process_check = {"status": "unknown", "details": proc_info}

    # Port check: a short TCP connect against the loopback interface.
    gateway_port = _runtime_state.gateway_port
    try:
        with socket.create_connection(("127.0.0.1", gateway_port), timeout=2):
            port_check = {
                "status": "healthy",
                "details": {"port": gateway_port, "accessible": True},
            }
    except OSError as exc:
        port_check = {
            "status": "unhealthy",
            "details": {"port": gateway_port, "accessible": False, "error": str(exc)},
        }

    # Configuration check: is a runtime manager registered?
    config_details: Dict[str, Any] = {}
    try:
        has_manager = _runtime_state.runtime_manager is not None
        config_status = "healthy" if has_manager else "degraded"
        config_details["has_runtime_manager"] = has_manager
    except Exception as exc:
        config_status = "unknown"
        config_details["error"] = str(exc)

    checks = {
        "process": process_check,
        "port": port_check,
        "configuration": {"status": config_status, "details": config_details},
    }

    # Overall status: any unhealthy check wins, all-healthy is healthy,
    # anything else is degraded.
    observed = {check["status"] for check in checks.values()}
    if "unhealthy" in observed:
        overall_status = "unhealthy"
    elif observed == {"healthy"}:
        overall_status = "healthy"
    else:
        overall_status = "degraded"

    return {
        "status": overall_status,
        "checks": checks,
        "timestamp": datetime.now().isoformat(),
    }
|
||||
|
||||
|
||||
@router.get("/context", response_model=RunContextResponse)
|
||||
async def get_run_context() -> RunContextResponse:
|
||||
"""Return active runtime context, or latest persisted context when stopped."""
|
||||
@@ -512,9 +730,10 @@ async def get_runtime_history(limit: int = 20) -> RuntimeHistoryResponse:
|
||||
|
||||
@router.get("/gateway/status", response_model=GatewayStatusResponse)
|
||||
async def get_gateway_status() -> GatewayStatusResponse:
|
||||
"""Get Gateway process status and port."""
|
||||
"""Get Gateway process status and port with detailed process information."""
|
||||
is_running = _is_gateway_running()
|
||||
run_id = None
|
||||
process_details = _get_gateway_process_details()
|
||||
|
||||
if is_running:
|
||||
try:
|
||||
@@ -525,10 +744,55 @@ async def get_gateway_status() -> GatewayStatusResponse:
|
||||
return GatewayStatusResponse(
|
||||
is_running=is_running,
|
||||
port=_runtime_state.gateway_port,
|
||||
run_id=run_id
|
||||
run_id=run_id,
|
||||
process_status=process_details["status"],
|
||||
pid=process_details["pid"],
|
||||
)
|
||||
|
||||
|
||||
@router.get("/gateway/health", response_model=GatewayHealthResponse)
async def get_gateway_health() -> GatewayHealthResponse:
    """Return the Gateway health report (process, port and configuration checks)."""
    return GatewayHealthResponse(**_check_gateway_health())
|
||||
|
||||
|
||||
@router.get("/mode", response_model=RuntimeModeResponse)
async def get_runtime_mode() -> RuntimeModeResponse:
    """Report the current runtime mode and related run settings.

    Returns:
        ``mode="stopped"`` when the Gateway is not running, ``mode="unknown"``
        when the active runtime context lookup raises ``HTTPException``, and
        otherwise the bootstrap mode (default ``"live"``) together with the
        run id and schedule mode.
    """

    def _inactive(label: str) -> RuntimeModeResponse:
        # Shared shape for every "no usable runtime" answer.
        return RuntimeModeResponse(
            mode=label,
            is_backtest=False,
            run_id=None,
            schedule_mode=None,
            is_running=False,
        )

    if not _is_gateway_running():
        return _inactive("stopped")

    try:
        context = _get_active_runtime_context()
        bootstrap = context.get("bootstrap_values", {})
        mode = bootstrap.get("mode", "live")
        return RuntimeModeResponse(
            mode=mode,
            is_backtest=(mode == "backtest"),
            run_id=context.get("config_name"),
            schedule_mode=bootstrap.get("schedule_mode"),
            is_running=True,
        )
    except HTTPException:
        return _inactive("unknown")
|
||||
|
||||
|
||||
@router.get("/gateway/port")
|
||||
async def get_gateway_port(request: Request) -> Dict[str, Any]:
|
||||
"""Get WebSocket Gateway port for frontend connection."""
|
||||
@@ -807,14 +1071,38 @@ async def start_runtime(
|
||||
_runtime_state.gateway_process = None
|
||||
log_path = _get_gateway_log_path_for_run(run_id)
|
||||
log_tail = _read_log_tail(log_path, max_chars=4000)
|
||||
|
||||
# Build detailed error message
|
||||
error_details = []
|
||||
error_details.append(f"Gateway process exited unexpectedly")
|
||||
|
||||
process_details = _get_gateway_process_details()
|
||||
if process_details.get("returncode") is not None:
|
||||
error_details.append(f"Exit code: {process_details['returncode']}")
|
||||
|
||||
if log_tail:
|
||||
error_details.append(f"Recent log output:\n{log_tail}")
|
||||
else:
|
||||
error_details.append("No log output available. Check environment configuration.")
|
||||
|
||||
# Check common configuration issues
|
||||
config_errors = _validate_gateway_config(bootstrap)
|
||||
if config_errors:
|
||||
error_details.append(f"Configuration issues detected: {'; '.join(config_errors)}")
|
||||
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Gateway failed to start: {log_tail or 'Unknown error'}"
|
||||
detail="\n".join(error_details)
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
_stop_gateway()
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start Gateway: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to start Gateway: {type(e).__name__}: {str(e)}"
|
||||
)
|
||||
|
||||
return LaunchResponse(
|
||||
run_id=run_id,
|
||||
@@ -861,17 +1149,38 @@ async def stop_runtime(force: bool = True) -> StopResponse:
|
||||
was_running = _is_gateway_running()
|
||||
|
||||
if not was_running:
|
||||
process_details = _get_gateway_process_details()
|
||||
if process_details["status"] == "exited":
|
||||
# Process exited but we have a record of it
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=(
|
||||
f"No runtime is currently running. "
|
||||
f"Previous Gateway process exited with code {process_details['returncode']}. "
|
||||
f"PID: {process_details['pid']}"
|
||||
)
|
||||
)
|
||||
raise HTTPException(status_code=404, detail="No runtime is currently running")
|
||||
|
||||
# Get process details before stopping for the response
|
||||
process_details = _get_gateway_process_details()
|
||||
pid_info = f" (PID: {process_details.get('pid')})" if process_details.get('pid') else ""
|
||||
|
||||
# Stop Gateway process
|
||||
_stop_gateway()
|
||||
stop_success = _stop_gateway()
|
||||
|
||||
if not stop_success:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to stop Gateway process{pid_info}. Process may have already terminated."
|
||||
)
|
||||
|
||||
# Unregister runtime manager
|
||||
unregister_runtime_manager()
|
||||
|
||||
return StopResponse(
|
||||
status="stopped",
|
||||
message="Runtime stopped successfully",
|
||||
message=f"Runtime stopped successfully{pid_info}",
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Workspace API Routes
|
||||
Workspace API Routes.
|
||||
|
||||
Provides REST API endpoints for workspace management.
|
||||
These routes manage the design-time `workspaces/` registry, not the run-scoped
|
||||
runtime data under `runs/<run_id>/`.
|
||||
"""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
@@ -31,7 +32,7 @@ class UpdateWorkspaceRequest(BaseModel):
|
||||
|
||||
|
||||
class WorkspaceResponse(BaseModel):
|
||||
"""Workspace information response."""
|
||||
"""Design-time workspace information response."""
|
||||
workspace_id: str
|
||||
name: str
|
||||
description: str
|
||||
@@ -89,10 +90,10 @@ async def list_workspaces(
|
||||
manager: WorkspaceManager = Depends(get_workspace_manager),
|
||||
):
|
||||
"""
|
||||
List all workspaces.
|
||||
List all design-time workspaces.
|
||||
|
||||
Returns:
|
||||
List of workspaces
|
||||
List of design-time workspaces
|
||||
"""
|
||||
workspaces = manager.list_workspaces()
|
||||
return WorkspaceListResponse(
|
||||
|
||||
Reference in New Issue
Block a user