feat(agent): complete EvoAgent integration for all 6 agent roles
Migrate all agent roles from Legacy to EvoAgent architecture:
- fundamentals_analyst, technical_analyst, sentiment_analyst, valuation_analyst
- risk_manager, portfolio_manager

Key changes:
- EvoAgent now supports Portfolio Manager compatibility methods (_make_decision, get_decisions, get_portfolio_state, load_portfolio_state, update_portfolio)
- Add UnifiedAgentFactory for centralized agent creation
- ToolGuard with batch approval API and WebSocket broadcast
- Legacy agents marked deprecated (AnalystAgent, RiskAgent, PMAgent)
- Remove backend/agents/compat.py migration shim
- Add run_id alongside workspace_id for semantic clarity
- Complete integration test coverage (13 tests)
- All smoke tests passing for 6 agent roles

Constraint: Must maintain backward compatibility with existing run configs
Constraint: Memory support must work with EvoAgent (no fallback to Legacy)
Rejected: Separate PM implementation for EvoAgent | unified approach cleaner
Confidence: high
Scope-risk: broad
Directive: EVO_AGENT_IDS env var still respected but defaults to all roles
Not-tested: Kubernetes sandbox mode for skill execution
This commit is contained in:
@@ -219,6 +219,22 @@ class GatewayStatusResponse(BaseModel):
|
||||
is_running: bool
|
||||
port: int
|
||||
run_id: Optional[str] = None
|
||||
process_status: Optional[str] = None
|
||||
pid: Optional[int] = None
|
||||
|
||||
|
||||
class GatewayHealthResponse(BaseModel):
    """Response body describing the outcome of a Gateway health check."""

    # Overall verdict for the Gateway as a whole.
    status: str

    # Individual check results, keyed by check name.
    checks: Dict[str, Any]

    # When the report was produced, serialized as a string.
    timestamp: str
|
||||
|
||||
|
||||
class RuntimeModeResponse(BaseModel):
    """Response body reporting which mode the runtime is operating in."""

    # Name of the current mode.
    mode: str

    # Convenience flag for clients that only care about backtest vs. not.
    is_backtest: bool

    # Identifier of the run, when one is known.
    run_id: Optional[str] = None

    # Scheduling mode of the run, when one is known.
    schedule_mode: Optional[str] = None

    # Whether a runtime is currently running.
    is_running: bool
|
||||
|
||||
|
||||
class RuntimeConfigResponse(BaseModel):
|
||||
@@ -264,6 +280,49 @@ def _load_run_snapshot(run_id: str) -> Dict[str, Any]:
|
||||
return json.loads(snapshot_path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def _load_run_server_state(run_dir: Path) -> Dict[str, Any]:
    """Load persisted runtime server state if present.

    Reads ``<run_dir>/state/server_state.json`` and returns its content.

    Args:
        run_dir: Directory of the run whose server state should be loaded.

    Returns:
        The decoded JSON object. An empty dict is returned when the file is
        missing, unreadable, not valid JSON, or when its top-level value is
        not a JSON object. This loader is best-effort and never raises for
        those conditions.
    """
    server_state_path = run_dir / "state" / "server_state.json"
    try:
        # EAFP: a missing file surfaces as FileNotFoundError (an OSError),
        # which avoids the exists()/read race of a look-before-you-leap check.
        loaded = json.loads(server_state_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError;
        # RecursionError covers pathologically nested documents.
        return {}

    # Callers use dict methods on the result, so a top-level list/null/number
    # is treated the same as "no usable state".
    return loaded if isinstance(loaded, dict) else {}
|
||||
|
||||
|
||||
def _extract_history_metrics(run_dir: Path) -> tuple[int, Optional[float]]:
    """Prefer runtime state files over dashboard exports for history summaries.

    The persisted server state is consulted first. Only when it yields
    neither trades nor a total value does the function fall back to
    ``<run_dir>/team_dashboard/summary.json``.

    Args:
        run_dir: Directory of the run to summarize.

    Returns:
        A ``(total_trades, total_asset_value)`` pair. ``total_asset_value`` is
        ``None`` when it is unavailable; ``(0, None)`` is returned when no
        usable data is found in either source.
    """
    server_state = _load_run_server_state(run_dir)
    if not isinstance(server_state, dict):
        # Defensive: the state file may hold a non-object JSON value.
        server_state = {}

    portfolio = server_state.get("portfolio")
    if not isinstance(portfolio, dict):
        # A truthy non-dict (e.g. a list) would otherwise break on .get().
        portfolio = {}

    trades = server_state.get("trades")
    total_trades = len(trades) if isinstance(trades, list) else 0

    total_asset_value: Optional[float] = None
    raw_total_value = portfolio.get("total_value")
    if raw_total_value is not None:
        try:
            total_asset_value = float(raw_total_value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: JSON integers too large to represent as float.
            total_asset_value = None

    if total_trades or total_asset_value is not None:
        return total_trades, total_asset_value

    # Fallback: dashboard export written alongside the run.
    summary_path = run_dir / "team_dashboard" / "summary.json"
    try:
        summary = json.loads(summary_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # Missing, unreadable, undecodable or malformed summary file.
        return 0, None
    if not isinstance(summary, dict):
        return 0, None

    try:
        total_trades = int(summary.get("totalTrades") or 0)
        raw_asset_value = summary.get("totalAssetValue")
        total_asset_value = (
            float(raw_asset_value) if raw_asset_value is not None else None
        )
    except (TypeError, ValueError, OverflowError):
        # Any unparsable metric invalidates the whole summary.
        return 0, None
    return total_trades, total_asset_value
|
||||
|
||||
|
||||
def _copy_path_if_exists(src: Path, dst: Path) -> None:
|
||||
if not src.exists():
|
||||
return
|
||||
@@ -281,7 +340,7 @@ def _restore_run_assets(source_run_id: str, target_run_dir: Path) -> None:
|
||||
raise HTTPException(status_code=404, detail=f"Source run not found: {source_run_id}")
|
||||
|
||||
for relative in [
|
||||
"team_dashboard",
|
||||
"team_dashboard/_internal_state.json",
|
||||
"agents",
|
||||
"skills",
|
||||
"memory",
|
||||
@@ -307,12 +366,10 @@ def _list_runs(limit: int = 50) -> list[RuntimeHistoryItem]:
|
||||
for run_dir in run_dirs[: max(1, int(limit))]:
|
||||
run_id = run_dir.name
|
||||
runtime_state_path = run_dir / "state" / "runtime_state.json"
|
||||
summary_path = run_dir / "team_dashboard" / "summary.json"
|
||||
|
||||
bootstrap: Dict[str, Any] = {}
|
||||
updated_at: Optional[str] = None
|
||||
total_trades = 0
|
||||
total_asset_value: Optional[float] = None
|
||||
total_trades, total_asset_value = _extract_history_metrics(run_dir)
|
||||
|
||||
if runtime_state_path.exists():
|
||||
try:
|
||||
@@ -323,15 +380,6 @@ def _list_runs(limit: int = 50) -> list[RuntimeHistoryItem]:
|
||||
except Exception:
|
||||
bootstrap = {}
|
||||
|
||||
if summary_path.exists():
|
||||
try:
|
||||
summary = json.loads(summary_path.read_text(encoding="utf-8"))
|
||||
total_trades = int(summary.get("totalTrades") or 0)
|
||||
total_asset_value = float(summary.get("totalAssetValue")) if summary.get("totalAssetValue") is not None else None
|
||||
except Exception:
|
||||
total_trades = 0
|
||||
total_asset_value = None
|
||||
|
||||
items.append(
|
||||
RuntimeHistoryItem(
|
||||
run_id=run_id,
|
||||
@@ -436,6 +484,14 @@ def _start_gateway_process(
|
||||
port: int
|
||||
) -> subprocess.Popen:
|
||||
"""Start Gateway as a separate process."""
|
||||
# Validate configuration before starting
|
||||
validation_errors = _validate_gateway_config(bootstrap)
|
||||
if validation_errors:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Gateway configuration validation failed: {'; '.join(validation_errors)}"
|
||||
)
|
||||
|
||||
# Prepare environment
|
||||
env = os.environ.copy()
|
||||
|
||||
@@ -467,6 +523,168 @@ def _start_gateway_process(
|
||||
return process
|
||||
|
||||
|
||||
def _validate_gateway_config(bootstrap: Dict[str, Any]) -> List[str]:
    """Validate Gateway bootstrap configuration.

    Checks the run mode, the environment variables needed for that mode, the
    ticker list, the numeric settings, the backtest date range and the
    schedule mode. Every problem found is reported; validation does not stop
    at the first error.

    Args:
        bootstrap: Bootstrap values for the run. Keys read here are ``mode``,
            ``tickers``, ``initial_cash``, ``margin_requirement``,
            ``start_date``, ``end_date`` and ``schedule_mode``.

    Returns:
        A list of validation error messages. Empty list means valid.
    """
    import math
    from datetime import datetime

    errors: List[str] = []

    mode = bootstrap.get("mode", "live")
    is_backtest = mode == "backtest"

    # Validate mode
    if mode not in ("live", "backtest"):
        errors.append(f"Invalid mode '{mode}': must be 'live' or 'backtest'")

    # Anything that is not a backtest (including an invalid mode) is held to
    # the live-mode requirement for a market data key.
    if not is_backtest and not os.getenv("FINNHUB_API_KEY"):
        errors.append("FINNHUB_API_KEY environment variable is required for live mode")

    # Check LLM configuration
    if not os.getenv("MODEL_NAME"):
        errors.append("MODEL_NAME environment variable is not set")
    if not os.getenv("OPENAI_API_KEY"):
        errors.append("OPENAI_API_KEY environment variable is not set")

    # Validate tickers
    tickers = bootstrap.get("tickers", [])
    if not tickers:
        errors.append("No tickers specified in configuration")
    elif not isinstance(tickers, list):
        errors.append("Tickers must be a list")

    # Validate numeric values
    try:
        initial_cash = float(bootstrap.get("initial_cash", 0))
    except (TypeError, ValueError, OverflowError):
        errors.append("initial_cash must be a valid number")
    else:
        if not math.isfinite(initial_cash):
            # NaN compares False against everything and inf is > 0, so both
            # would otherwise slip through the range check below.
            errors.append("initial_cash must be a valid number")
        elif initial_cash <= 0:
            errors.append("initial_cash must be greater than 0")

    try:
        margin_requirement = float(bootstrap.get("margin_requirement", 0))
    except (TypeError, ValueError, OverflowError):
        errors.append("margin_requirement must be a valid number")
    else:
        # Written as a negated chained comparison so that NaN is rejected too.
        if not 0 <= margin_requirement <= 1:
            errors.append("margin_requirement must be between 0 and 1")

    # Validate backtest dates
    if is_backtest:
        start_date = bootstrap.get("start_date")
        end_date = bootstrap.get("end_date")
        if not start_date:
            errors.append("start_date is required for backtest mode")
        if not end_date:
            errors.append("end_date is required for backtest mode")
        if start_date and end_date:
            try:
                start = datetime.strptime(start_date, "%Y-%m-%d")
                end = datetime.strptime(end_date, "%Y-%m-%d")
            except (TypeError, ValueError):
                # TypeError: a non-string value (e.g. an int) was supplied.
                errors.append("Dates must be in YYYY-MM-DD format")
            else:
                if start >= end:
                    errors.append("start_date must be before end_date")

    # Validate schedule mode
    schedule_mode = bootstrap.get("schedule_mode", "daily")
    if schedule_mode not in ("daily", "intraday"):
        errors.append(f"Invalid schedule_mode '{schedule_mode}': must be 'daily' or 'intraday'")

    return errors
|
||||
|
||||
|
||||
def _get_gateway_process_details() -> Dict[str, Any]:
    """Describe the tracked Gateway process.

    Returns:
        A dict with the keys ``pid``, ``status`` and ``returncode``.
        ``status`` is ``"not_running"`` when no process is tracked,
        ``"running"`` while ``poll()`` reports no exit code, and
        ``"exited"`` otherwise (with ``returncode`` holding that code).
    """
    gateway = _runtime_state.gateway_process
    if gateway is None:
        # Nothing has been launched (or the handle was cleared).
        return {"pid": None, "status": "not_running", "returncode": None}

    pid = gateway.pid
    exit_code = gateway.poll()  # None while the process is still alive
    return {
        "pid": pid,
        "status": "running" if exit_code is None else "exited",
        "returncode": exit_code,
    }
|
||||
|
||||
|
||||
def _check_gateway_health() -> Dict[str, Any]:
    """Run the process, port and configuration checks for the Gateway.

    Returns:
        A dict with ``status`` (``"healthy"``, ``"unhealthy"`` or
        ``"degraded"``), ``checks`` (one ``{"status", "details"}`` entry per
        check) and ``timestamp`` (ISO-formatted local time of the report).
    """
    import socket

    # --- Process check -----------------------------------------------------
    proc_info = _get_gateway_process_details()
    proc_state = proc_info["status"]
    if proc_state == "running":
        proc_verdict = "healthy"
    elif proc_state == "exited":
        proc_verdict = "unhealthy"
        proc_info["error"] = f"Process exited with code {proc_info['returncode']}"
    else:
        proc_verdict = "unknown"

    # --- Port check: try a short-lived TCP connection on localhost ----------
    gateway_port = _runtime_state.gateway_port
    try:
        with socket.create_connection(("127.0.0.1", gateway_port), timeout=2):
            port_verdict = "healthy"
            port_info = {"port": gateway_port, "accessible": True}
    except OSError as exc:
        port_verdict = "unhealthy"
        port_info = {"port": gateway_port, "accessible": False, "error": str(exc)}

    # --- Configuration check: is a runtime manager registered? -------------
    config_info: Dict[str, Any] = {}
    try:
        manager_present = _runtime_state.runtime_manager is not None
        config_verdict = "healthy" if manager_present else "degraded"
        config_info["has_runtime_manager"] = manager_present
    except Exception as exc:
        config_verdict = "unknown"
        config_info["error"] = str(exc)

    checks = {
        "process": {"status": proc_verdict, "details": proc_info},
        "port": {"status": port_verdict, "details": port_info},
        "configuration": {"status": config_verdict, "details": config_info},
    }

    # --- Overall verdict: any failure wins, otherwise all must be healthy ---
    verdicts = [entry["status"] for entry in checks.values()]
    if "unhealthy" in verdicts:
        overall_status = "unhealthy"
    elif all(verdict == "healthy" for verdict in verdicts):
        overall_status = "healthy"
    else:
        overall_status = "degraded"

    return {
        "status": overall_status,
        "checks": checks,
        "timestamp": datetime.now().isoformat(),
    }
|
||||
|
||||
|
||||
@router.get("/context", response_model=RunContextResponse)
|
||||
async def get_run_context() -> RunContextResponse:
|
||||
"""Return active runtime context, or latest persisted context when stopped."""
|
||||
@@ -512,9 +730,10 @@ async def get_runtime_history(limit: int = 20) -> RuntimeHistoryResponse:
|
||||
|
||||
@router.get("/gateway/status", response_model=GatewayStatusResponse)
|
||||
async def get_gateway_status() -> GatewayStatusResponse:
|
||||
"""Get Gateway process status and port."""
|
||||
"""Get Gateway process status and port with detailed process information."""
|
||||
is_running = _is_gateway_running()
|
||||
run_id = None
|
||||
process_details = _get_gateway_process_details()
|
||||
|
||||
if is_running:
|
||||
try:
|
||||
@@ -525,10 +744,55 @@ async def get_gateway_status() -> GatewayStatusResponse:
|
||||
return GatewayStatusResponse(
|
||||
is_running=is_running,
|
||||
port=_runtime_state.gateway_port,
|
||||
run_id=run_id
|
||||
run_id=run_id,
|
||||
process_status=process_details["status"],
|
||||
pid=process_details["pid"],
|
||||
)
|
||||
|
||||
|
||||
@router.get("/gateway/health", response_model=GatewayHealthResponse)
async def get_gateway_health() -> GatewayHealthResponse:
    """Get comprehensive Gateway health check including process, port, and configuration status.

    Returns:
        The health report produced by ``_check_gateway_health``, wrapped in a
        ``GatewayHealthResponse``.
    """
    import asyncio

    # The health check performs a blocking TCP connect with a timeout, so it
    # is run on the default executor instead of stalling the event loop.
    loop = asyncio.get_running_loop()
    health = await loop.run_in_executor(None, _check_gateway_health)
    return GatewayHealthResponse(**health)
|
||||
|
||||
|
||||
@router.get("/mode", response_model=RuntimeModeResponse)
async def get_runtime_mode() -> RuntimeModeResponse:
    """Get current runtime mode (live or backtest) and related configuration.

    Returns:
        ``mode="stopped"`` when the Gateway is not running; ``mode="unknown"``
        when the active runtime context cannot be resolved; otherwise the
        mode from the active bootstrap values (``"live"`` when unset) together
        with the run id and schedule mode.
    """
    if not _is_gateway_running():
        return RuntimeModeResponse(
            mode="stopped",
            is_backtest=False,
            run_id=None,
            schedule_mode=None,
            is_running=False,
        )

    try:
        context = _get_active_runtime_context()
    except HTTPException:
        # NOTE(review): the Gateway was reported as running just above, yet
        # this branch answers is_running=False — kept as-is; confirm intent.
        return RuntimeModeResponse(
            mode="unknown",
            is_backtest=False,
            run_id=None,
            schedule_mode=None,
            is_running=False,
        )

    # `or {}` also covers a context whose "bootstrap_values" is present but
    # null, which a plain .get(key, {}) default would not.
    bootstrap = context.get("bootstrap_values") or {}
    mode = bootstrap.get("mode", "live")

    return RuntimeModeResponse(
        mode=mode,
        is_backtest=mode == "backtest",
        run_id=context.get("config_name"),
        schedule_mode=bootstrap.get("schedule_mode"),
        is_running=True,
    )
|
||||
|
||||
|
||||
@router.get("/gateway/port")
|
||||
async def get_gateway_port(request: Request) -> Dict[str, Any]:
|
||||
"""Get WebSocket Gateway port for frontend connection."""
|
||||
@@ -807,14 +1071,38 @@ async def start_runtime(
|
||||
_runtime_state.gateway_process = None
|
||||
log_path = _get_gateway_log_path_for_run(run_id)
|
||||
log_tail = _read_log_tail(log_path, max_chars=4000)
|
||||
|
||||
# Build detailed error message
|
||||
error_details = []
|
||||
error_details.append(f"Gateway process exited unexpectedly")
|
||||
|
||||
process_details = _get_gateway_process_details()
|
||||
if process_details.get("returncode") is not None:
|
||||
error_details.append(f"Exit code: {process_details['returncode']}")
|
||||
|
||||
if log_tail:
|
||||
error_details.append(f"Recent log output:\n{log_tail}")
|
||||
else:
|
||||
error_details.append("No log output available. Check environment configuration.")
|
||||
|
||||
# Check common configuration issues
|
||||
config_errors = _validate_gateway_config(bootstrap)
|
||||
if config_errors:
|
||||
error_details.append(f"Configuration issues detected: {'; '.join(config_errors)}")
|
||||
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Gateway failed to start: {log_tail or 'Unknown error'}"
|
||||
detail="\n".join(error_details)
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
_stop_gateway()
|
||||
raise HTTPException(status_code=500, detail=f"Failed to start Gateway: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to start Gateway: {type(e).__name__}: {str(e)}"
|
||||
)
|
||||
|
||||
return LaunchResponse(
|
||||
run_id=run_id,
|
||||
@@ -861,17 +1149,38 @@ async def stop_runtime(force: bool = True) -> StopResponse:
|
||||
was_running = _is_gateway_running()
|
||||
|
||||
if not was_running:
|
||||
process_details = _get_gateway_process_details()
|
||||
if process_details["status"] == "exited":
|
||||
# Process exited but we have a record of it
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=(
|
||||
f"No runtime is currently running. "
|
||||
f"Previous Gateway process exited with code {process_details['returncode']}. "
|
||||
f"PID: {process_details['pid']}"
|
||||
)
|
||||
)
|
||||
raise HTTPException(status_code=404, detail="No runtime is currently running")
|
||||
|
||||
# Get process details before stopping for the response
|
||||
process_details = _get_gateway_process_details()
|
||||
pid_info = f" (PID: {process_details.get('pid')})" if process_details.get('pid') else ""
|
||||
|
||||
# Stop Gateway process
|
||||
_stop_gateway()
|
||||
stop_success = _stop_gateway()
|
||||
|
||||
if not stop_success:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to stop Gateway process{pid_info}. Process may have already terminated."
|
||||
)
|
||||
|
||||
# Unregister runtime manager
|
||||
unregister_runtime_manager()
|
||||
|
||||
return StopResponse(
|
||||
status="stopped",
|
||||
message="Runtime stopped successfully",
|
||||
message=f"Runtime stopped successfully{pid_info}",
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user