feat: add runtime dynamic team controls

This commit is contained in:
2026-04-03 13:48:31 +08:00
parent dc0b250adc
commit ecfbd87244
16 changed files with 2146 additions and 147 deletions

197
backend/core/apo.py Normal file
View File

@@ -0,0 +1,197 @@
# -*- coding: utf-8 -*-
"""
Autonomous Policy Optimizer (APO)
Automatically tunes agent policies based on performance feedback.
"""
import logging
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from agentscope.message import Msg
from backend.llm.models import get_agent_model, get_agent_formatter
from backend.agents.workspace_manager import WorkspaceManager
logger = logging.getLogger(__name__)
class PolicyOptimizer:
    """
    Autonomous Policy Optimizer (APO) meta-agent.

    Analyzes daily trading performance and, after losing days, uses an LLM to
    generate targeted updates to agent workspace files (POLICY.md, AGENTS.md)
    so that future runs avoid repeating the same mistakes.
    """

    def __init__(
        self,
        config_name: str,
        project_root: Optional[Path] = None,
        initial_capital: float = 100000.0,
    ):
        """
        Args:
            config_name: Team configuration whose agent workspaces are tuned.
            project_root: Optional project root forwarded to WorkspaceManager.
            initial_capital: Starting portfolio value used for the daily P&L
                trigger (previously hard-coded to 100k inside run_optimization).
        """
        self.config_name = config_name
        self.initial_capital = initial_capital
        self.workspace_manager = WorkspaceManager(project_root=project_root)
        # Use a high-capability model for the optimizer (meta-agent): reuse the
        # portfolio manager's model/formatter configuration.
        self.model = get_agent_model("portfolio_manager")
        self.formatter = get_agent_formatter("portfolio_manager")

    async def run_optimization(
        self,
        date: str,
        reflection_content: str,
        settlement_result: Dict[str, Any],
        analyst_results: List[Dict[str, Any]],
        decisions: Dict[str, Dict],
    ) -> Dict[str, Any]:
        """
        Run the optimization loop if performance indicates a need for change.

        Returns:
            {"status": "skipped", ...} on non-negative P&L days, otherwise
            {"status": "completed", ...} with the list of applied optimizations.
        """
        # NOTE(review): a single-day P&L check is a crude trigger; something
        # like 3 consecutive losing days may be a better signal.
        total_pnl = settlement_result.get("portfolio_value", 0) - self.initial_capital
        if total_pnl >= 0:
            logger.info(f"APO: Positive P&L (${total_pnl:,.2f}) for {date}, skipping optimization.")
            return {"status": "skipped", "reason": "positive_pnl"}
        logger.info(f"APO: Negative P&L (${total_pnl:,.2f}) detected for {date}. Starting optimization...")
        # 1. Identify underperforming agents or logic
        # 2. Generate policy updates
        # 3. Apply updates
        optimizations = []
        underperformers = self._identify_underperformers(settlement_result, analyst_results)
        for agent_id in underperformers:
            update = await self._generate_policy_update(
                agent_id,
                date,
                reflection_content,
                settlement_result,
                analyst_results,
                decisions,
            )
            if update:
                self._apply_update(agent_id, update)
                optimizations.append({
                    "agent_id": agent_id,
                    "file": update.get("file", "POLICY.md"),
                    "change": update.get("change", ""),
                })
        return {
            "status": "completed",
            "date": date,
            "total_pnl": total_pnl,
            "optimizations": optimizations,
        }

    def _identify_underperformers(
        self,
        settlement_result: Dict[str, Any],
        analyst_results: List[Dict[str, Any]],
    ) -> List[str]:
        """Identify which agents might need policy adjustments.

        Simple heuristic: on a losing day every analyst that participated is a
        candidate, plus the portfolio manager and risk manager since they are
        critical decision points. Returns a de-duplicated list of agent ids.
        """
        underperformers = [
            result["agent"]
            for result in analyst_results
            if result.get("agent")
        ]
        # Also include PM and Risk Manager as they are critical.
        underperformers.append("portfolio_manager")
        underperformers.append("risk_manager")
        return list(set(underperformers))

    async def _generate_policy_update(
        self,
        agent_id: str,
        date: str,
        reflection_content: str,
        settlement_result: Dict[str, Any],
        analyst_results: List[Dict[str, Any]],
        decisions: Dict[str, Dict],
    ) -> Optional[Dict[str, str]]:
        """Use LLM to generate a specific policy update for an agent.

        Returns the parsed JSON update ({"reasoning", "file", "change"}) or
        None when the model response cannot be parsed.
        """
        # Load the agent's current policy; fall back to a placeholder so the
        # prompt is still coherent for agents with no workspace file yet.
        try:
            current_policy = self.workspace_manager.load_agent_file(
                config_name=self.config_name,
                agent_id=agent_id,
                filename="POLICY.md",
            )
        except Exception:
            current_policy = "No existing policy found."
        prompt = f"""
As an Expert Meta-Optimizer for a multi-agent trading system, your task is to update the operational POLICY for an agent named '{agent_id}' based on recent performance failures.
[Current Context]
Date: {date}
Daily Reflection:
{reflection_content}
[Agent's Current POLICY.md]
{current_policy}
[Task]
Analyze why the system failed (loss occurred). Identify what '{agent_id}' could have done differently or what new constraint/heuristic should be added to its policy to prevent similar mistakes in the future.
Provide a specific, concise addition or modification to the POLICY.md file.
The output MUST be a JSON object with:
1. "reasoning": Brief explanation of why this change is needed.
2. "file": Always "POLICY.md".
3. "change": The EXACT markdown text to APPEND or REPLACE in the file. Keep it in Chinese as the system uses Chinese prompts.
Output ONLY the JSON object.
"""
        msg = Msg(name="system", content=prompt, role="user")
        response = await self.model.reply(msg)
        content = response.content
        # assumes a list-shaped response holds text in the first element -- TODO confirm
        if isinstance(content, list):
            content = content[0].get("text", "")
        # Strip a markdown code fence if the model wrapped its JSON in one
        # (handles both ```json and plain ``` fences).
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0].strip()
        elif "```" in content:
            content = content.split("```")[1].split("```")[0].strip()
        try:
            return json.loads(content)
        except Exception as e:
            logger.error(f"APO: Failed to parse optimization response for {agent_id}: {e}")
            return None

    def _apply_update(self, agent_id: str, update: Dict[str, str]) -> None:
        """Apply the suggested update to the agent's workspace.

        Appends the suggested change under a dated "APO Update" heading,
        skipping empty or already-present changes to avoid duplicates.
        """
        filename = update.get("file", "POLICY.md")
        change = update.get("change", "")
        if not change:
            return
        try:
            current_content = self.workspace_manager.load_agent_file(
                config_name=self.config_name,
                agent_id=agent_id,
                filename=filename,
            )
            # Check if change is already there to avoid duplicates.
            if change.strip() in current_content:
                logger.info(f"APO: Change already present in {agent_id}/{filename}")
                return
            new_content = current_content + "\n\n### APO Update (" + datetime.now().strftime("%Y-%m-%d") + ")\n" + change
            self.workspace_manager.update_agent_file(
                config_name=self.config_name,
                agent_id=agent_id,
                filename=filename,
                content=new_content,
            )
            logger.info(f"APO: Updated {agent_id}/{filename} with new heuristics.")
        except Exception as e:
            logger.error(f"APO: Failed to apply update to {agent_id}/{filename}: {e}")