feat: add runtime dynamic team controls
This commit is contained in:
197
backend/core/apo.py
Normal file
197
backend/core/apo.py
Normal file
@@ -0,0 +1,197 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Autonomous Policy Optimizer (APO)
|
||||
Automatically tunes agent policies based on performance feedback.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agentscope.message import Msg
|
||||
from backend.llm.models import get_agent_model, get_agent_formatter
|
||||
from backend.agents.workspace_manager import WorkspaceManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PolicyOptimizer:
    """
    Autonomous Policy Optimizer (APO) meta-agent.

    Analyzes trading performance after settlement and automatically updates
    agent workspace files (POLICY.md, AGENTS.md) to improve future results.
    Optimization is only triggered on losing days (negative P&L versus the
    configured initial capital).
    """

    def __init__(
        self,
        config_name: str,
        project_root: Optional[Path] = None,
        initial_capital: float = 100000.0,
    ):
        """
        Args:
            config_name: Name of the run configuration whose agent
                workspaces will be read and updated.
            project_root: Optional project root forwarded to WorkspaceManager.
            initial_capital: Portfolio starting value used as the P&L
                baseline (defaults to the previously hard-coded 100k).
        """
        self.config_name = config_name
        self.initial_capital = initial_capital
        self.workspace_manager = WorkspaceManager(project_root=project_root)
        # Use a high-capability model for the optimizer (meta-agent).
        self.model = get_agent_model("portfolio_manager")
        self.formatter = get_agent_formatter("portfolio_manager")

    async def run_optimization(
        self,
        date: str,
        reflection_content: str,
        settlement_result: Dict[str, Any],
        analyst_results: List[Dict[str, Any]],
        decisions: Dict[str, Dict],
    ) -> Dict[str, Any]:
        """
        Run the optimization loop if performance indicates a need for change.

        Args:
            date: Trading date being evaluated (string form, as logged).
            reflection_content: Daily reflection text fed into the prompt.
            settlement_result: Settlement summary; ``portfolio_value`` is
                compared against ``initial_capital`` to compute P&L.
            analyst_results: Per-analyst result dicts (``agent`` key names
                the analyst).
            decisions: Per-symbol decision dicts (currently passed through
                to the update generator).

        Returns:
            A status dict: ``{"status": "skipped", ...}`` on a non-negative
            day, otherwise ``{"status": "completed", ...}`` listing the
            applied optimizations.
        """
        total_pnl = settlement_result.get("portfolio_value", 0) - self.initial_capital
        # TODO: consider a more sophisticated trigger, e.g. 3 consecutive losses.
        if total_pnl >= 0:
            logger.info(f"APO: Positive P&L (${total_pnl:,.2f}) for {date}, skipping optimization.")
            return {"status": "skipped", "reason": "positive_pnl"}

        logger.info(f"APO: Negative P&L (${total_pnl:,.2f}) detected for {date}. Starting optimization...")

        # Pipeline: 1) identify underperformers, 2) generate policy updates,
        # 3) apply updates to the agents' workspace files.
        optimizations: List[Dict[str, str]] = []

        # Focus on agents that gave high confidence but wrong direction.
        underperformers = self._identify_underperformers(settlement_result, analyst_results)

        for agent_id in underperformers:
            update = await self._generate_policy_update(
                agent_id,
                date,
                reflection_content,
                settlement_result,
                analyst_results,
                decisions,
            )
            if update:
                self._apply_update(agent_id, update)
                optimizations.append({
                    "agent_id": agent_id,
                    "file": update.get("file", "POLICY.md"),
                    "change": update.get("change", ""),
                })

        return {
            "status": "completed",
            "date": date,
            "total_pnl": total_pnl,
            "optimizations": optimizations,
        }

    def _identify_underperformers(
        self,
        settlement_result: Dict[str, Any],
        analyst_results: List[Dict[str, Any]],
    ) -> List[str]:
        """Identify which agents might need policy adjustments.

        Simple heuristic: on a losing day every analyst that participated is
        a candidate, plus the portfolio manager and risk manager (always
        critical). ``settlement_result`` is currently unused but kept in the
        signature for future per-agent attribution logic.

        Returns:
            Deduplicated agent ids, in first-seen order.
        """
        candidates = [r.get("agent") for r in analyst_results if r.get("agent")]
        candidates.extend(["portfolio_manager", "risk_manager"])
        # dict.fromkeys dedups while preserving a deterministic order
        # (the original list(set(...)) yielded arbitrary ordering).
        return list(dict.fromkeys(candidates))

    async def _generate_policy_update(
        self,
        agent_id: str,
        date: str,
        reflection_content: str,
        settlement_result: Dict[str, Any],
        analyst_results: List[Dict[str, Any]],
        decisions: Dict[str, Dict],
    ) -> Optional[Dict[str, str]]:
        """Use the LLM to generate a specific policy update for an agent.

        Returns:
            The parsed JSON update ({"reasoning", "file", "change"}) or
            ``None`` when the model response cannot be parsed.
        """
        # Load the agent's current policy; fall back to a placeholder so the
        # prompt still makes sense for agents without a POLICY.md yet.
        try:
            current_policy = self.workspace_manager.load_agent_file(
                config_name=self.config_name,
                agent_id=agent_id,
                filename="POLICY.md",
            )
        except Exception:
            current_policy = "No existing policy found."

        prompt = f"""
As an Expert Meta-Optimizer for a multi-agent trading system, your task is to update the operational POLICY for an agent named '{agent_id}' based on recent performance failures.

[Current Context]
Date: {date}
Daily Reflection:
{reflection_content}

[Agent's Current POLICY.md]
{current_policy}

[Task]
Analyze why the system failed (loss occurred). Identify what '{agent_id}' could have done differently or what new constraint/heuristic should be added to its policy to prevent similar mistakes in the future.

Provide a specific, concise addition or modification to the POLICY.md file.
The output MUST be a JSON object with:
1. "reasoning": Brief explanation of why this change is needed.
2. "file": Always "POLICY.md".
3. "change": The EXACT markdown text to APPEND or REPLACE in the file. Keep it in Chinese as the system uses Chinese prompts.

Output ONLY the JSON object.
"""
        msg = Msg(name="system", content=prompt, role="user")
        response = await self.model.reply(msg)

        content = response.content
        if isinstance(content, list):
            # Multi-part responses: take the text of the first part.
            content = content[0].get("text", "")

        # Strip a markdown code fence if the model wrapped its JSON.
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0].strip()
        elif "```" in content:
            # Plain fence without a language tag.
            content = content.split("```")[1].split("```")[0].strip()

        try:
            return json.loads(content)
        except (json.JSONDecodeError, TypeError) as e:
            logger.error(f"APO: Failed to parse optimization response for {agent_id}: {e}")
            return None

    def _apply_update(self, agent_id: str, update: Dict[str, str]) -> None:
        """Apply the suggested update to the agent's workspace file.

        Appends the change under a dated "APO Update" heading; no-ops when
        the change is empty or already present (avoids duplicates on
        repeated runs). Failures are logged, never raised — optimization is
        best-effort.
        """
        filename = update.get("file", "POLICY.md")
        change = update.get("change", "")

        if not change:
            return

        try:
            current_content = self.workspace_manager.load_agent_file(
                config_name=self.config_name,
                agent_id=agent_id,
                filename=filename,
            )

            # Check if change is already there to avoid duplicates.
            if change.strip() in current_content:
                logger.info(f"APO: Change already present in {agent_id}/{filename}")
                return

            new_content = (
                current_content
                + "\n\n### APO Update ("
                + datetime.now().strftime("%Y-%m-%d")
                + ")\n"
                + change
            )

            self.workspace_manager.update_agent_file(
                config_name=self.config_name,
                agent_id=agent_id,
                filename=filename,
                content=new_content,
            )
            logger.info(f"APO: Updated {agent_id}/{filename} with new heuristics.")
        except Exception as e:
            logger.error(f"APO: Failed to apply update to {agent_id}/{filename}: {e}")
|
||||
Reference in New Issue
Block a user