Files
evotraders/tuner/werewolves/main.py
2026-01-16 17:25:49 +08:00

317 lines
14 KiB
Python

# -*- coding: utf-8 -*-
# flake8: noqa: E501
# pylint: disable=C0301,C0413,W0621,W0404,C0412,E0611,E1121
"""Example of training a werewolf game agent with Trinity-RFT using AgentScope tuner."""
import sys
from pathlib import Path
from typing import Dict
import traceback
import numpy as np
from agentscope.tuner import (
tune,
WorkflowOutput,
TunerModelConfig,
)
from agentscope.agent import ReActAgent
from agentscope.formatter import OpenAIMultiAgentFormatter
# Add current directory to path for local imports
sys.path.insert(0, str(Path(__file__).parent))
from game import BadGuyException, werewolves_game # noqa: E402
async def run_werewolves_workflow(
    task: Dict,
    model: TunerModelConfig,
    auxiliary_models: Dict[str, TunerModelConfig],
) -> WorkflowOutput:
    """Run one werewolf game and score it for the side being trained.

    Seven players are created (2 werewolves, 3 villagers, 1 seer, 1 witch).
    Players on the trained side use ``model``; every other player uses the
    'participant' auxiliary model.

    Args:
        task (Dict): The task information containing:
            - 'seed': for role shuffling (default: 0)
            - 'workflow_args': optional dict with 'trainable_target' key
              ("werewolf" or "good_guy", default: "werewolf"). A missing
              or ``None`` value is treated as an empty dict.
        model (TunerModelConfig): The trainable model.
        auxiliary_models (Dict[str, TunerModelConfig]): Dictionary of
            auxiliary models. Expected to have 'participant' key for
            opponent players.

    Returns:
        WorkflowOutput: Reward 1.0 when the trained side wins and 0.0 when
        it loses, together with win/success metrics. If the game raises,
        the reward is -0.1 and the metrics flag the kind of error.

    Raises:
        ValueError: If 'trainable_target' is neither "werewolf" nor
            "good_guy", or if 'participant' is missing from
            ``auxiliary_models``.
    """
    # Initialize roles: 2 werewolves, 3 villagers, 1 seer, 1 witch
    roles = ["werewolf"] * 2 + ["villager"] * 3 + ["seer", "witch"]

    # Shuffle roles based on task seed for reproducibility.
    # NOTE(review): this reseeds NumPy's process-wide generator, so every
    # other np.random user in the process is affected as well. Kept as-is
    # because the game code may rely on it -- confirm before switching to
    # a local generator.
    seed = task.get("seed", 0)
    np.random.seed(seed)
    np.random.shuffle(roles)

    # `or {}` also covers a task that carries an explicit
    # `"workflow_args": None`, which would otherwise fail on `.get`.
    workflow_args = task.get("workflow_args") or {}
    trainable_target = workflow_args.get("trainable_target", "werewolf")
    # Fail fast on a typo instead of silently training the good guys.
    if trainable_target not in ("werewolf", "good_guy"):
        raise ValueError(
            "Expected 'trainable_target' to be 'werewolf' or 'good_guy', "
            f"got {trainable_target!r}",
        )
    training_werewolves = trainable_target == "werewolf"

    # Get the participant model for opponent players
    if "participant" not in auxiliary_models:
        raise ValueError(
            "Expected 'participant' model in auxiliary_models for opponent players",
        )
    participant_model = auxiliary_models["participant"]

    # Create players; the trained side gets the trainable model and
    # everybody else plays with the participant model.
    players = []
    for index, role in enumerate(roles, start=1):
        player_name = f"Player{index}"
        is_werewolf = role == "werewolf"
        # Every non-werewolf role (villager, seer, witch) is a good guy.
        use_trainable = is_werewolf if training_werewolves else not is_werewolf
        players.append(
            ReActAgent(
                name=player_name,
                sys_prompt=get_official_agent_prompt(player_name),
                model=model if use_trainable else participant_model,
                formatter=OpenAIMultiAgentFormatter(),
                max_iters=3,
            ),
        )

    try:
        # Run the werewolf game
        good_guy_win = await werewolves_game(players, roles)

        # The trained side succeeds when its own team wins:
        # werewolves win exactly when the good guys do not.
        if training_werewolves:
            is_success = not good_guy_win
        else:
            is_success = bool(good_guy_win)
        raw_reward = 1.0 if is_success else 0.0

        # NOTE(review): 'trainable_target' is a string while the other
        # metrics are floats -- confirm the tuner accepts non-numeric
        # metric values.
        metrics = {
            "success": float(is_success),
            "werewolf_win": float(not good_guy_win),
            "villager_win": float(good_guy_win),
            "trainable_target": trainable_target,
        }
        return WorkflowOutput(
            reward=raw_reward,
            metrics=metrics,
        )
    except BadGuyException as e:
        # If game execution fails, give a small penalty
        traceback.print_exc()
        print(
            f"Error during game execution: {e}. "
            "Assigning penalty to trainable agents.",
        )
        return WorkflowOutput(
            reward=-0.1,
            metrics={"success": 0.0, "game_error": 1.0},
        )
    except Exception as e:
        # Catch any other unexpected errors so one broken rollout is
        # penalised instead of propagating out of the workflow.
        traceback.print_exc()
        print(f"Unexpected error: {e}")
        return WorkflowOutput(
            reward=-0.1,
            metrics={"success": 0.0, "unexpected_error": 1.0},
        )
def get_official_agent_prompt(name: str) -> str:
    """Get the system prompt for an agent.

    The static template is dedented first and the player name is filled in
    afterwards, so the content of ``name`` (for example an embedded
    newline) cannot change how the template is dedented.

    Args:
        name (str): The name of the agent.

    Returns:
        str: The system prompt. It starts with a newline because the
        template text begins on the line after the opening quotes.
    """
    from textwrap import dedent

    # Plain (non-f) string: `{name}` is the only replacement field and is
    # substituted by `str.format` after dedenting.
    template = dedent(
        """
        You're a werewolf game player named {name}.
        # YOUR TARGET
        Your target is to win the game with your teammates as much as possible.
        # GAME RULES
        - In werewolf game, players are divided into two werewolves, three villagers, one seer, and one witch.
        - Werewolves: kill one player each night, and must hide identity during the day.
        - Villagers: ordinary players without special abilities, try to identify and eliminate werewolves.
        - Seer: A special villager who can check one player's identity each night.
        - Witch: A special villager with two one-time-use potions: a healing potion to save a player (including herself) from being killed at night, and a poison to eliminate one player at night.
        - The game alternates between night and day phases until one side wins:
        - Night Phase
        - Werewolves choose one victim
        - Seer checks one player's identity
        - Witch decides whether to use potions
        - Moderator announces who died during the night
        - Day Phase
        - All players discuss and vote to eliminate one suspected player
        - The werewolves will win the game if they can eliminate all the villagers.
        - The villagers will win the game if they can eliminate all the werewolves.
        ## During PUBLIC discussion (day phase):
        - Your response will be split into TWO parts: REASONING (private) and STATEMENT (public)
        - REASONING: Your internal thoughts - ONLY YOU can see this. Think freely here.
        - STATEMENT: What you actually say - EVERYONE can see this. Be strategic!
        ## For WEREWOLVES in public discussion:
        - ❌ NEVER say "I'm a werewolf" or "we werewolves" in your STATEMENT
        - ❌ NEVER reveal your werewolf teammates in your STATEMENT
        - ❌ NEVER discuss werewolf strategy in your STATEMENT
        - ✅ In REASONING: freely think about werewolf strategy
        - ✅ In STATEMENT: pretend to be a villager, seer, or other role
        - ✅ In STATEMENT: accuse others, defend yourself, but NEVER reveal your true identity
        ## For ALL ROLES in public discussion:
        - Use REASONING to analyze: "Who might be the werewolf? What's my strategy?"
        - Use STATEMENT to speak: "I think Player X is suspicious because..."
        - Keep sensitive information in REASONING, not in STATEMENT
        ## Examples:
        ### BAD (Werewolf exposing themselves):
        REASONING: "I'm a werewolf, I should protect my teammates."
        STATEMENT: "As a werewolf, I think we should vote Player 5." ❌ EXPOSED!
        ### GOOD (Werewolf hiding identity):
        REASONING: "I'm a werewolf. Player 5 might be the seer based on their questions. I should cast suspicion on them without being obvious."
        STATEMENT: "I find Player 5's behavior suspicious. They've been asking too many questions about people's roles." ✅ HIDDEN!
        ### GOOD (Villager analyzing):
        REASONING: "Player 2 and Player 3 seem to be defending each other. Could they be werewolf teammates?"
        STATEMENT: "I noticed Player 2 and Player 3 have been very defensive of each other. This makes me suspicious." ✅ STRATEGIC!
        # GAME GUIDANCE
        - Try your best to win the game with your teammates, tricks, lies, and deception are all allowed, e.g. pretending to be a different role.
        - During discussion, don't be political, be direct and to the point.
        - The day phase voting provides important clues. For example, the werewolves may vote together, attack the seer, etc.
        ## GAME GUIDANCE FOR WEREWOLF
        - Seer is your greatest threat, who can check one player's identity each night. Analyze players' speeches, find out the seer and eliminate him/her will greatly increase your chances of winning.
        - In the first night, making random choices is common for werewolves since no information is available.
        - Pretending to be other roles (seer, witch or villager) is a common strategy to hide your identity and mislead other villagers in the day phase.
        - The outcome of the night phase provides important clues. For example, if witch uses the healing or poison potion, etc. Use this information to adjust your strategy.
        - [CRITICAL] In public discussion, NEVER reveal you are a werewolf. Always pretend to be a villager or other role.
        ## GAME GUIDANCE FOR SEER
        - Seer is very important to villagers, you should earn the villagers' trust, and lead the discussion phase if possible.
        - Your ability to check one player's identity is crucial.
        - The outcome of the night phase provides important clues. For example, if witch uses the healing or poison potion, etc. Use this information to adjust your strategy.
        - Consider when to reveal your identity - too early and werewolves will target you, too late and villagers won't trust you.
        ## GAME GUIDANCE FOR WITCH
        - Witch has two powerful potions, use them wisely to protect key villagers or eliminate suspected werewolves.
        - [IMPORTANT] You CAN use the healing potion to save yourself if you are killed by werewolves (self-rescue is allowed).
        - Consider saving the healing potion for critical moments, especially if you think you might be targeted.
        - The outcome of the night phase provides important clues. Use this information to adjust your strategy. For example, the person you save is likely to be on the villagers' side.
        ## GAME GUIDANCE FOR VILLAGER
        - Protecting special villagers, especially the seer, is crucial for your team's success.
        - Be cautious and decide whether to trust other players based on their speeches and actions.
        - Base your decisions on the information you have received, be logical and engage in the discussion to vote out the suspected werewolves.
        # NOTE
        - [IMPORTANT] DO NOT make up any information that is not provided by the moderator or other players.
        - This is a TEXT-based game, so DO NOT use or make up any non-textual information.
        - Always critically reflect on whether your evidence exist, and avoid making assumptions.
        - Your response should be specific and concise, provide clear reason and avoid unnecessary elaboration.
        - Generate your one-line response by using the `generate_response` function.
        - Don't repeat the others' speeches.
        - [CRITICAL] Remember: REASONING is private (only you see it), STATEMENT is public (everyone sees it). Use this to your advantage!""",
    )
    # The substituted value is inserted verbatim; braces inside `name`
    # are not interpreted as further replacement fields.
    return template.format(name=name)
if __name__ == "__main__":
    from agentscope.tuner import (
        DatasetConfig,
        TunerModelConfig,
        AlgorithmConfig,
    )

    # The YAML config and the dataset directory both sit next to this file.
    base_dir = Path(__file__).parent
    config_path = base_dir / "config.yaml"

    # Model paths: fill in your own trainable / auxiliary model paths here.
    trained_model_path = "Qwen/Qwen2.5-7B-Instruct"
    auxiliary_model_path = "Qwen/Qwen3-30B-A3B-Instruct-2507"

    # Settings that are identical for the trainable and the auxiliary model.
    shared_model_kwargs = {
        "max_model_len": 25600,
        "max_tokens": 4096,
        "tensor_parallel_size": 1,
        "tool_call_parser": "hermes",
        "reasoning_parser": None,
    }

    # Training dataset, read from the local "data" directory.
    dataset = DatasetConfig(
        path=str(base_dir / "data"),
        split="train",
        total_steps=400,  # Total training steps
    )

    # Trainable model, used by the players of the side being trained.
    model = TunerModelConfig(
        model_path=trained_model_path,
        temperature=1.0,
        inference_engine_num=16,
        **shared_model_kwargs,
    )

    # Auxiliary model, used by all remaining players ('participant').
    auxiliary_models = {
        "participant": TunerModelConfig(
            model_path=auxiliary_model_path,
            temperature=0.1,  # Lower temperature for auxiliary models
            inference_engine_num=8,
            **shared_model_kwargs,
        ),
    }

    # Algorithm settings.
    algorithm = AlgorithmConfig(
        algorithm_type="multi_step_grpo",
        group_size=32,  # repeat_times in Trinity
        batch_size=24,
        learning_rate=1e-6,
        save_interval_steps=100,
        eval_interval_steps=100,
    )

    # Hybrid configuration: the code-level parameters above are combined
    # with the detailed Trinity config (cluster, explorer, trainer) in YAML.
    tune(
        workflow_func=run_werewolves_workflow,
        judge_func=None,  # We compute reward directly in the workflow
        train_dataset=dataset,
        model=model,
        auxiliary_models=auxiliary_models,
        algorithm=algorithm,
        config_path=str(config_path),
    )