# -*- coding: utf-8 -*-
# flake8: noqa: E501
# pylint: disable=C0301,C0413,W0621,W0404,C0412,E0611,E1121
"""Example of training a werewolf game agent with Trinity-RFT using AgentScope tuner."""
import sys
from pathlib import Path
from textwrap import dedent
from typing import Dict
import traceback

import numpy as np
from agentscope.tuner import (
    tune,
    WorkflowOutput,
    TunerModelConfig,
)
from agentscope.agent import ReActAgent
from agentscope.formatter import OpenAIMultiAgentFormatter

# Add current directory to path for local imports
sys.path.insert(0, str(Path(__file__).parent))
from game import BadGuyException, werewolves_game  # noqa: E402

# Roles that belong to the "good guy" camp (everything except werewolves).
_GOOD_GUY_ROLES = ("villager", "seer", "witch")

# Accepted values for ``workflow_args["trainable_target"]``.
_VALID_TRAINABLE_TARGETS = ("werewolf", "good_guy")


async def run_werewolves_workflow(
    task: Dict,
    model: TunerModelConfig,
    auxiliary_models: Dict[str, TunerModelConfig],
) -> WorkflowOutput:
    """Run the werewolf game workflow.

    Seven players are created (2 werewolves, 3 villagers, 1 seer, 1 witch).
    Players on the side selected by ``trainable_target`` use the trainable
    ``model``; all other players use ``auxiliary_models["participant"]``.

    Args:
        task (Dict): The task information containing:
            - 'seed': for role shuffling (default: 0)
            - 'workflow_args': optional dict with 'trainable_target' key
              ("werewolf" or "good_guy", default: "werewolf"). A missing
              or ``None`` value is treated as an empty dict.
        model (TunerModelConfig): The trainable model.
        auxiliary_models (Dict[str, TunerModelConfig]): Dictionary of
            auxiliary models. Expected to have 'participant' key for
            opponent players.

    Returns:
        WorkflowOutput: Contains reward and metrics from the game. The reward
        is 1.0 when the trained side wins, 0.0 when it loses, and -0.1 when
        the game raises an exception.

    Raises:
        ValueError: If 'trainable_target' is not one of the accepted values,
            or if 'participant' is missing from ``auxiliary_models``.
    """
    # Initialize roles: 2 werewolves, 3 villagers, 1 seer, 1 witch
    roles = ["werewolf"] * 2 + ["villager"] * 3 + ["seer", "witch"]

    # Shuffle roles based on task seed for reproducibility.
    # NOTE: this re-seeds NumPy's process-wide legacy RNG, so any later
    # ``np.random.*`` call in this process is affected as well.
    seed = task.get("seed", 0)
    np.random.seed(seed)
    np.random.shuffle(roles)

    # Get trainable_target from workflow_args (default: "werewolf").
    # ``or {}`` also covers an explicit ``"workflow_args": None``.
    workflow_args = task.get("workflow_args") or {}
    trainable_target = workflow_args.get("trainable_target", "werewolf")
    if trainable_target not in _VALID_TRAINABLE_TARGETS:
        # Fail loudly: silently falling back to "good_guy" on a typo would
        # train and reward the wrong side.
        raise ValueError(
            f"Invalid trainable_target {trainable_target!r}; "
            f"expected one of {_VALID_TRAINABLE_TARGETS}",
        )
    training_werewolves = trainable_target == "werewolf"

    # Get the participant model for opponent players
    if not auxiliary_models or "participant" not in auxiliary_models:
        raise ValueError(
            "Expected 'participant' model in auxiliary_models for opponent players",
        )
    participant_model = auxiliary_models["participant"]

    # Create players; the trained side gets the trainable model, everyone
    # else gets the participant model.
    players = []
    for i, role in enumerate(roles):
        if training_werewolves:
            use_trainable = role == "werewolf"
        else:
            use_trainable = role in _GOOD_GUY_ROLES

        player_name = f"Player{i + 1}"
        agent = ReActAgent(
            name=player_name,
            sys_prompt=get_official_agent_prompt(player_name),
            model=model if use_trainable else participant_model,
            formatter=OpenAIMultiAgentFormatter(),
            max_iters=3,
        )
        players.append(agent)

    try:
        # Run the werewolf game
        good_guy_win = await werewolves_game(players, roles)

        # The trained side succeeds when its own camp wins:
        # werewolves when good guys lose, good guys when they win.
        if training_werewolves:
            is_success = not good_guy_win
        else:
            is_success = bool(good_guy_win)
        raw_reward = 1.0 if is_success else 0.0

        # NOTE(review): "trainable_target" is a string while the other metric
        # values are floats -- confirm the tuner accepts non-numeric metrics.
        metrics = {
            "success": float(is_success),
            "werewolf_win": float(not good_guy_win),
            "villager_win": float(good_guy_win),
            "trainable_target": trainable_target,
        }

        return WorkflowOutput(
            reward=raw_reward,
            metrics=metrics,
        )

    except BadGuyException as e:
        # If game execution fails, give a small penalty
        traceback.print_exc()
        print(
            f"Error during game execution: {e}. "
            "Assigning penalty to trainable agents.",
        )
        return WorkflowOutput(
            reward=-0.1,
            metrics={"success": 0.0, "game_error": 1.0},
        )

    except Exception as e:
        # Catch any other unexpected errors so a single failed game yields a
        # penalised sample instead of propagating out of the workflow.
        traceback.print_exc()
        print(f"Unexpected error: {e}")
        return WorkflowOutput(
            reward=-0.1,
            metrics={"success": 0.0, "unexpected_error": 1.0},
        )


def get_official_agent_prompt(name: str) -> str:
    """Get the system prompt for an agent.

    The prompt is the same for every role; only the player name is
    interpolated into it.

    Args:
        name (str): The name of the agent.

    Returns:
        str: The system prompt.
    """
    system_prompt = dedent(
        f"""
        You're a werewolf game player named {name}.

        # YOUR TARGET
        Your target is to win the game with your teammates as much as possible.

        # GAME RULES
        - In werewolf game, players are divided into two werewolves, three villagers, one seer, and one witch.
            - Werewolves: kill one player each night, and must hide identity during the day.
            - Villagers: ordinary players without special abilities, try to identify and eliminate werewolves.
                - Seer: A special villager who can check one player's identity each night.
                - Witch: A special villager with two one-time-use potions: a healing potion to save a player (including herself) from being killed at night, and a poison to eliminate one player at night.
        - The game alternates between night and day phases until one side wins:
            - Night Phase
                - Werewolves choose one victim
                - Seer checks one player's identity
                - Witch decides whether to use potions
                - Moderator announces who died during the night
            - Day Phase
                - All players discuss and vote to eliminate one suspected player
        - The werewolves will win the game if they can eliminate all the villagers.
        - The villagers will win the game if they can eliminate all the werewolves.

        ## During PUBLIC discussion (day phase):
        - Your response will be split into TWO parts: REASONING (private) and STATEMENT (public)
        - REASONING: Your internal thoughts - ONLY YOU can see this. Think freely here.
        - STATEMENT: What you actually say - EVERYONE can see this. Be strategic!

        ## For WEREWOLVES in public discussion:
        - ❌ NEVER say "I'm a werewolf" or "we werewolves" in your STATEMENT
        - ❌ NEVER reveal your werewolf teammates in your STATEMENT
        - ❌ NEVER discuss werewolf strategy in your STATEMENT
        - ✅ In REASONING: freely think about werewolf strategy
        - ✅ In STATEMENT: pretend to be a villager, seer, or other role
        - ✅ In STATEMENT: accuse others, defend yourself, but NEVER reveal your true identity

        ## For ALL ROLES in public discussion:
        - Use REASONING to analyze: "Who might be the werewolf? What's my strategy?"
        - Use STATEMENT to speak: "I think Player X is suspicious because..."
        - Keep sensitive information in REASONING, not in STATEMENT

        ## Examples:

        ### BAD (Werewolf exposing themselves):
        REASONING: "I'm a werewolf, I should protect my teammates."
        STATEMENT: "As a werewolf, I think we should vote Player 5." ❌ EXPOSED!

        ### GOOD (Werewolf hiding identity):
        REASONING: "I'm a werewolf. Player 5 might be the seer based on their questions. I should cast suspicion on them without being obvious."
        STATEMENT: "I find Player 5's behavior suspicious. They've been asking too many questions about people's roles." ✅ HIDDEN!

        ### GOOD (Villager analyzing):
        REASONING: "Player 2 and Player 3 seem to be defending each other. Could they be werewolf teammates?"
        STATEMENT: "I noticed Player 2 and Player 3 have been very defensive of each other. This makes me suspicious." ✅ STRATEGIC!

        # GAME GUIDANCE
        - Try your best to win the game with your teammates, tricks, lies, and deception are all allowed, e.g. pretending to be a different role.
        - During discussion, don't be political, be direct and to the point.
        - The day phase voting provides important clues. For example, the werewolves may vote together, attack the seer, etc.

        ## GAME GUIDANCE FOR WEREWOLF
        - Seer is your greatest threat, who can check one player's identity each night. Analyze players' speeches, find out the seer and eliminate him/her will greatly increase your chances of winning.
        - In the first night, making random choices is common for werewolves since no information is available.
        - Pretending to be other roles (seer, witch or villager) is a common strategy to hide your identity and mislead other villagers in the day phase.
        - The outcome of the night phase provides important clues. For example, if witch uses the healing or poison potion, etc. Use this information to adjust your strategy.
        - [CRITICAL] In public discussion, NEVER reveal you are a werewolf. Always pretend to be a villager or other role.

        ## GAME GUIDANCE FOR SEER
        - Seer is very important to villagers, you should earn the villagers' trust, and lead the discussion phase if possible.
        - Your ability to check one player's identity is crucial.
        - The outcome of the night phase provides important clues. For example, if witch uses the healing or poison potion, etc. Use this information to adjust your strategy.
        - Consider when to reveal your identity - too early and werewolves will target you, too late and villagers won't trust you.

        ## GAME GUIDANCE FOR WITCH
        - Witch has two powerful potions, use them wisely to protect key villagers or eliminate suspected werewolves.
        - [IMPORTANT] You CAN use the healing potion to save yourself if you are killed by werewolves (self-rescue is allowed).
        - Consider saving the healing potion for critical moments, especially if you think you might be targeted.
        - The outcome of the night phase provides important clues. Use this information to adjust your strategy. For example, the person you save is likely to be on the villagers' side.

        ## GAME GUIDANCE FOR VILLAGER
        - Protecting special villagers, especially the seer, is crucial for your team's success.
        - Be cautious and decide whether to trust other players based on their speeches and actions.
        - Base your decisions on the information you have received, be logical and engage in the discussion to vote out the suspected werewolves.

        # NOTE
        - [IMPORTANT] DO NOT make up any information that is not provided by the moderator or other players.
        - This is a TEXT-based game, so DO NOT use or make up any non-textual information.
        - Always critically reflect on whether your evidence exist, and avoid making assumptions.
        - Your response should be specific and concise, provide clear reason and avoid unnecessary elaboration.
        - Generate your one-line response by using the `generate_response` function.
        - Don't repeat the others' speeches.
        - [CRITICAL] Remember: REASONING is private (only you see it), STATEMENT is public (everyone sees it). Use this to your advantage!""",
    )
    return system_prompt


if __name__ == "__main__":
    # TunerModelConfig is already imported at module level.
    from agentscope.tuner import (
        DatasetConfig,
        AlgorithmConfig,
    )

    # High-level configuration in code (easy to modify)
    config_path = Path(__file__).parent / "config.yaml"

    # Setup Model Path
    trained_model_path = (
        "Qwen/Qwen2.5-7B-Instruct"  # fill in your model path here
    )
    auxiliary_model_path = "Qwen/Qwen3-30B-A3B-Instruct-2507"  # fill in your auxiliary model path here

    # Dataset configuration
    dataset = DatasetConfig(
        path=str(Path(__file__).parent / "data"),
        split="train",
        total_steps=400,  # Total training steps
    )

    # Model configuration (trainable model; which side it plays is chosen
    # per task via workflow_args["trainable_target"], default "werewolf")
    model = TunerModelConfig(
        model_path=trained_model_path,
        max_model_len=25600,
        max_tokens=4096,
        temperature=1.0,
        inference_engine_num=16,
        tensor_parallel_size=1,
        tool_call_parser="hermes",
        reasoning_parser=None,
    )

    # Auxiliary models ("participant" plays every role on the opposing side)
    auxiliary_models = {
        "participant": TunerModelConfig(
            model_path=auxiliary_model_path,
            max_model_len=25600,
            max_tokens=4096,
            temperature=0.1,  # Lower temperature for auxiliary models
            inference_engine_num=8,
            tensor_parallel_size=1,
            tool_call_parser="hermes",
            reasoning_parser=None,
        ),
    }

    # Algorithm configuration
    algorithm = AlgorithmConfig(
        algorithm_type="multi_step_grpo",
        group_size=32,  # repeat_times in Trinity
        batch_size=24,
        learning_rate=1e-6,
        save_interval_steps=100,
        eval_interval_steps=100,
    )

    # Run training with hybrid configuration
    # Code parameters above + detailed Trinity config from YAML
    tune(
        workflow_func=run_werewolves_workflow,
        judge_func=None,  # We compute reward directly in the workflow
        train_dataset=dataset,
        model=model,
        auxiliary_models=auxiliary_models,
        algorithm=algorithm,
        config_path=str(config_path),  # For cluster, explorer, trainer details
    )