# Experiment configuration: multi-step GRPO training of an agent on FrozenLake.
#
# `${oc.env:VAR,default}` values are environment-variable interpolations with
# a fallback default.
#
# NOTE(review): this file was restored from a whitespace-collapsed copy; the
# nesting below was reconstructed from key order and section names. Confirm
# against the consuming framework's config schema (in particular that
# `optimizer` belongs under `algorithm` and `replay_buffer` under
# `experience_buffer`).

project: "AgentScope"  # Project name
name: "FrozenLake"  # Experiment name
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}  # Directory to save model checkpoints

algorithm:
  algorithm_type: multi_step_grpo  # GRPO series for multi-step scenario
  repeat_times: 16  # Number of rollouts per prompt for advantage estimation
  kl_loss_fn: "low_var_kl"  # KL loss function
  kl_loss_fn_args:
    kl_coef: 0  # KL divergence coefficient
  advantage_fn_args:
    # Written with a decimal point so YAML 1.1 loaders also read it as a float.
    epsilon: 1.0e-6  # Small value for numerical stability
    std_threshold: 0.0001  # Threshold for standard deviation
  optimizer:
    # Written with a decimal point so YAML 1.1 loaders also read it as a float.
    lr: 1.0e-6  # Learning rate

model:
  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct}  # Base model path
  # max_prompt_tokens + max_response_tokens = 23552 + 2048 = max_model_len.
  max_prompt_tokens: 23552  # Max tokens for prompt
  max_response_tokens: 2048  # Max tokens per response
  max_model_len: 25600  # Max context length
  temperature: 1.0  # Sampling temperature

buffer:
  total_epochs: 5  # Total training epochs
  batch_size: 32  # Batch size per explore step
  train_batch_size: 1024  # Total experiences per training step
  trainer_input:
    experience_buffer:
      name: experience_buffer
      storage_type: queue
      max_read_timeout: 7200  # Max timeout for reading from buffer (seconds)
      replay_buffer:
        enable: true  # Enable experience replay
        priority_fn: linear_decay  # Priority function for replay buffer
        priority_fn_args:
          decay: 0.1  # Decay rate for priority function

explorer:
  runner_per_model: 16  # Number of runners per model
  rollout_model:
    engine_num: 6  # Number of vLLM engines for rollout model
    tensor_parallel_size: 1  # TP size per engine for rollout model
    enable_openai_api: true  # Enable OpenAI-compatible API
    enable_history: true  # Enable conversation history
    enable_auto_tool_choice: true  # Enable automatic tool selection
    tool_call_parser: hermes  # Parser for tool calls

trainer:
  save_interval: 100  # Save checkpoint every N steps
  use_dynamic_bsz: true  # Use dynamic batch size
  grad_clip: 1.0  # Gradient clipping value
  max_token_len_per_gpu: 25600  # Max token length per GPU
  ulysses_sequence_parallel_size: 2  # Sequence parallel size for Ulysses

synchronizer:
  sync_style: dynamic_by_explorer  # Sync triggered dynamically by explorer
  sync_interval: 1  # Sync every N steps
  sync_timeout: 1200  # Timeout for synchronization (seconds)