project: "Data-Augmentation"  # Project name
name: "Random-Selector"  # Experiment name
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}  # Directory to save model checkpoints

# Config of data-centric experiments
buffer:
  total_epochs: 1  # Total training epochs
  explorer_input:
    taskset:
      path: "path/to/your/augmented/math_data"  # Training data path
      split: "train"  # Training data split
      task_selector:
        selector_type: random  # Strategy of task selection
    eval_tasksets:
      - name: "eval-aime24"  # Evaluation data name
        path: "path/to/aime24_data"  # Evaluation data path
        split: "test"  # Evaluation data split

synchronizer:
  sync_style: dynamic_by_explorer  # Sync triggered dynamically by explorer
  sync_method: 'nccl'
  sync_interval: 4  # Sync every N steps
  sync_timeout: 7200  # Timeout for synchronization (seconds)

monitor:
  monitor_type: tensorboard  # Can also use wandb, mlflow or swanlab

# The config below has been set in python file
algorithm:
  algorithm_type: multi_step_grpo  # GRPO series for multi-step scenario
  repeat_times: 8  # Number of rollouts per prompt for advantage estimation
  optimizer:
    # Written as 1.0e-6 (not 1e-6): YAML 1.1 resolvers treat `1e-6`
    # as a string because the 1.1 float form requires a dot.
    lr: 1.0e-6  # Learning rate

model:
  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-0.6B}  # Base model path
  max_model_len: 24576  # Max context length
  max_response_tokens: 16384  # Max tokens per response
  temperature: 1.0  # Temperature of model's generation

cluster:
  node_num: 1  # Number of used nodes
  gpu_per_node: 8  # Number of GPUs every node

explorer:
  eval_interval: 20  # Evaluation every N steps
  runner_per_model: 16  # Runners per infer engine
  max_timeout: 1200  # Max timeout for each rollout (seconds)
  rollout_model:
    engine_num: 4  # Number of vLLM engines for rollout model
    tensor_parallel_size: 1  # TP size per engine for rollout model
    enable_openai_api: true  # Enable OpenAI-compatible API
    enable_history: true  # Enable conversation history
    enable_auto_tool_choice: true  # Enable automatic tool selection
    tool_call_parser: hermes  # Parser for tool calls
    reasoning_parser: deepseek_r1  # Parser for reasoning type

trainer:
  save_interval: 100  # Save checkpoint every N steps
  use_dynamic_bsz: true  # Use dynamic batch size
  ulysses_sequence_parallel_size: 1  # Sequence parallel size for Ulysses