Add example for data augmentation in tuner (#98)
This commit is contained in:
62
tuner/data_augment/config_random.yaml
Normal file
62
tuner/data_augment/config_random.yaml
Normal file
@@ -0,0 +1,62 @@
|
||||
# Top-level experiment identity and checkpoint location.
project: "Data-Augmentation"  # Project name
name: "Random-Selector"  # Experiment name
# Directory to save model checkpoints; read from the TRINITY_CHECKPOINT_ROOT_DIR
# environment variable, with ./checkpoints as the fallback.
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
|
||||
|
||||
# Configuration for data-centric experiments
|
||||
# Training and evaluation data sources.
# NOTE(review): nesting was reconstructed from key semantics after the
# indentation was lost; confirm against the consumer's config schema.
buffer:
  total_epochs: 1  # Total training epochs
  explorer_input:
    taskset:
      path: "path/to/your/augmented/math_data"  # Training data path
      split: "train"  # Training data split
      task_selector:
        selector_type: random  # Strategy of task selection
    eval_tasksets:
      - name: "eval-aime24"  # Evaluation data name
        path: "path/to/aime24_data"  # Evaluation data path
        split: "test"  # Evaluation data split
|
||||
|
||||
# Synchronization settings.
synchronizer:
  sync_style: dynamic_by_explorer  # Sync triggered dynamically by explorer
  sync_method: "nccl"  # Method used for synchronization
  sync_interval: 4  # Sync every N steps
  sync_timeout: 7200  # Timeout for synchronization (seconds)
|
||||
|
||||
# Experiment monitoring backend.
monitor:
  monitor_type: tensorboard  # Can also use wandb, mlflow or swanlab
|
||||
|
||||
# The settings below have already been set in the Python file
|
||||
|
||||
# RL algorithm settings.
# NOTE(review): `optimizer` is assumed to nest under `algorithm`; the original
# indentation was lost, so confirm against the consumer's config schema.
algorithm:
  algorithm_type: multi_step_grpo  # GRPO series for multi-step scenario
  repeat_times: 8  # Number of rollouts per prompt for advantage estimation
  optimizer:
    # Written with an explicit mantissa dot so YAML 1.1 loaders also parse it
    # as a float rather than a string.
    lr: 1.0e-6  # Learning rate
|
||||
|
||||
# Base model and generation limits.
model:
  # Base model path; read from the TRINITY_MODEL_PATH environment variable,
  # with Qwen/Qwen3-0.6B as the fallback.
  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-0.6B}
  max_model_len: 24576  # Max context length
  max_response_tokens: 16384  # Max tokens per response
  temperature: 1.0  # Temperature of model's generation
|
||||
|
||||
# Hardware layout.
cluster:
  node_num: 1  # Number of used nodes
  gpu_per_node: 8  # Number of GPUs every node
|
||||
|
||||
# Rollout (explorer) settings.
# NOTE(review): the keys from `engine_num` onward are assumed to nest under
# `rollout_model`; the original indentation was lost, so confirm against the
# consumer's config schema.
explorer:
  eval_interval: 20  # Evaluation every N steps
  runner_per_model: 16  # Runners per infer engine
  max_timeout: 1200  # Max timeout for each rollout (seconds)
  rollout_model:
    engine_num: 4  # Number of vLLM engines for rollout model
    tensor_parallel_size: 1  # TP size per engine for rollout model
    enable_openai_api: true  # Enable OpenAI-compatible API
    enable_history: true  # Enable conversation history
    enable_auto_tool_choice: true  # Enable automatic tool selection
    tool_call_parser: hermes  # Parser for tool calls
    reasoning_parser: deepseek_r1  # Parser for reasoning type
|
||||
|
||||
# Trainer settings.
trainer:
  save_interval: 100  # Save checkpoint every N steps
  use_dynamic_bsz: true  # Use dynamic batch size
  ulysses_sequence_parallel_size: 1  # Sequence parallel size for Ulysses
|
||||
Reference in New Issue
Block a user