Add Tuner learn_to_ask example (#101)
This commit is contained in:
30
tuner/learn_to_ask/config.yaml
Normal file
30
tuner/learn_to_ask/config.yaml
Normal file
@@ -0,0 +1,30 @@
project: AgentScope-ReAct
name: Learn_to_Ask-Qwen2.5-7B-fixed

# directory to save checkpoints, default to ./checkpoints if TRINITY_CHECKPOINT_ROOT_DIR not set
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}

algorithm:
  algorithm_type: grpo  # a GRPO-based algorithm for multi-step reasoning

model:
  # path to the pre-trained model, default to Qwen/Qwen2.5-7B-Instruct if TRINITY_MODEL_PATH not set
  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct}
  # NOTE(review): nesting of `tinker` under `model` is inferred from its position
  # in the extracted diff — confirm against the consumer's config schema
  tinker:  # tinker config, you can set tinker parameters here
    enable: false  # if true, tinker will be enabled

cluster:
  node_num: 1  # cluster with 1 node
  gpu_per_node: 8  # each node has 8 GPUs

buffer:
  total_epochs: 4  # run taskset for 4 epochs

explorer:
  runner_per_model: 32  # each model has 32 runners for parallel rollout
  max_timeout: 600  # max timeout for each rollout is 600 seconds

synchronizer:
  sync_style: fixed
  sync_method: 'nccl'
  sync_interval: 10
  sync_timeout: 7200  # wait for 120 minutes

trainer:
  save_interval: 90  # save checkpoint every 90 steps
  use_dynamic_bsz: true
  ulysses_sequence_parallel_size: 1  # use sequence parallelism to reduce memory usage

monitor:
  monitor_type: wandb  # here we use wandb; you can also use tensorboard, mlflow or swanlab
Reference in New Issue
Block a user