Add Tuner learn_to_ask example (#101)
This commit is contained in:
30
tuner/learn_to_ask/config.yaml
Normal file
30
tuner/learn_to_ask/config.yaml
Normal file
@@ -0,0 +1,30 @@
project: AgentScope-ReAct
name: Learn_to_Ask-Qwen2.5-7B-fixed

# directory to save checkpoints, default to ./checkpoints if TRINITY_CHECKPOINT_ROOT_DIR not set
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}

algorithm:
  algorithm_type: grpo  # a GRPO-based algorithm for multi-step reasoning

model:
  # path to the pre-trained model, default to Qwen/Qwen2.5-7B-Instruct if TRINITY_MODEL_PATH not set
  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct}
  # NOTE(review): nesting of `tinker` under `model` is inferred from its position
  # in the extracted diff — confirm against the consumer's config schema
  tinker:  # tinker config, you can set tinker parameters here
    enable: false  # if true, tinker will be enabled

cluster:
  node_num: 1  # cluster with 1 node
  gpu_per_node: 8  # each node has 8 GPUs

buffer:
  total_epochs: 4  # run taskset for 4 epochs

explorer:
  runner_per_model: 32  # each model has 32 runners for parallel rollout
  max_timeout: 600  # max timeout for each rollout is 600 seconds

synchronizer:
  sync_style: fixed
  sync_method: 'nccl'
  sync_interval: 10
  sync_timeout: 7200  # wait for 120 minutes

trainer:
  save_interval: 90  # save checkpoint every 90 steps
  use_dynamic_bsz: true
  ulysses_sequence_parallel_size: 1  # use sequence parallelism to reduce memory usage

monitor:
  monitor_type: wandb  # here we use wandb; you can also use tensorboard, mlflow or swanlab
Reference in New Issue
Block a user