Add examples for frozenlake and emailsearch (#94)
This commit is contained in:
53
tuner/frozen_lake/config.yaml
Normal file
53
tuner/frozen_lake/config.yaml
Normal file
@@ -0,0 +1,53 @@
|
||||
project: "AgentScope" # Project name
|
||||
name: "FrozenLake" # Experiment name
|
||||
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} # Directory to save model checkpoints
|
||||
algorithm:
|
||||
algorithm_type: multi_step_grpo # GRPO-series algorithm for multi-step (agentic) scenarios
|
||||
repeat_times: 16 # Number of rollouts per prompt for advantage estimation
|
||||
kl_loss_fn: "low_var_kl"
|
||||
kl_loss_fn_args:
|
||||
kl_coef: 0 # KL divergence coefficient
|
||||
advantage_fn_args:
|
||||
epsilon: 1e-6 # Small value for numerical stability (dot-less scientific notation parses as a float under the OmegaConf loader; plain PyYAML would read it as the string "1e-6")
|
||||
std_threshold: 0.0001 # Threshold for standard deviation
|
||||
optimizer:
|
||||
lr: 1e-6 # Learning rate (parsed as a float by the OmegaConf loader; note plain PyYAML treats dot-less exponents as strings)
|
||||
model:
|
||||
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct} # Base model path
|
||||
max_prompt_tokens: 23552 # Max tokens for prompt
|
||||
max_response_tokens: 2048 # Max tokens per response
|
||||
max_model_len: 25600 # Max context length
|
||||
temperature: 1.0 # Sampling temperature
|
||||
buffer:
|
||||
total_epochs: 5 # Total training epochs
|
||||
batch_size: 32 # Batch size per explore step
|
||||
train_batch_size: 1024 # Total experiences per training step
|
||||
trainer_input:
|
||||
experience_buffer:
|
||||
name: experience_buffer
|
||||
storage_type: queue
|
||||
max_read_timeout: 7200 # Max timeout for reading from buffer (seconds)
|
||||
replay_buffer:
|
||||
enable: true # Enable experience replay
|
||||
priority_fn: linear_decay # Priority function for replay buffer
|
||||
priority_fn_args:
|
||||
decay: 0.1 # Decay rate for priority function
|
||||
explorer:
|
||||
runner_per_model: 16 # Number of runners per model
|
||||
rollout_model:
|
||||
engine_num: 6 # Number of vLLM engines for rollout model
|
||||
tensor_parallel_size: 1 # TP size per engine for rollout model
|
||||
enable_openai_api: true # Enable OpenAI-compatible API
|
||||
enable_history: true # Enable conversation history
|
||||
enable_auto_tool_choice: true # Enable automatic tool selection
|
||||
tool_call_parser: hermes # Parser for tool calls
|
||||
trainer:
|
||||
save_interval: 100 # Save checkpoint every N steps
|
||||
use_dynamic_bsz: true # Use dynamic batch size
|
||||
grad_clip: 1.0 # Gradient clipping value
|
||||
max_token_len_per_gpu: 25600 # Max token length per GPU
|
||||
ulysses_sequence_parallel_size: 2 # Sequence parallel size for Ulysses
|
||||
synchronizer:
|
||||
sync_style: dynamic_by_explorer # Sync triggered dynamically by explorer
|
||||
sync_interval: 1 # Sync every N steps
|
||||
sync_timeout: 1200 # Timeout for synchronization (seconds)
|
||||
Reference in New Issue
Block a user