Add examples for frozenlake and emailsearch (#94)
This commit is contained in:
53
tuner/frozen_lake/config.yaml
Normal file
53
tuner/frozen_lake/config.yaml
Normal file
@@ -0,0 +1,53 @@
|
||||
project: "AgentScope" # Project name
|
||||
name: "FrozenLake" # Experiment name
|
||||
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints} # Directory to save model checkpoints
|
||||
algorithm:
|
||||
algorithm_type: multi_step_grpo # GRPO-series algorithm for multi-step (agentic) scenarios
|
||||
repeat_times: 16 # Number of rollouts per prompt for advantage estimation
|
||||
kl_loss_fn: "low_var_kl"
|
||||
kl_loss_fn_args:
|
||||
kl_coef: 0 # KL divergence coefficient
|
||||
advantage_fn_args:
|
||||
epsilon: 1e-6 # Small value for numerical stability (dot-less scientific notation parses as a float under the OmegaConf loader; plain PyYAML would read it as the string "1e-6")
|
||||
std_threshold: 0.0001 # Threshold for standard deviation
|
||||
optimizer:
|
||||
lr: 1e-6 # Learning rate (parsed as a float by the OmegaConf loader; note plain PyYAML treats dot-less exponents as strings)
|
||||
model:
|
||||
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-3B-Instruct} # Base model path
|
||||
max_prompt_tokens: 23552 # Max tokens for prompt
|
||||
max_response_tokens: 2048 # Max tokens per response
|
||||
max_model_len: 25600 # Max context length
|
||||
temperature: 1.0 # Sampling temperature
|
||||
buffer:
|
||||
total_epochs: 5 # Total training epochs
|
||||
batch_size: 32 # Batch size per explore step
|
||||
train_batch_size: 1024 # Total experiences per training step
|
||||
trainer_input:
|
||||
experience_buffer:
|
||||
name: experience_buffer
|
||||
storage_type: queue
|
||||
max_read_timeout: 7200 # Max timeout for reading from buffer (seconds)
|
||||
replay_buffer:
|
||||
enable: true # Enable experience replay
|
||||
priority_fn: linear_decay # Priority function for replay buffer
|
||||
priority_fn_args:
|
||||
decay: 0.1 # Decay rate for priority function
|
||||
explorer:
|
||||
runner_per_model: 16 # Number of runners per model
|
||||
rollout_model:
|
||||
engine_num: 6 # Number of vLLM engines for rollout model
|
||||
tensor_parallel_size: 1 # TP size per engine for rollout model
|
||||
enable_openai_api: true # Enable OpenAI-compatible API
|
||||
enable_history: true # Enable conversation history
|
||||
enable_auto_tool_choice: true # Enable automatic tool selection
|
||||
tool_call_parser: hermes # Parser for tool calls
|
||||
trainer:
|
||||
save_interval: 100 # Save checkpoint every N steps
|
||||
use_dynamic_bsz: true # Use dynamic batch size
|
||||
grad_clip: 1.0 # Gradient clipping value
|
||||
max_token_len_per_gpu: 25600 # Max token length per GPU
|
||||
ulysses_sequence_parallel_size: 2 # Sequence parallel size for Ulysses
|
||||
synchronizer:
|
||||
sync_style: dynamic_by_explorer # Sync triggered dynamically by explorer
|
||||
sync_interval: 1 # Sync every N steps
|
||||
sync_timeout: 1200 # Timeout for synchronization (seconds)
|
||||
Reference in New Issue
Block a user