---
# Training configuration for the "Email_search" experiment of the
# "AgentScope" project.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost. Keys are grouped under the section header
# that precedes them; confirm the hierarchy against the schema of the tool
# that consumes this file before relying on it.
#
# Values of the form ${oc.env:VAR,default} read environment variable VAR and
# fall back to the text after the comma when VAR is unset.

project: "AgentScope"  # Project name
name: "Email_search"  # Experiment name
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}  # Directory to save model checkpoints

algorithm:
  algorithm_type: multi_step_grpo  # GRPO series for multi-step scenario
  repeat_times: 8  # Number of rollouts per prompt for advantage estimation
  optimizer:
    # Written with an explicit mantissa dot so YAML 1.1 loaders also parse it
    # as a float; a bare `1e-6` is read as a string by some parsers.
    lr: 1.0e-6  # Learning rate
  policy_loss_fn: "rec"  # Policy loss function
  policy_loss_fn_args:  # Policy loss function arguments
    epsilon_low: 0.2
    epsilon_high: 0.2
    clip_mode: "one-side"
    weight: "none"
    temp: 1.0
    regularizer: "none"
    regularizer_coef: 0.0
  kl_loss_fn: 'k2'  # KL divergence loss function
  kl_loss_fn_args:
    kl_coef: 0.0  # KL divergence coefficient
  advantage_fn_args:
    std_cal_level: 'batch'  # Advantage normalization level

model:
  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-4B-Instruct-2507}  # Base model path
  max_response_tokens: 4096  # Max tokens per response
  max_model_len: 20480  # Max context length

buffer:
  total_epochs: 10  # Total training epochs
  batch_size: 64  # Batch size per explore step
  train_batch_size: 2560  # 64*8*5, total experiences per training step
  trainer_input:
    experience_buffer:
      name: experience_buffer
      storage_type: queue
      replay_buffer:
        enable: true  # Enable experience replay
        priority_fn: 'decay_limit_randomization'
        priority_fn_args:
          decay: 2.0
          use_count_limit: 3
          sigma: 2.0

explorer:
  eval_interval: 10
  max_repeat_times_per_runner: 1  # Max repeat times per runner
  max_timeout: 3600  # Max timeout for each rollout (seconds)
  rollout_model:
    enable_history: true  # Enable conversation history
    enable_openai_api: true  # Enable OpenAI-compatible API
    enable_auto_tool_choice: true  # Enable automatic tool selection
    tool_call_parser: hermes  # Parser for tool calls
    engine_num: 4  # Number of vLLM engines for rollout model
    tensor_parallel_size: 1  # TP size per engine for rollout model
    enable_prefix_caching: false  # Disable prefix caching
  auxiliary_models:
    - name: judge
      model_path: Qwen/Qwen3-30B-A3B-Instruct-2507  # Judge model path
      engine_num: 1  # Number of vLLM engines for judge model
      tensor_parallel_size: 2  # TP size per engine for judge model
      enable_thinking: false  # Disable thinking/reasoning mode
      max_prompt_tokens: 2048  # Max tokens for prompt
      max_response_tokens: 128  # Max tokens for response
      max_model_len: 2500  # Max model context length

synchronizer:
  sync_style: dynamic_by_explorer  # Sync triggered dynamically by explorer
  sync_interval: 5  # Sync every N steps
  sync_timeout: 3600  # Timeout for synchronization (seconds)

trainer:
  save_interval: 100  # Save checkpoint every N steps
  grad_clip: 1.0  # Gradient clipping value
  use_dynamic_bsz: true  # Use dynamic batch size
  max_token_len_per_gpu: 16384  # Max token length per GPU
  ulysses_sequence_parallel_size: 1  # Sequence parallel size for Ulysses