init
This commit is contained in:
325
deep_research/agent_deep_research/utils.py
Normal file
325
deep_research/agent_deep_research/utils.py
Normal file
@@ -0,0 +1,325 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""The utilities for deep research agent"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Sequence, Type, Union
|
||||
|
||||
from agentscope.tool import Toolkit, ToolResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
TOOL_RESULTS_MAX_WORDS = 5000
|
||||
|
||||
|
||||
def get_prompt_from_file(
    file_path: str,
    return_json: bool,
) -> Union[str, dict]:
    """Load a prompt from a file.

    Args:
        file_path: Path to the prompt file.
        return_json: If ``True``, parse the file content as JSON and return
            a dict; otherwise return the raw file text.

    Returns:
        The parsed JSON object when ``return_json`` is ``True``, otherwise
        the file content as a string.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If ``return_json`` is ``True`` and the file
            does not contain valid JSON.
    """
    # The original wrapped ``file_path`` in a single-argument
    # ``os.path.join`` call, which is a no-op; use the path directly.
    with open(file_path, "r", encoding="utf-8") as f:
        if return_json:
            prompt = json.load(f)
        else:
            prompt = f.read()
    return prompt
|
||||
|
||||
|
||||
def truncate_by_words(
    sentence: str,
    max_words: Union[int, None] = None,
) -> str:
    """Truncate an over-long sentence to a maximum number of words.

    The sentence is tokenized into words (``\\w+``) and single punctuation
    characters; tokens are kept until the word count exceeds the limit,
    then the kept tokens are re-joined — a space before each word (except
    the first token) and punctuation attached directly to what precedes it.

    Args:
        sentence: The text to truncate.
        max_words: Maximum number of words to keep. Defaults to the
            module-level ``TOOL_RESULTS_MAX_WORDS`` when ``None``
            (backward-compatible with the original zero-argument limit).

    Returns:
        The re-joined, possibly truncated, sentence.
    """
    limit = TOOL_RESULTS_MAX_WORDS if max_words is None else max_words
    tokens = re.findall(
        r"\w+|[^\w\s]",
        sentence,
        re.UNICODE,
    )

    kept = []
    word_count = 0
    for token in tokens:
        if re.match(r"\w+", token):
            word_count += 1
            # Stop once the limit is exceeded; trailing punctuation that
            # belongs to the last kept word has already been appended.
            if word_count > limit:
                break
        kept.append(token)

    # Build with join instead of repeated ``+=`` (avoids quadratic copies
    # on very long inputs) while reproducing the original spacing rules.
    pieces = []
    for i, token in enumerate(kept):
        if i > 0 and re.match(r"\w+", token):
            pieces.append(" ")
        pieces.append(token)
    return "".join(pieces)
|
||||
|
||||
|
||||
def truncate_search_result(
    res: list,
    search_func: str = "tavily-search",
    extract_function: str = "tavily-extract",
) -> list:
    """Shorten the ``text`` field of each search-result entry in place.

    Only the Tavily search/extract tool pair is supported; any other
    combination raises ``NotImplementedError``. The input list is mutated
    and also returned for convenience.

    Args:
        res: List of result dicts, each carrying a ``"text"`` key.
        search_func: Name of the search tool that produced the results.
        extract_function: Name of the extraction tool that produced them.

    Returns:
        The same list with every entry's ``"text"`` truncated.
    """
    supported = (
        search_func == "tavily-search"
        and extract_function == "tavily-extract"
    )
    if not supported:
        raise NotImplementedError(
            "Specific implementation of truncation should be provided.",
        )

    for entry in res:
        entry["text"] = truncate_by_words(entry["text"])

    return res
|
||||
|
||||
|
||||
def generate_structure_output(**kwargs: Any) -> ToolResponse:
    """Wrap arbitrary keyword arguments into a structured tool response.

    Registered as a tool function: the model "calls" it with arguments
    matching a structured-output specification, and those arguments are
    handed back verbatim as response metadata.

    Args:
        **kwargs: Arbitrary keyword arguments that should match the format
            of the expected structured output specification, with as much
            detail as the calling context requests.

    Returns:
        ToolResponse: A response with no content blocks whose metadata
        holds the provided keyword arguments.
    """
    structured_fields = kwargs
    return ToolResponse(
        content=[],
        metadata=structured_fields,
    )
|
||||
|
||||
|
||||
def get_dynamic_tool_call_json(data_model_type: Type[BaseModel]) -> list[dict]:
    """Build tool-calling JSON schemas from a Pydantic data model.

    A throwaway ``Toolkit`` is created, the generic structured-output tool
    is registered on it, and that tool is extended with ``data_model_type``
    so the resulting schemas describe the model's fields.

    Args:
        data_model_type: A Pydantic ``BaseModel`` subclass defining the
            expected structure of the tool output.

    Returns:
        A list of dictionaries containing the JSON schemas for the
        configured tool, suitable for API calls that support structured
        outputs.

    Example:
        class MyModel(BaseModel):
            name: str
            value: int

        schema = get_dynamic_tool_call_json(MyModel)
    """
    toolkit = Toolkit()
    toolkit.register_tool_function(generate_structure_output)
    toolkit.set_extended_model("generate_structure_output", data_model_type)
    return toolkit.get_json_schemas()
|
||||
|
||||
|
||||
def get_structure_output(blocks: list | Sequence) -> dict:
|
||||
"""Extract structured output from a sequence of blocks.
|
||||
|
||||
Processes a list or sequence of blocks to extract tool use outputs
|
||||
and combine them into a single dictionary. This is typically used
|
||||
to parse responses from language models that include tool calls.
|
||||
|
||||
Args:
|
||||
blocks: A list or sequence of blocks that may contain tool use
|
||||
information. Each block should be a dictionary with 'type'
|
||||
and 'input' keys for tool use blocks.
|
||||
|
||||
Returns:
|
||||
A dictionary containing the combined input data from all tool
|
||||
use blocks found in the input sequence.
|
||||
|
||||
Example:
|
||||
blocks = [
|
||||
{"type": "tool_use", "input": {"name": "test"}},
|
||||
{"type": "text", "content": "Some text"},
|
||||
{"type": "tool_use", "input": {"value": 42}}
|
||||
]
|
||||
result = PromptBase.get_structure_output(blocks)
|
||||
# result: {"name": "test", "value": 42}
|
||||
"""
|
||||
|
||||
dict_output = {}
|
||||
for block in blocks:
|
||||
if isinstance(block, dict) and block.get("type") == "tool_use":
|
||||
dict_output.update(block.get("input", {}))
|
||||
return dict_output
|
||||
|
||||
|
||||
def load_prompt_dict() -> dict:
    """Assemble the prompt templates used by the deep research agent.

    File-backed system prompts are read from the ``built_in_prompt``
    directory next to this module; the remaining templates are inline
    format strings whose ``{placeholders}`` are filled by callers.

    Returns:
        dict: Mapping from prompt name to prompt template string.
    """
    prompt_dict = {}
    cur_dir = os.path.dirname(os.path.abspath(__file__))

    # --- prompts loaded from files shipped with the package ---
    prompt_dict["add_note"] = get_prompt_from_file(
        file_path=os.path.join(
            cur_dir,
            "built_in_prompt/prompt_worker_additional_sys_prompt.md",
        ),
        return_json=False,
    )

    prompt_dict["tool_use_rule"] = get_prompt_from_file(
        file_path=os.path.join(
            cur_dir,
            "built_in_prompt/prompt_tool_usage_rules.md",
        ),
        return_json=False,
    )

    prompt_dict["decompose_sys_prompt"] = get_prompt_from_file(
        file_path=os.path.join(
            cur_dir,
            "built_in_prompt/prompt_decompose_subtask.md",
        ),
        return_json=False,
    )

    prompt_dict["expansion_sys_prompt"] = get_prompt_from_file(
        file_path=os.path.join(
            cur_dir,
            "built_in_prompt/prompt_deeper_expansion.md",
        ),
        return_json=False,
    )

    prompt_dict["summarize_sys_prompt"] = get_prompt_from_file(
        file_path=os.path.join(
            cur_dir,
            "built_in_prompt/prompt_inprocess_report.md",
        ),
        return_json=False,
    )

    prompt_dict["reporting_sys_prompt"] = get_prompt_from_file(
        file_path=os.path.join(
            cur_dir,
            "built_in_prompt/prompt_deepresearch_summary_report.md",
        ),
        return_json=False,
    )

    prompt_dict["reflect_sys_prompt"] = get_prompt_from_file(
        file_path=os.path.join(
            cur_dir,
            "built_in_prompt/prompt_reflect_failure.md",
        ),
        return_json=False,
    )

    # --- inline format-string templates ---
    prompt_dict["reasoning_prompt"] = (
        "## Current Subtask:\n{objective}\n"
        "## Working Plan:\n{meta_planner_agent}\n"
        "{knowledge_gap}\n"
        "## Research Depth:\n{depth}"
    )

    prompt_dict["previous_plan_inst"] = (
        "## Previous Plan:\n{previous_plan}\n"
        "## Current Subtask:\n{objective}\n"
    )

    prompt_dict["max_depth_hint"] = (
        "The search depth has reached the maximum limit. So the "
        "current subtask can not be further decomposed and "
        "expanded anymore. I need to find another way to get it "
        "done no matter what."
    )

    prompt_dict["expansion_inst"] = (
        "Review the web search results and identify whether "
        "there is any information that can potentially help address "
        "checklist items or fulfill knowledge gaps of the task, "
        "but whose content is limited or only briefly mentioned.\n"
        "**Task Description:**\n{objective}\n"
        "**Checklist:**\n{checklist}\n"
        "**Knowledge Gaps:**\n{knowledge_gaps}\n"
        "**Search Results:**\n{search_results}\n"
        "**Output:**\n"
    )

    prompt_dict["follow_up_judge_sys_prompt"] = (
        "To provide sufficient external information for the user's "
        "query, you have conducted a web search to obtain additional "
        "data. However, you found that some of the information, while "
        "important, was insufficient. Consequently, you extracted the "
        "entire content from one of the URLs to gather more "
        "comprehensive information. Now, you must rigorously and "
        "carefully assess whether, after both the web search and "
        "extraction process, the information content is adequate to "
        "address the given task. Be aware that any arbitrary decisions "
        "may result in unnecessary and unacceptable time costs.\n"
    )

    prompt_dict[
        "retry_hint"
    ] = "Something went wrong when {state}. I need to retry."

    prompt_dict["need_deeper_hint"] = (
        "The information is insufficient and I need to make deeper "
        "research to fill the knowledge gap."
    )

    prompt_dict[
        "sufficient_hint"
    ] = "The information after web search and extraction is sufficient enough!"

    prompt_dict["no_result_hint"] = (
        "I mistakenly called the `summarize_intermediate_results` tool as "
        "there exists no milestone result to summarize now."
    )

    # Fix: the original ended a fragment with "just output the" and began
    # the next with "updated ...", concatenating to "theupdated"; a space
    # is added so the rendered prompt reads correctly.
    prompt_dict["summarize_hint"] = (
        "Based on your work history above, examine which step in the "
        "following working meta_planner_agent has been completed. Mark the completed "
        "step with [DONE] at the end of its line (e.g., k. step k [DONE]) "
        "and leave the uncompleted steps unchanged. You MUST return only "
        "the updated meta_planner_agent, preserving exactly the same format as the "
        "original meta_planner_agent. Do not include any explanations, reasoning, "
        "or section headers such as '## Working Plan:', just output the "
        "updated meta_planner_agent itself."
        "\n\n## Working Plan:\n{meta_planner_agent}"
    )

    # NOTE(review): the labels and placeholders here look mismatched
    # compared with ``expansion_inst`` (Checklist<->knowledge_gaps,
    # Knowledge Gaps<->working_plan). Left unchanged because callers
    # supply these exact placeholder names — confirm before reworking.
    prompt_dict["summarize_inst"] = (
        "**Task Description:**\n{objective}\n"
        "**Checklist:**\n{knowledge_gaps}\n"
        "**Knowledge Gaps:**\n{working_plan}\n"
        "**Search Results:**\n{tool_result}"
    )

    prompt_dict["update_report_hint"] = (
        "Due to the overwhelming quantity of information, I have replaced the "
        "original bulk search results from the research phase with the "
        "following report that consolidates and summarizes the essential "
        "findings:\n {intermediate_report}\n\n"
        "Such report has been saved to the {report_path}. "
        "I will now **proceed to the next item** in the working meta_planner_agent."
    )

    prompt_dict["save_report_hint"] = (
        "The milestone results of the current item in working meta_planner_agent "
        "are summarized into the following report:\n{intermediate_report}"
    )

    prompt_dict["reflect_instruction"] = (
        "## Work History:\n{conversation_history}\n"
        "## Working Plan:\n{meta_planner_agent}\n"
    )

    # NOTE(review): the opening quote around {cur_obj} is a curly quote
    # while the closing one around {next_obj} is a straight apostrophe —
    # likely unintentional, kept as-is to preserve runtime output.
    prompt_dict["subtask_complete_hint"] = (
        "Subtask ‘{cur_obj}’ is completed. Now the current subtask "
        "fallbacks to '{next_obj}'"
    )

    return prompt_dict
|
||||
Reference in New Issue
Block a user