release datajuicer agent
This commit is contained in:
235
data_juicer_agent/tools/dj_dev_tools.py
Normal file
235
data_juicer_agent/tools/dj_dev_tools.py
Normal file
@@ -0,0 +1,235 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DataJuicer Development Tools
|
||||
|
||||
Tools for developing DataJuicer operators, including access to basic documentation
|
||||
and example code for different operator types.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from agentscope.message import TextBlock
|
||||
from agentscope.tool import ToolResponse
|
||||
|
||||
# Root directory of the DataJuicer checkout. It is read from the environment
# once at import time and stays None when the variable is unset; the tools in
# this module check for None before touching the filesystem.
DATA_JUICER_PATH = os.environ.get("DATA_JUICER_PATH")

# Files (relative to DATA_JUICER_PATH) that get_basic_files() returns.
BASIC_LIST_RELATIVE = [
    "data_juicer/ops/base_op.py",
    "docs/DeveloperGuide.md",
    "docs/DeveloperGuide_ZH.md",
]
|
||||
|
||||
|
||||
def _code_fence(text: str) -> str:
    """Return a backtick fence that can safely wrap *text*.

    A fenced block is closed by the first fence at least as long as the
    opening one, so the fence must be longer than the longest run of
    backticks inside the content. The result is never shorter than three
    backticks.
    """
    longest = current = 0
    for char in text:
        current = current + 1 if char == "`" else 0
        if current > longest:
            longest = current
    return "`" * max(3, longest + 1)


def get_basic_files() -> ToolResponse:
    """Get basic DataJuicer development files content.

    Returns the content of essential files needed for DJ operator development:
    - base_op.py: Base operator class
    - DeveloperGuide.md: English developer guide
    - DeveloperGuide_ZH.md: Chinese developer guide

    Each file listed in ``BASIC_LIST_RELATIVE`` is resolved against
    ``DATA_JUICER_PATH`` and rendered as its own markdown section. A file that
    is missing or cannot be read is reported in place instead of being
    dropped, so the caller can tell an incomplete result from a complete one.

    Returns:
        ToolResponse: Combined content of all basic development files, or a
        single text block describing why the content could not be produced.
    """
    if DATA_JUICER_PATH is None:
        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text="DATA_JUICER_PATH is not configured. Please ask the user to provide the DATA_JUICER_PATH",
                )
            ]
        )

    try:
        sections = ["# DataJuicer Operator Development Basic Files\n\n"]

        for relative_path in BASIC_LIST_RELATIVE:
            file_path = os.path.join(DATA_JUICER_PATH, relative_path)
            filename = os.path.basename(file_path)

            if not os.path.exists(file_path):
                # Report the gap explicitly; silently skipping it would leave
                # the caller with a header and no explanation.
                sections.append(f"## {filename} (Not Found)\n")
                sections.append(f"Expected at: {file_path}\n\n")
                continue

            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                sections.append(f"## {filename} (Read Failed)\n")
                sections.append(f"Error: {str(e)}\n\n")
                continue

            language = "python" if filename.endswith(".py") else "markdown"
            # The developer guides contain fenced code themselves, so the
            # wrapping fence has to be longer than any fence in the content.
            fence = _code_fence(content)
            sections.append(f"## {filename}\n\n")
            sections.append(f"{fence}{language}\n")
            sections.append(content)
            sections.append(f"\n{fence}\n\n")

        return ToolResponse(
            content=[TextBlock(type="text", text="".join(sections))]
        )

    except Exception as e:
        # Tool boundary: surface unexpected failures as text rather than
        # letting them propagate to the caller.
        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text=f"Error occurred while getting basic files: {str(e)}",
                )
            ]
        )
|
||||
|
||||
|
||||
def _render_example_file(
    title: str, full_path: str, missing_note: str, shown_path: str = ""
) -> str:
    """Render one operator-related file as a markdown section.

    Args:
        title (str): Section heading, e.g. ``"Source Code"``.
        full_path (str): Filesystem path of the file to read.
        missing_note (str): Text returned verbatim when the file does not exist.
        shown_path (str): Optional path printed under the heading; omitted
            when empty.

    Returns:
        str: The rendered section, ``missing_note`` if the file is absent, or
        a "(Read Failed)" section if the file exists but cannot be read.
    """
    if not os.path.exists(full_path):
        return missing_note

    try:
        with open(full_path, "r", encoding="utf-8") as f:
            code = f.read()
    except (OSError, UnicodeError) as e:
        # Keep the failure local so one bad file does not discard the other
        # examples that were gathered successfully.
        return f"### {title} (Read Failed)\nError: {str(e)}\n\n"

    parts = [f"### {title}\n"]
    if shown_path:
        parts.append(f"**File Path:** `{shown_path}`\n\n")
    parts.append("```python\n")
    parts.append(code)
    parts.append("\n```\n\n")
    return "".join(parts)


async def get_operator_example(
    requirement_description: str, limit: int = 2
) -> ToolResponse:
    """Get example operators based on requirement description using dynamic search.

    The description is passed to ``retrieve_ops``; for each returned operator
    name the operator source and its test file are looked up under
    ``DATA_JUICER_PATH`` and appended to the response. The operator type
    directory is taken from the last ``_``-separated token of the name.

    Args:
        requirement_description (str): Natural language description of the operator requirement
        limit (int): Maximum number of example operators to return (default: 2)

    Returns:
        ToolResponse: Example operator code and test files based on the
        requirement, or a single text block explaining why none are available.
    """
    if DATA_JUICER_PATH is None:
        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text="DATA_JUICER_PATH is not configured. Please ask the user to provide the DATA_JUICER_PATH",
                )
            ]
        )

    try:
        # Imported inside the try block so that an import failure is reported
        # as a tool error instead of propagating.
        from .op_manager.op_retrieval import retrieve_ops

        # Retrieval mode can be overridden through the environment.
        retrieval_mode = os.environ.get("RETRIEVAL_MODE", "auto")
        tool_names = await retrieve_ops(
            requirement_description, limit=limit, mode=retrieval_mode
        )

        if not tool_names:
            return ToolResponse(
                content=[
                    TextBlock(
                        type="text",
                        text=f"No relevant operators found for requirement: {requirement_description}\n"
                        f"Please try with more specific keywords or check if DATA_JUICER_PATH is properly configured.",
                    )
                ]
            )

        sections = [
            f"# Dynamic Operator Examples for: {requirement_description}\n\n",
            f"Found {len(tool_names)} relevant operators (limit: {limit})\n\n",
        ]

        for index, tool_name in enumerate(tool_names[:limit], start=1):
            sections.append(f"## {index}. {tool_name}\n\n")

            # e.g. "text_length_filter" -> "filter"
            op_type = tool_name.split("_")[-1]

            operator_path = f"data_juicer/ops/{op_type}/{tool_name}.py"
            sections.append(
                _render_example_file(
                    "Source Code",
                    os.path.join(DATA_JUICER_PATH, operator_path),
                    f"**Note:** Source code file not found for `{tool_name}`.\n\n",
                )
            )

            test_path = f"tests/ops/{op_type}/test_{tool_name}.py"
            sections.append(
                _render_example_file(
                    "Test Code",
                    os.path.join(DATA_JUICER_PATH, test_path),
                    f"**Note:** Test file not found for `{tool_name}`.\n\n",
                    shown_path=test_path,
                )
            )

            sections.append("---\n\n")

        return ToolResponse(
            content=[TextBlock(type="text", text="".join(sections))]
        )

    except Exception as e:
        # Tool boundary: retrieval or import failures are returned as text.
        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text=f"Error occurred while getting operator examples: {str(e)}\n"
                    f"Please check the requirement description and try again.",
                )
            ]
        )
|
||||
|
||||
|
||||
def configure_data_juicer_path(data_juicer_path: str) -> ToolResponse:
    """Configure DataJuicer path.

    If the user provides the data_juicer_path, please use this method to configure it.

    The path is expanded with ``os.path.expanduser`` and must refer to an
    existing directory. The module-level ``DATA_JUICER_PATH`` is updated only
    when validation succeeds; on any failure it is left unchanged.

    Args:
        data_juicer_path (str): Path to DataJuicer installation

    Returns:
        ToolResponse: Configuration result
    """
    global DATA_JUICER_PATH

    try:
        # Validate before expanding: a non-string argument would otherwise
        # raise instead of producing a readable tool response.
        if not isinstance(data_juicer_path, str) or not data_juicer_path.strip():
            return ToolResponse(
                content=[
                    TextBlock(
                        type="text",
                        text="DataJuicer path must be a non-empty string",
                    )
                ]
            )

        resolved_path = os.path.expanduser(data_juicer_path)

        if not os.path.exists(resolved_path):
            return ToolResponse(
                content=[
                    TextBlock(
                        type="text",
                        text=f"Specified DataJuicer path does not exist: {resolved_path}",
                    )
                ]
            )

        # The other tools join relative file paths onto this value, so a
        # regular file can never be a valid DataJuicer root.
        if not os.path.isdir(resolved_path):
            return ToolResponse(
                content=[
                    TextBlock(
                        type="text",
                        text=f"Specified DataJuicer path is not a directory: {resolved_path}",
                    )
                ]
            )

        # Update global DATA_JUICER_PATH
        DATA_JUICER_PATH = resolved_path

        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text=f"DataJuicer path has been updated to: {DATA_JUICER_PATH}",
                )
            ]
        )

    except Exception as e:
        # Tool boundary: report unexpected failures as text.
        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text=f"Error occurred while configuring DataJuicer path: {str(e)}",
                )
            ]
        )
|
||||
Reference in New Issue
Block a user