Optimize DataJuicer Agent doc & linter (#30)
This commit is contained in:
@@ -2,8 +2,8 @@
|
||||
"""
|
||||
DataJuicer Development Tools
|
||||
|
||||
Tools for developing DataJuicer operators, including access to basic documentation
|
||||
and example code for different operator types.
|
||||
Tools for developing DataJuicer operators, including access to basic
|
||||
documentation and example code for different operator types.
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -23,7 +23,8 @@ BASIC_LIST_RELATIVE = [
|
||||
def get_basic_files() -> ToolResponse:
|
||||
"""Get basic DataJuicer development files content.
|
||||
|
||||
Returns the content of essential files needed for DJ operator development:
|
||||
Returns the content of essential files needed for DJ operator
|
||||
development:
|
||||
- base_op.py: Base operator class
|
||||
- DeveloperGuide.md: English developer guide
|
||||
- DeveloperGuide_ZH.md: Chinese developer guide
|
||||
@@ -31,19 +32,23 @@ def get_basic_files() -> ToolResponse:
|
||||
Returns:
|
||||
ToolResponse: Combined content of all basic development files
|
||||
"""
|
||||
|
||||
global DATA_JUICER_PATH, BASIC_LIST_RELATIVE
|
||||
if DATA_JUICER_PATH is None:
|
||||
return ToolResponse(
|
||||
content=[
|
||||
TextBlock(
|
||||
type="text",
|
||||
text="DATA_JUICER_PATH is not configured. Please ask the user to provide the DATA_JUICER_PATH",
|
||||
)
|
||||
]
|
||||
text=(
|
||||
"DATA_JUICER_PATH is not configured. Please ask the "
|
||||
"user to provide the DATA_JUICER_PATH"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
try:
|
||||
combined_content = "# DataJuicer Operator Development Basic Files\n\n"
|
||||
comb_content = "# DataJuicer Operator Development Basic Files\n\n"
|
||||
|
||||
for relative_path in BASIC_LIST_RELATIVE:
|
||||
file_path = os.path.join(DATA_JUICER_PATH, relative_path)
|
||||
@@ -52,20 +57,21 @@ def get_basic_files() -> ToolResponse:
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
|
||||
filename = os.path.basename(file_path)
|
||||
combined_content += f"## {filename}\n\n"
|
||||
combined_content += (
|
||||
f"```{'python' if filename.endswith('.py') else 'markdown'}\n"
|
||||
)
|
||||
combined_content += content
|
||||
combined_content += "\n```\n\n"
|
||||
file_n = os.path.basename(file_path)
|
||||
comb_content += f"## {file_n}\n\n```"
|
||||
flag = "python" if file_n.endswith(".py") else "markdown"
|
||||
comb_content += f"{flag}\n"
|
||||
comb_content += content
|
||||
comb_content += "\n```\n\n"
|
||||
except Exception as e:
|
||||
combined_content += (
|
||||
comb_content += (
|
||||
f"## {os.path.basename(file_path)} (Read Failed)\n"
|
||||
)
|
||||
combined_content += f"Error: {str(e)}\n\n"
|
||||
comb_content += f"Error: {str(e)}\n\n"
|
||||
|
||||
return ToolResponse(content=[TextBlock(type="text", text=combined_content)])
|
||||
return ToolResponse(
|
||||
content=[TextBlock(type="text", text=comb_content)],
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
return ToolResponse(
|
||||
@@ -73,32 +79,41 @@ def get_basic_files() -> ToolResponse:
|
||||
TextBlock(
|
||||
type="text",
|
||||
text=f"Error occurred while getting basic files: {str(e)}",
|
||||
)
|
||||
]
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
async def get_operator_example(
|
||||
requirement_description: str, limit: int = 2
|
||||
requirement_description: str,
|
||||
limit: int = 2,
|
||||
) -> ToolResponse:
|
||||
"""Get example operators based on requirement description using dynamic search.
|
||||
"""Get example operators based on requirement description using
|
||||
dynamic search.
|
||||
|
||||
Args:
|
||||
requirement_description (str): Natural language description of the operator requirement
|
||||
limit (int): Maximum number of example operators to return (default: 2)
|
||||
requirement_description (str): Natural language description of
|
||||
the operator requirement
|
||||
limit (int): Maximum number of example operators to return
|
||||
(default: 2)
|
||||
|
||||
Returns:
|
||||
ToolResponse: Example operator code and test files based on the requirement
|
||||
ToolResponse: Example operator code and test files based on
|
||||
the requirement
|
||||
"""
|
||||
|
||||
global DATA_JUICER_PATH
|
||||
if DATA_JUICER_PATH is None:
|
||||
return ToolResponse(
|
||||
content=[
|
||||
TextBlock(
|
||||
type="text",
|
||||
text="DATA_JUICER_PATH is not configured. Please ask the user to provide the DATA_JUICER_PATH",
|
||||
)
|
||||
]
|
||||
text=(
|
||||
"DATA_JUICER_PATH is not configured. Please ask the "
|
||||
"user to provide the DATA_JUICER_PATH"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -108,49 +123,56 @@ async def get_operator_example(
|
||||
# Query relevant operators using the requirement description
|
||||
# Use retrieval mode from environment variable if set
|
||||
retrieval_mode = os.environ.get("RETRIEVAL_MODE", "auto")
|
||||
tool_names = await retrieve_ops(requirement_description, limit=limit, mode=retrieval_mode)
|
||||
tool_names = await retrieve_ops(
|
||||
requirement_description,
|
||||
limit=limit,
|
||||
mode=retrieval_mode,
|
||||
)
|
||||
|
||||
if not tool_names:
|
||||
return ToolResponse(
|
||||
content=[
|
||||
TextBlock(
|
||||
type="text",
|
||||
text=f"No relevant operators found for requirement: {requirement_description}\n"
|
||||
f"Please try with more specific keywords or check if DATA_JUICER_PATH is properly configured.",
|
||||
)
|
||||
]
|
||||
text=(
|
||||
"No relevant operators found for requirement: "
|
||||
f"{requirement_description}\n"
|
||||
"Please try with more specific keywords or "
|
||||
"check if DATA_JUICER_PATH is properly "
|
||||
"configured."
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
combined_content = (
|
||||
comb_content = (
|
||||
f"# Dynamic Operator Examples for: {requirement_description}\n\n"
|
||||
)
|
||||
combined_content += (
|
||||
comb_content += (
|
||||
f"Found {len(tool_names)} relevant operators (limit: {limit})\n\n"
|
||||
)
|
||||
|
||||
# Process each found operator
|
||||
for i, tool_name in enumerate(tool_names[:limit]):
|
||||
combined_content += f"## {i+1}. {tool_name}\n\n"
|
||||
comb_content += f"## {i+1}. {tool_name}\n\n"
|
||||
|
||||
op_type = tool_name.split("_")[-1]
|
||||
|
||||
operator_path = f"data_juicer/ops/{op_type}/{tool_name}.py"
|
||||
|
||||
# Try to find operator source file
|
||||
|
||||
full_path = os.path.join(DATA_JUICER_PATH, operator_path)
|
||||
if os.path.exists(full_path):
|
||||
with open(full_path, "r", encoding="utf-8") as f:
|
||||
operator_code = f.read()
|
||||
|
||||
combined_content += f"### Source Code\n"
|
||||
combined_content += "```python\n"
|
||||
combined_content += operator_code
|
||||
combined_content += "\n```\n\n"
|
||||
comb_content += "### Source Code\n"
|
||||
comb_content += "```python\n"
|
||||
comb_content += operator_code
|
||||
comb_content += "\n```\n\n"
|
||||
else:
|
||||
combined_content += (
|
||||
f"**Note:** Source code file not found for `{tool_name}`.\n\n"
|
||||
)
|
||||
comb_content += "**Note:** Source code file not found for"
|
||||
comb_content += f" `{tool_name}`.\n\n"
|
||||
|
||||
test_path = f"tests/ops/{op_type}/test_{tool_name}.py"
|
||||
|
||||
@@ -159,36 +181,43 @@ async def get_operator_example(
|
||||
with open(full_test_path, "r", encoding="utf-8") as f:
|
||||
test_code = f.read()
|
||||
|
||||
combined_content += f"### Test Code\n"
|
||||
combined_content += f"**File Path:** `{test_path}`\n\n"
|
||||
combined_content += "```python\n"
|
||||
combined_content += test_code
|
||||
combined_content += "\n```\n\n"
|
||||
comb_content += "### Test Code\n"
|
||||
comb_content += f"**File Path:** `{test_path}`\n\n"
|
||||
comb_content += "```python\n"
|
||||
comb_content += test_code
|
||||
comb_content += "\n```\n\n"
|
||||
|
||||
else:
|
||||
combined_content += (
|
||||
comb_content += (
|
||||
f"**Note:** Test file not found for `{tool_name}`.\n\n"
|
||||
)
|
||||
|
||||
combined_content += "---\n\n"
|
||||
comb_content += "---\n\n"
|
||||
|
||||
return ToolResponse(content=[TextBlock(type="text", text=combined_content)])
|
||||
return ToolResponse(
|
||||
content=[TextBlock(type="text", text=comb_content)],
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
return ToolResponse(
|
||||
content=[
|
||||
TextBlock(
|
||||
type="text",
|
||||
text=f"Error occurred while getting operator examples: {str(e)}\n"
|
||||
f"Please check the requirement description and try again.",
|
||||
)
|
||||
]
|
||||
text=(
|
||||
"Error occurred while getting operator examples: "
|
||||
f"{str(e)}\n"
|
||||
"Please check the requirement description and try "
|
||||
"again."
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def configure_data_juicer_path(data_juicer_path: str) -> ToolResponse:
|
||||
"""Configure DataJuicer path.
|
||||
If the user provides the data_juicer_path, please use this method to configure it.
|
||||
If the user provides the data_juicer_path, please use this method to
|
||||
configure it.
|
||||
|
||||
Args:
|
||||
data_juicer_path (str): Path to DataJuicer installation
|
||||
@@ -196,8 +225,9 @@ def configure_data_juicer_path(data_juicer_path: str) -> ToolResponse:
|
||||
Returns:
|
||||
ToolResponse: Configuration result
|
||||
"""
|
||||
|
||||
global DATA_JUICER_PATH
|
||||
|
||||
|
||||
data_juicer_path = os.path.expanduser(data_juicer_path)
|
||||
|
||||
try:
|
||||
@@ -206,9 +236,12 @@ def configure_data_juicer_path(data_juicer_path: str) -> ToolResponse:
|
||||
content=[
|
||||
TextBlock(
|
||||
type="text",
|
||||
text=f"Specified DataJuicer path does not exist: {data_juicer_path}",
|
||||
)
|
||||
]
|
||||
text=(
|
||||
"Specified DataJuicer path does not exist: "
|
||||
f"{data_juicer_path}"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
# Update global DATA_JUICER_PATH
|
||||
@@ -218,9 +251,12 @@ def configure_data_juicer_path(data_juicer_path: str) -> ToolResponse:
|
||||
content=[
|
||||
TextBlock(
|
||||
type="text",
|
||||
text=f"DataJuicer path has been updated to: {DATA_JUICER_PATH}",
|
||||
)
|
||||
]
|
||||
text=(
|
||||
"DataJuicer path has been updated to: "
|
||||
f"{DATA_JUICER_PATH}"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -228,7 +264,10 @@ def configure_data_juicer_path(data_juicer_path: str) -> ToolResponse:
|
||||
content=[
|
||||
TextBlock(
|
||||
type="text",
|
||||
text=f"Error occurred while configuring DataJuicer path: {str(e)}",
|
||||
)
|
||||
]
|
||||
text=(
|
||||
"Error occurred while configuring DataJuicer path: "
|
||||
f"{str(e)}"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user