refactor(data_juicer_agent): update imports and add tests
This commit is contained in:
126
data_juicer_agent/.gitignore
vendored
126
data_juicer_agent/.gitignore
vendored
@@ -1,126 +0,0 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
pip-wheel-metadata/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
.idea/
|
||||
|
||||
# macOS
|
||||
.DS_Store
|
||||
|
||||
# Used to save loggings and files
|
||||
*runs/
|
||||
agentscope.db
|
||||
tmp*.json
|
||||
.vscode/
|
||||
data_agent/
|
||||
outputs/
|
||||
tools/op_manager/cache_retrieve/
|
||||
tools/op_manager/vector_index_cache/
|
||||
@@ -4,31 +4,32 @@ A multi-agent data processing system built on [AgentScope](https://github.com/mo
|
||||
|
||||
## 📋 Table of Contents
|
||||
|
||||
- [📋 Table of Contents](#-table-of-contents)
|
||||
- [What Does This Agent Do?](#what-does-this-agent-do)
|
||||
- [Architecture](#architecture)
|
||||
- [Quick Start](#quick-start)
|
||||
- [System Requirements](#system-requirements)
|
||||
- [Installation](#installation)
|
||||
- [Configuration](#configuration)
|
||||
- [Usage](#usage)
|
||||
- [Agent Introduction](#agent-introduction)
|
||||
- [Data Processing Agent](#data-processing-agent)
|
||||
- [Code Development Agent (DJ Dev Agent)](#code-development-agent-dj-dev-agent)
|
||||
- [Advanced Features](#advanced-features)
|
||||
- [Operator Retrieval](#operator-retrieval)
|
||||
- [Retrieval Modes](#retrieval-modes)
|
||||
- [Usage](#usage-1)
|
||||
- [MCP Agent](#mcp-agent)
|
||||
- [MCP Server Types](#mcp-server-types)
|
||||
- [Configuration](#configuration-1)
|
||||
- [Usage Methods](#usage-methods)
|
||||
- [Feature Preview](#feature-preview)
|
||||
- [Data-Juicer Q\&A Agent (Demo Available)](#data-juicer-qa-agent-demo-available)
|
||||
- [Data Analysis and Visualization Agent (In Development)](#data-analysis-and-visualization-agent-in-development)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Common Issues](#common-issues)
|
||||
- [Optimization Recommendations](#optimization-recommendations)
|
||||
- [DataJuicer Agent](#datajuicer-agent)
|
||||
- [📋 Table of Contents](#-table-of-contents)
|
||||
- [What Does This Agent Do?](#what-does-this-agent-do)
|
||||
- [Architecture](#architecture)
|
||||
- [Quick Start](#quick-start)
|
||||
- [System Requirements](#system-requirements)
|
||||
- [Installation](#installation)
|
||||
- [Configuration](#configuration)
|
||||
- [Usage](#usage)
|
||||
- [Agent Introduction](#agent-introduction)
|
||||
- [Data Processing Agent](#data-processing-agent)
|
||||
- [Code Development Agent (DJ Dev Agent)](#code-development-agent-dj-dev-agent)
|
||||
- [Advanced Features](#advanced-features)
|
||||
- [Operator Retrieval](#operator-retrieval)
|
||||
- [Retrieval Modes](#retrieval-modes)
|
||||
- [Usage](#usage-1)
|
||||
- [MCP Agent](#mcp-agent)
|
||||
- [MCP Server Types](#mcp-server-types)
|
||||
- [Configuration](#configuration-1)
|
||||
- [Usage Methods](#usage-methods)
|
||||
- [Feature Preview](#feature-preview)
|
||||
- [Data-Juicer Q\&A Agent (Demo Available)](#data-juicer-qa-agent-demo-available)
|
||||
- [Data Analysis and Visualization Agent (In Development)](#data-analysis-and-visualization-agent-in-development)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [Common Issues](#common-issues)
|
||||
- [Optimization Recommendations](#optimization-recommendations)
|
||||
|
||||
## What Does This Agent Do?
|
||||
|
||||
@@ -68,7 +69,14 @@ Router Agent ──┐
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
uv pip install -e .
|
||||
# Using uv is recommended
|
||||
uv pip install -r requirements.txt
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
@@ -4,30 +4,31 @@
|
||||
|
||||
## 📋 目录
|
||||
|
||||
- [📋 目录](#-目录)
|
||||
- [这个智能体做了什么?](#这个智能体做了什么)
|
||||
- [架构](#架构)
|
||||
- [快速开始](#快速开始)
|
||||
- [系统要求](#系统要求)
|
||||
- [安装](#安装)
|
||||
- [配置](#配置)
|
||||
- [使用](#使用)
|
||||
- [智能体介绍](#智能体介绍)
|
||||
- [数据处理智能体](#数据处理智能体)
|
||||
- [代码开发智能体](#代码开发智能体)
|
||||
- [高级功能](#高级功能)
|
||||
- [算子检索](#算子检索)
|
||||
- [检索模式](#检索模式)
|
||||
- [使用](#使用-1)
|
||||
- [MCP 智能体](#mcp-智能体)
|
||||
- [MCP 服务器类型](#mcp-服务器类型)
|
||||
- [配置](#配置-1)
|
||||
- [使用方法](#使用方法)
|
||||
- [功能预览](#功能预览)
|
||||
- [Data-Juicer 问答智能体 (演示可用)](#data-juicer-问答智能体-演示可用)
|
||||
- [数据分析与可视化智能体 (开发中)](#数据分析与可视化智能体-开发中)
|
||||
- [常见问题](#常见问题)
|
||||
- [优化建议](#优化建议)
|
||||
- [DataJuicer 智能体](#datajuicer-智能体)
|
||||
- [📋 目录](#-目录)
|
||||
- [这个智能体做了什么?](#这个智能体做了什么)
|
||||
- [架构](#架构)
|
||||
- [快速开始](#快速开始)
|
||||
- [系统要求](#系统要求)
|
||||
- [安装](#安装)
|
||||
- [配置](#配置)
|
||||
- [使用](#使用)
|
||||
- [智能体介绍](#智能体介绍)
|
||||
- [数据处理智能体](#数据处理智能体)
|
||||
- [代码开发智能体](#代码开发智能体)
|
||||
- [高级功能](#高级功能)
|
||||
- [算子检索](#算子检索)
|
||||
- [检索模式](#检索模式)
|
||||
- [使用](#使用-1)
|
||||
- [MCP 智能体](#mcp-智能体)
|
||||
- [MCP 服务器类型](#mcp-服务器类型)
|
||||
- [配置](#配置-1)
|
||||
- [使用方法](#使用方法)
|
||||
- [功能预览](#功能预览)
|
||||
- [Data-Juicer 问答智能体 (演示可用)](#data-juicer-问答智能体-演示可用)
|
||||
- [数据分析与可视化智能体 (开发中)](#数据分析与可视化智能体-开发中)
|
||||
- [常见问题](#常见问题)
|
||||
- [优化建议](#优化建议)
|
||||
|
||||
## 这个智能体做了什么?
|
||||
|
||||
@@ -67,7 +68,14 @@ Data-Juicer (DJ) 是一个一站式系统,面向大模型的文本及多模态
|
||||
### 安装
|
||||
|
||||
```bash
|
||||
uv pip install -e .
|
||||
# 推荐使用uv
|
||||
uv pip install -r requirements.txt
|
||||
```
|
||||
|
||||
或
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 配置
|
||||
|
||||
@@ -7,11 +7,16 @@ from agentscope.model import DashScopeChatModel
|
||||
from agentscope.formatter import DashScopeChatFormatter
|
||||
from agentscope.memory import InMemoryMemory
|
||||
from agentscope.agent import UserAgent
|
||||
from agentscope.tool import Toolkit
|
||||
|
||||
from agent_factory import create_agent
|
||||
from prompts import DJ_SYS_PROMPT, DJ_DEV_SYS_PROMPT, ROUTER_SYS_PROMPT, MCP_SYS_PROMPT
|
||||
from tools import dj_toolkit, dj_dev_toolkit, mcp_tools, get_mcp_toolkit, agents2toolkit
|
||||
from .agent_factory import create_agent
|
||||
from .prompts import DJ_SYS_PROMPT, DJ_DEV_SYS_PROMPT, ROUTER_SYS_PROMPT, MCP_SYS_PROMPT
|
||||
from .tools import (
|
||||
dj_toolkit,
|
||||
dj_dev_toolkit,
|
||||
mcp_tools,
|
||||
get_mcp_toolkit,
|
||||
agents2toolkit,
|
||||
)
|
||||
|
||||
# Create shared configuration
|
||||
model = DashScopeChatModel(
|
||||
@@ -145,10 +150,14 @@ async def main(
|
||||
if __name__ == "__main__":
|
||||
# Example tasks
|
||||
# project_root = os.path.abspath(os.path.dirname(__file__))
|
||||
# task = f"数据存储在{project_root}/data/demo-dataset-images.jsonl,筛选掉样本中,文本字段长度小于5的样本,以及图片size小于100Kb的样本。并将输出结果保存到./outputs路径下。"
|
||||
# task = (
|
||||
# f"The data is stored in {project_root}/data/demo-dataset-images.jsonl. "
|
||||
# "Filter out samples whose text field length is less than 5 "
|
||||
# "and samples whose image size is less than 100Kb. "
|
||||
# "Save the output results to the ./outputs path."
|
||||
# )
|
||||
#
|
||||
# DJ Development example task:
|
||||
# task = "我想开发一个新的DataJuicer过滤算子,用于过滤掉没有人声的音频文件"
|
||||
# task = "I want to develop a new DataJuicer filter operator to filter out audio files without vocals"
|
||||
#
|
||||
# MCP Agent will be automatically selected for advanced processing tasks
|
||||
fire.Fire(main)
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
[project]
|
||||
name = "data-juicer-agent"
|
||||
version = "0.1.0"
|
||||
description = "A data processing agent with data juicer"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"agentscope>=1.0.5",
|
||||
"faiss-cpu>=1.12.0",
|
||||
"langchain-community",
|
||||
"py-data-juicer>=1.4.2",
|
||||
]
|
||||
5
data_juicer_agent/requirements.txt
Normal file
5
data_juicer_agent/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
agentscope>=1.0.5
|
||||
py-data-juicer>=1.4.2
|
||||
faiss-cpu>=1.12.0
|
||||
fire>=0.7.1
|
||||
langchain-community
|
||||
Reference in New Issue
Block a user