init
This commit is contained in:
40
functionality/rag/README.md
Normal file
40
functionality/rag/README.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# RAG in AgentScope
|
||||
|
||||
This example includes four scripts that demonstrate how to use Retrieval-Augmented Generation (RAG) in AgentScope:
|
||||
|
||||
- the basic usage of RAG module in AgentScope in ``basic_usage.py``,
|
||||
- a simple agentic use case of RAG in ``agentic_usage.py``,
|
||||
- the integration of RAG into the ``ReActAgent`` class, which retrieves with the input message(s) at the beginning of each reply, in ``react_agent_integration.py``, and
|
||||
- a multimodal RAG example in ``multimodal_rag.py``.
|
||||
|
||||
> The agentic usage and the static integration each have their own advantages and limitations.
|
||||
> - The agentic usage requires more powerful LLMs to manage the retrieval process, but it's more flexible and the agent can adjust the retrieval strategy dynamically.
|
||||
> - The static integration is more straightforward and easier to implement, but it's less flexible and the input message may not be specific enough, leading to less relevant retrieval results.
|
||||
|
||||
> Note: The example is built with DashScope chat model. If you want to change the model in this example, don't forget
|
||||
> to change the formatter at the same time! The correspondence between built-in models and formatters is
|
||||
> listed in [our tutorial](https://doc.agentscope.io/tutorial/task_prompt.html#id1).
|
||||
|
||||
## Quick Start
|
||||
|
||||
Install the latest agentscope library from PyPI or source, then run one of the following commands to run the corresponding example:
|
||||
|
||||
- the basic usage:
|
||||
```bash
|
||||
python basic_usage.py
|
||||
```
|
||||
|
||||
- the agentic usage:
|
||||
```bash
|
||||
python agentic_usage.py
|
||||
```
|
||||
|
||||
- the static integration:
|
||||
```bash
|
||||
python react_agent_integration.py
|
||||
```
|
||||
|
||||
- the multimodal RAG:
|
||||
```bash
|
||||
python multimodal_rag.py
|
||||
```
|
||||
101
functionality/rag/agentic_usage.py
Normal file
101
functionality/rag/agentic_usage.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""The agentic usage example for RAG in AgentScope, where the agent is
|
||||
equipped with RAG tools to answer questions based on a knowledge base.
|
||||
|
||||
The example is more challenging for the agent, requiring the agent to
|
||||
adjust the retrieval parameters to get relevant results.
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from agentscope.agent import ReActAgent, UserAgent
|
||||
from agentscope.embedding import DashScopeTextEmbedding
|
||||
from agentscope.formatter import DashScopeChatFormatter
|
||||
from agentscope.message import Msg
|
||||
from agentscope.model import DashScopeChatModel
|
||||
from agentscope.rag import QdrantStore, SimpleKnowledge, TextReader
|
||||
from agentscope.tool import Toolkit
|
||||
|
||||
# Create a knowledge base instance
|
||||
# In-memory Qdrant collection that holds the embedded document chunks
_vector_store = QdrantStore(
    location=":memory:",
    collection_name="test_collection",
    dimensions=1024,  # The dimension of the embedding vectors
)

# DashScope text embedding model; the API key is read from the environment,
# so a missing DASHSCOPE_API_KEY raises KeyError when this module is loaded
_text_embedder = DashScopeTextEmbedding(
    api_key=os.environ["DASHSCOPE_API_KEY"],
    model_name="text-embedding-v4",
)

# Knowledge base instance used by ``main`` for both indexing and retrieval
knowledge = SimpleKnowledge(
    embedding_store=_vector_store,
    embedding_model=_text_embedder,
)
|
||||
|
||||
|
||||
async def main() -> None:
    """Run the agentic RAG example as an interactive conversation.

    The module-level ``knowledge`` base is filled with a fake personal
    profile, its ``retrieve_knowledge`` method is registered as a tool of a
    ``ReActAgent``, and the agent then chats with the user, starting from a
    preset question, until the user replies with ``exit``.
    """
    # Fake personal profile for demonstration
    profile_text = (
        "I'm John Doe, 28 years old. My best friend is James "
        "Smith. I live in San Francisco. I work at OpenAI as a "
        "software engineer. I love hiking and photography. "
        "My father is Michael Doe, a doctor. I'm very proud of him. "
        "My mother is Sarah Doe, a teacher. She is very kind and "
        "always helps me with my studies.\n"
        "I'm now a PhD student at Stanford University, majoring in "
        "Computer Science. My advisor is Prof. Jane Williams, who is "
        "a leading expert in artificial intelligence. I have published "
        "several papers in top conferences, such as NeurIPS and ICML. "
    )

    # A clear description helps the model decide how to call the tool
    tool_description = (
        "Retrieve relevant documents from the knowledge base, which is "
        "relevant to John Doe's profile. Note the `query` parameter is "
        "very important for the retrieval quality, and you can try many "
        "different queries to get the best results. Adjust the `limit` "
        "and `score_threshold` parameters to get more or fewer results."
    )

    system_prompt = (
        "You're a helpful assistant named Friday. "
        "You're equipped with a 'retrieve_knowledge' tool to help you "
        "know about the user named John Doe. "
        "NOTE to adjust the `score_threshold` parameters when you cannot "
        "get relevant results. "
    )

    # Fill the knowledge base for demonstration purposes; in practice the
    # vector store would already contain the relevant data
    text_reader = TextReader(chunk_size=1024, split_by="sentence")
    profile_docs = await text_reader(text=profile_text)
    await knowledge.add_documents(profile_docs)

    # Expose retrieval to the agent as a tool function
    rag_tools = Toolkit()
    rag_tools.register_tool_function(
        knowledge.retrieve_knowledge,
        func_description=tool_description,
    )

    # Build the assistant and the human-side agent
    chat_model = DashScopeChatModel(
        api_key=os.environ["DASHSCOPE_API_KEY"],
        model_name="qwen3-max-preview",
    )
    assistant = ReActAgent(
        name="Friday",
        sys_prompt=system_prompt,
        toolkit=rag_tools,
        model=chat_model,
        formatter=DashScopeChatFormatter(),
    )
    human = UserAgent(name="User")

    # Conversation loop: the preset question goes to the assistant first,
    # then the two sides alternate until the user's text is exactly "exit"
    turn = Msg(
        "user",
        "I'm John Doe. Do you know my father?",
        "user",
    )
    while True:
        turn = await assistant(turn)
        turn = await human(turn)
        if turn.get_text_content() == "exit":
            break
|
||||
|
||||
|
||||
# Start the example only when this file is executed as a script (e.g.
# ``python agentic_usage.py``), so that importing the module does not launch
# the interactive conversation.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
74
functionality/rag/basic_usage.py
Normal file
74
functionality/rag/basic_usage.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""The main entry point of the RAG example."""
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from agentscope.embedding import DashScopeTextEmbedding
|
||||
from agentscope.rag import PDFReader, QdrantStore, SimpleKnowledge, TextReader
|
||||
|
||||
|
||||
async def main() -> None:
    """Demonstrate the basic RAG workflow: read, index and retrieve.

    A short text snippet and the ``example.pdf`` file located next to this
    script are chunked and inserted into an in-memory knowledge base. Two
    queries are then run against it and the hits of each query are printed
    to stdout.
    """
    # Readers, each configured with its own chunking arguments
    text_reader = TextReader(chunk_size=1024)
    pdf_file_reader = PDFReader(chunk_size=1024, split_by="sentence")

    # Chunk an inline text snippet
    text_chunks = await text_reader(
        text="I'm Tony Stank, my password is 123456. My best friend is James "
        "Rhodes.",
    )

    # Chunk the sample PDF, resolved relative to this script rather than the
    # current working directory
    script_dir = os.path.abspath(os.path.dirname(__file__))
    sample_pdf = os.path.join(script_dir, "example.pdf")
    pdf_chunks = await pdf_file_reader(pdf_path=sample_pdf)

    # Knowledge base backed by Qdrant (embedding store) and DashScope
    # (embedding model)
    vector_store = QdrantStore(
        location=":memory:",
        collection_name="test_collection",
        dimensions=1024,  # The dimension of the embedding vectors
    )
    embedder = DashScopeTextEmbedding(
        api_key=os.environ["DASHSCOPE_API_KEY"],
        model_name="text-embedding-v4",
    )
    knowledge = SimpleKnowledge(
        embedding_store=vector_store,
        embedding_model=embedder,
    )

    # Index both document sets in a single call
    await knowledge.add_documents(text_chunks + pdf_chunks)

    # First query: targets the inline text snippet
    hits = await knowledge.retrieve(
        query="What is Tony Stank's password?",
        limit=3,
        score_threshold=0.7,
    )
    print("Q1: What is Tony Stank's password?")
    for hit in hits:
        text = hit.metadata.content["text"]
        print(f"Document ID: {hit.id}, Score: {hit.score}, Content: {text}")

    # Second query: targets the PDF content, with a lower score threshold
    hits = await knowledge.retrieve(
        query="climate change",
        limit=3,
        score_threshold=0.2,
    )
    print("\n\nQ2: climate change")
    for hit in hits:
        text = hit.metadata.content["text"]
        # repr() keeps newlines and other control characters visible
        print(f"Document ID: {hit.id}, Score: {hit.score}, Content: {text!r}")
|
||||
|
||||
|
||||
# Start the example only when this file is executed as a script (e.g.
# ``python basic_usage.py``), so that importing the module does not trigger
# the embedding and retrieval calls.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
BIN
functionality/rag/example.pdf
Normal file
BIN
functionality/rag/example.pdf
Normal file
Binary file not shown.
70
functionality/rag/multimodal_rag.py
Normal file
70
functionality/rag/multimodal_rag.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""The example of how to use multimodal RAG in AgentScope"""
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
|
||||
from agentscope.agent import ReActAgent
|
||||
from agentscope.embedding import DashScopeMultiModalEmbedding
|
||||
from agentscope.formatter import DashScopeChatFormatter
|
||||
from agentscope.message import Msg
|
||||
from agentscope.model import DashScopeChatModel
|
||||
from agentscope.rag import ImageReader, QdrantStore, SimpleKnowledge
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
# Render a small image containing the sentence "My name is Ming Li" and save
# it to ``path_image``; ``example_multimodal_rag`` reads this file back with
# ``ImageReader`` to build its knowledge base.
# NOTE: these statements run at module load time and the path is relative to
# the current working directory.
path_image = "./example.png"
plt.figure(figsize=(8, 3))
# Place the text at (0.5, 0.5), centred horizontally and vertically
plt.text(0.5, 0.5, "My name is Ming Li", ha="center", va="center", fontsize=30)
# Hide the axes so that only the text is drawn
plt.axis("off")
plt.savefig(path_image, bbox_inches="tight", pad_inches=0.1)
# Close the figure once the file has been written
plt.close()
|
||||
|
||||
|
||||
async def example_multimodal_rag() -> None:
    """Run the multimodal RAG example end to end.

    The image stored at ``path_image`` is read into documents and added to an
    in-memory knowledge base that uses a multimodal embedding model. A
    ``ReActAgent`` equipped with that knowledge base is asked a question, and
    one entry of the agent's memory is printed afterwards as JSON.
    """
    # Read the image and convert it to documents
    image_reader = ImageReader()
    image_docs = await image_reader(image_url=path_image)

    # Build the knowledge base: multimodal embedding model plus Qdrant store,
    # both configured with 1024 dimensions
    embedder = DashScopeMultiModalEmbedding(
        api_key=os.environ["DASHSCOPE_API_KEY"],
        model_name="multimodal-embedding-v1",
        dimensions=1024,
    )
    vector_store = QdrantStore(
        location=":memory:",
        collection_name="test_collection",
        dimensions=1024,
    )
    knowledge = SimpleKnowledge(
        embedding_model=embedder,
        embedding_store=vector_store,
    )
    await knowledge.add_documents(image_docs)

    # Agent that receives the knowledge base through the `knowledge` argument
    chat_model = DashScopeChatModel(
        api_key=os.environ["DASHSCOPE_API_KEY"],
        model_name="qwen3-vl-plus",
    )
    agent = ReActAgent(
        name="Friday",
        sys_prompt="You're a helpful assistant named Friday.",
        model=chat_model,
        formatter=DashScopeChatFormatter(),
        knowledge=knowledge,
    )

    question = Msg(
        "user",
        "Do you know my name?",
        "user",
    )
    await agent(question)

    # Show what the agent kept in memory for the retrieved document.
    # NOTE(review): the -4 offset presumably points at the message that holds
    # the retrieved document; it depends on how many messages the agent
    # appends per reply - confirm against ReActAgent before relying on it.
    print("\nThe retrieved document stored in the agent's memory:")
    history = await agent.memory.get_memory()
    retrieved = history[-4].content
    print(json.dumps(retrieved, indent=2, ensure_ascii=False))
|
||||
|
||||
|
||||
# Start the example only when this file is executed as a script (e.g.
# ``python multimodal_rag.py``), so that importing the module does not start
# the agent conversation.
if __name__ == "__main__":
    asyncio.run(example_multimodal_rag())
|
||||
78
functionality/rag/react_agent_integration.py
Normal file
78
functionality/rag/react_agent_integration.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""The example of integrating ReAct agent with RAG."""
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from agentscope.agent import ReActAgent, UserAgent
|
||||
from agentscope.embedding import DashScopeTextEmbedding
|
||||
from agentscope.formatter import DashScopeChatFormatter
|
||||
from agentscope.message import Msg
|
||||
from agentscope.model import DashScopeChatModel
|
||||
from agentscope.rag import QdrantStore, SimpleKnowledge, TextReader
|
||||
|
||||
|
||||
async def main() -> None:
    """Run the ReAct agent with a statically integrated knowledge base.

    An in-memory knowledge base is created and filled with a fake personal
    profile, then handed to a ``ReActAgent`` through its ``knowledge``
    argument. The agent chats with the user, starting from a preset question,
    until the user replies with ``exit``. Progress messages are printed to
    stdout before each step.
    """
    # Create an in-memory knowledge base instance
    print("Creating the knowledge base...")
    knowledge = SimpleKnowledge(
        embedding_store=QdrantStore(
            location=":memory:",
            collection_name="test_collection",
            dimensions=1024,  # The dimension of the embedding vectors
        ),
        embedding_model=DashScopeTextEmbedding(
            api_key=os.environ["DASHSCOPE_API_KEY"],
            model_name="text-embedding-v4",
        ),
    )

    # Read and chunk the documents; this could be done offline and only once.
    # The progress message names this step explicitly so that it is no longer
    # a duplicate of the insertion message printed below.
    print("Reading documents...")
    reader = TextReader(chunk_size=100, split_by="char")
    documents = await reader(
        # Passed by keyword, consistent with the other RAG examples
        text=(
            # Fake personal profile for demonstration
            "I'm John Doe, 28 years old. My best friend is James "
            "Smith. I live in San Francisco. I work at OpenAI as a "
            "software engineer. I love hiking and photography. "
            "My father is Michael Doe, a doctor. I'm very proud of him. "
            "My mother is Sarah Doe, a teacher. She is very kind and "
            "always helps me with my studies.\n"
            "I'm now a PhD student at Stanford University, majoring in "
            "Computer Science. My advisor is Prof. Jane Williams, who is "
            "a leading expert in artificial intelligence. I have published "
            "several papers in top conferences, such as NeurIPS and ICML. "
        ),
    )

    # Insert the chunks into the knowledge base
    print("Inserting documents into the knowledge base...")
    await knowledge.add_documents(documents)

    # Integrate into the ReActAgent by the `knowledge` argument
    print("Creating the agent...")
    agent = ReActAgent(
        name="Friday",
        sys_prompt="You are a helpful assistant named Friday.",
        model=DashScopeChatModel(
            api_key=os.environ["DASHSCOPE_API_KEY"],
            model_name="qwen-max",
        ),
        formatter=DashScopeChatFormatter(),
        # Equip the agent with the knowledge base
        knowledge=knowledge,
        # NOTE(review): presumably makes the agent print the hint message
        # built from the retrieved documents - confirm against ReActAgent
        print_hint_msg=True,
    )
    user = UserAgent(name="user")

    # Start the conversation: the preset question goes to the agent first,
    # then agent and user alternate until the user's text is exactly "exit"
    print("Start the conversation...")
    msg = Msg("user", "Do you know who is my best friend?", "user")
    while True:
        msg = await agent(msg)
        msg = await user(msg)
        if msg.get_text_content() == "exit":
            break
|
||||
|
||||
|
||||
# Start the example only when this file is executed as a script (e.g.
# ``python react_agent_integration.py``), so that importing the module does
# not launch the interactive conversation.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
4
functionality/rag/requirements.txt
Normal file
4
functionality/rag/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
agentscope[full]>=1.0.5
|
||||
matplotlib >= 3.10.7
|
||||
nltk >= 3.9.2
|
||||
pypdf >= 6.1.1
|
||||
Reference in New Issue
Block a user