75 lines
2.2 KiB
Python
75 lines
2.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""The main entry point of the RAG example."""
|
|
import asyncio
|
|
import os
|
|
|
|
from agentscope.embedding import DashScopeTextEmbedding
|
|
from agentscope.rag import PDFReader, QdrantStore, SimpleKnowledge, TextReader
|
|
|
|
|
|
async def main() -> None:
    """Run the RAG example from document loading through retrieval.

    The steps are: build a text reader and a PDF reader, read an inline
    text snippet and the ``example.pdf`` file that sits next to this
    script, add everything to a ``SimpleKnowledge`` instance backed by a
    ``QdrantStore`` and a ``DashScopeTextEmbedding`` model, and finally
    print the results of two retrieval queries.

    The DashScope API key is read from the ``DASHSCOPE_API_KEY``
    environment variable; a ``KeyError`` is raised if it is not set.
    """

    # Readers, each configured with its chunking arguments
    text_reader = TextReader(chunk_size=1024)
    pdf_reader = PDFReader(chunk_size=1024, split_by="sentence")

    # Read the inline text snippet
    sample_text = (
        "I'm Tony Stank, my password is 123456. My best friend is James "
        "Rhodes."
    )
    text_documents = await text_reader(text=sample_text)

    # Read the sample PDF located in the same directory as this script
    script_dir = os.path.abspath(os.path.dirname(__file__))
    sample_pdf = os.path.join(script_dir, "example.pdf")
    pdf_documents = await pdf_reader(pdf_path=sample_pdf)

    # Knowledge base: Qdrant as the embedding store, DashScope as the
    # embedding model
    store = QdrantStore(
        location=":memory:",
        collection_name="test_collection",
        dimensions=1024,  # The dimension of the embedding vectors
    )
    embedder = DashScopeTextEmbedding(
        api_key=os.environ["DASHSCOPE_API_KEY"],
        model_name="text-embedding-v4",
    )
    knowledge = SimpleKnowledge(
        embedding_store=store,
        embedding_model=embedder,
    )

    # Insert the text and PDF documents into the knowledge base together
    all_documents = text_documents + pdf_documents
    await knowledge.add_documents(all_documents)

    # First query: targets the inline text snippet
    password_hits = await knowledge.retrieve(
        query="What is Tony Stank's password?",
        limit=3,
        score_threshold=0.7,
    )
    print("Q1: What is Tony Stank's password?")
    for hit in password_hits:
        summary = (
            f"Document ID: {hit.id}, Score: {hit.score}, "
            f"Content: {hit.metadata.content['text']}"
        )
        print(summary)

    # Second query: targets the PDF file; the content is printed in its
    # repr form
    climate_hits = await knowledge.retrieve(
        query="climate change",
        limit=3,
        score_threshold=0.2,
    )
    print("\n\nQ2: climate change")
    for hit in climate_hits:
        summary = (
            f"Document ID: {hit.id}, Score: {hit.score}, "
            f"Content: {hit.metadata.content['text']!r}"
        )
        print(summary)
|
|
|
|
|
|
# Only run the example when this file is executed as a script, so that
# importing the module does not start the pipeline as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
|