init
This commit is contained in:
74
functionality/rag/basic_usage.py
Normal file
74
functionality/rag/basic_usage.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""The main entry point of the RAG example."""
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from agentscope.embedding import DashScopeTextEmbedding
|
||||
from agentscope.rag import PDFReader, QdrantStore, SimpleKnowledge, TextReader
|
||||
|
||||
|
||||
async def main() -> None:
    """Run the RAG example from document reading through retrieval.

    The steps are: chunk an inline text snippet and the ``example.pdf``
    file located next to this script, add the resulting documents to a
    ``SimpleKnowledge`` instance built from a ``QdrantStore`` and a
    ``DashScopeTextEmbedding`` model, then run two queries and print
    every document each query returns.

    Raises:
        KeyError: If the ``DASHSCOPE_API_KEY`` environment variable is
            not set.
    """
    # Readers configured with their chunking arguments
    text_reader = TextReader(chunk_size=1024)
    pdf_reader = PDFReader(chunk_size=1024, split_by="sentence")

    # Read the inline text
    sample_text = (
        "I'm Tony Stank, my password is 123456. My best friend is James "
        "Rhodes."
    )
    text_documents = await text_reader(text=sample_text)

    # Read the sample PDF that sits in the same directory as this script
    script_dir = os.path.abspath(os.path.dirname(__file__))
    sample_pdf = os.path.join(script_dir, "example.pdf")
    pdf_documents = await pdf_reader(pdf_path=sample_pdf)

    # Knowledge base with Qdrant as the embedding store and DashScope as
    # the embedding model. The store is built first and the API key is
    # looked up second, matching the original argument evaluation order.
    vector_store = QdrantStore(
        location=":memory:",
        collection_name="test_collection",
        dimensions=1024,  # The dimension of the embedding vectors
    )
    embedder = DashScopeTextEmbedding(
        api_key=os.environ["DASHSCOPE_API_KEY"],
        model_name="text-embedding-v4",
    )
    knowledge = SimpleKnowledge(
        embedding_store=vector_store,
        embedding_model=embedder,
    )

    # Insert the text and PDF documents into the knowledge base
    await knowledge.add_documents(text_documents + pdf_documents)

    # First query: targets the inline text snippet
    password_hits = await knowledge.retrieve(
        query="What is Tony Stank's password?",
        limit=3,
        score_threshold=0.7,
    )
    print("Q1: What is Tony Stank's password?")
    for hit in password_hits:
        print(
            f"Document ID: {hit.id}, Score: {hit.score}, "
            f"Content: {hit.metadata.content['text']}",
        )

    # Second query: targets the PDF content; the text is printed via
    # repr() so whitespace and newlines in the chunk stay visible
    climate_hits = await knowledge.retrieve(
        query="climate change",
        limit=3,
        score_threshold=0.2,
    )
    print("\n\nQ2: climate change")
    for hit in climate_hits:
        print(
            f"Document ID: {hit.id}, Score: {hit.score}, "
            f"Content: {repr(hit.metadata.content['text'])}",
        )
|
||||
|
||||
|
||||
# Start the example only when this file is executed as a script, so that
# importing the module does not kick off the whole run as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user