init

2025-10-17 21:40:45 +08:00
commit 7d0451131f
155 changed files with 14873 additions and 0 deletions
--- a/functionality/rag/basic_usage.py
+++ b/functionality/rag/basic_usage.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+"""The main entry point of the RAG example."""
+import asyncio
+import os
+
+from agentscope.embedding import DashScopeTextEmbedding
+from agentscope.rag import PDFReader, QdrantStore, SimpleKnowledge, TextReader
+
+
+async def main() -> None:
+    """The main entry point of the RAG example."""
+
+    # Create readers with chunking arguments
+    reader = TextReader(chunk_size=1024)
+    pdf_reader = PDFReader(chunk_size=1024, split_by="sentence")
+
+    # Read documents
+    documents = await reader(
+        text="I'm Tony Stank, my password is 123456. My best friend is James "
+        "Rhodes.",
+    )
+
+    # Read a sample PDF file
+    pdf_path = os.path.join(
+        os.path.abspath(os.path.dirname(__file__)),
+        "example.pdf",
+    )
+    pdf_documents = await pdf_reader(pdf_path=pdf_path)
+
+    # Create a knowledge base with Qdrant as the embedding store and
+    # DashScope as the embedding model
+    knowledge = SimpleKnowledge(
+        embedding_store=QdrantStore(
+            location=":memory:",
+            collection_name="test_collection",
+            dimensions=1024,  # The dimension of the embedding vectors
+        ),
+        embedding_model=DashScopeTextEmbedding(
+            api_key=os.environ["DASHSCOPE_API_KEY"],
+            model_name="text-embedding-v4",
+        ),
+    )
+
+    # Insert documents into the knowledge base
+    await knowledge.add_documents(documents + pdf_documents)
+
+    # Retrieve relevant documents based on a given query
+    docs = await knowledge.retrieve(
+        query="What is Tony Stank's password?",
+        limit=3,
+        score_threshold=0.7,
+    )
+    print("Q1: What is Tony Stank's password?")
+    for doc in docs:
+        print(
+            f"Document ID: {doc.id}, Score: {doc.score}, "
+            f"Content: {doc.metadata.content['text']}",
+        )
+
+    # Retrieve documents from the PDF file based on a query
+    docs = await knowledge.retrieve(
+        query="climate change",
+        limit=3,
+        score_threshold=0.2,
+    )
+    print("\n\nQ2: climate change")
+    for doc in docs:
+        print(
+            f"Document ID: {doc.id}, Score: {doc.score}, "
+            f"Content: {repr(doc.metadata.content['text'])}",
+        )
+
+
+asyncio.run(main())